xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 79f08d9e)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
/*
 * Firmware images this module may request at runtime, one set per
 * Southern Islands ASIC (Tahiti, Pitcairn, Verde, Oland, Hainan).
 * Each ASIC needs six blobs: pfp, me, ce, mc, rlc and smc
 * (microcode for the various GPU engines; exact engine mapping is
 * established by the ucode loading code, not visible in this chunk).
 */
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
/*
 * Forward declarations for static helpers defined later in this file,
 * plus extern declarations for routines shared with the older r600 /
 * evergreen / sumo ASIC code paths.
 * NOTE(review): the externs would more conventionally live in a shared
 * header rather than being redeclared here -- matches existing driver
 * style, so left as-is.
 */
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern void sumo_rlc_fini(struct radeon_device *rdev);
72 extern int sumo_rlc_init(struct radeon_device *rdev);
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
76 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
78 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
82 					 bool enable);
83 static void si_fini_pg(struct radeon_device *rdev);
84 static void si_fini_cg(struct radeon_device *rdev);
85 static void si_rlc_stop(struct radeon_device *rdev);
86 
/*
 * RLC save/restore register list for Verde.
 * Each entry is a pair: ((broadcast/instance select) << 16) |
 * (register dword offset), followed by a placeholder value slot
 * (0x00000000).  The bare 0x3 and the final lone 0x00000000 appear to
 * be list control/terminator markers -- TODO(review): confirm against
 * the RLC save/restore setup code that consumes this table (not
 * visible in this chunk).
 *
 * Fix: the value slot after the 0xac54 entry used an uppercase hex
 * prefix (0X00000000); normalized to lowercase 0x to match every other
 * literal in the file (identical value, cosmetic only).
 */
87 static const u32 verde_rlc_save_restore_register_list[] =
88 {
89 	(0x8000 << 16) | (0x98f4 >> 2),
90 	0x00000000,
91 	(0x8040 << 16) | (0x98f4 >> 2),
92 	0x00000000,
93 	(0x8000 << 16) | (0xe80 >> 2),
94 	0x00000000,
95 	(0x8040 << 16) | (0xe80 >> 2),
96 	0x00000000,
97 	(0x8000 << 16) | (0x89bc >> 2),
98 	0x00000000,
99 	(0x8040 << 16) | (0x89bc >> 2),
100 	0x00000000,
101 	(0x8000 << 16) | (0x8c1c >> 2),
102 	0x00000000,
103 	(0x8040 << 16) | (0x8c1c >> 2),
104 	0x00000000,
105 	(0x9c00 << 16) | (0x98f0 >> 2),
106 	0x00000000,
107 	(0x9c00 << 16) | (0xe7c >> 2),
108 	0x00000000,
109 	(0x8000 << 16) | (0x9148 >> 2),
110 	0x00000000,
111 	(0x8040 << 16) | (0x9148 >> 2),
112 	0x00000000,
113 	(0x9c00 << 16) | (0x9150 >> 2),
114 	0x00000000,
115 	(0x9c00 << 16) | (0x897c >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x8d8c >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0xac54 >> 2),
120 	0x00000000,
121 	0x3,
122 	(0x9c00 << 16) | (0x98f8 >> 2),
123 	0x00000000,
124 	(0x9c00 << 16) | (0x9910 >> 2),
125 	0x00000000,
126 	(0x9c00 << 16) | (0x9914 >> 2),
127 	0x00000000,
128 	(0x9c00 << 16) | (0x9918 >> 2),
129 	0x00000000,
130 	(0x9c00 << 16) | (0x991c >> 2),
131 	0x00000000,
132 	(0x9c00 << 16) | (0x9920 >> 2),
133 	0x00000000,
134 	(0x9c00 << 16) | (0x9924 >> 2),
135 	0x00000000,
136 	(0x9c00 << 16) | (0x9928 >> 2),
137 	0x00000000,
138 	(0x9c00 << 16) | (0x992c >> 2),
139 	0x00000000,
140 	(0x9c00 << 16) | (0x9930 >> 2),
141 	0x00000000,
142 	(0x9c00 << 16) | (0x9934 >> 2),
143 	0x00000000,
144 	(0x9c00 << 16) | (0x9938 >> 2),
145 	0x00000000,
146 	(0x9c00 << 16) | (0x993c >> 2),
147 	0x00000000,
148 	(0x9c00 << 16) | (0x9940 >> 2),
149 	0x00000000,
150 	(0x9c00 << 16) | (0x9944 >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x9948 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0x994c >> 2),
155 	0x00000000,
156 	(0x9c00 << 16) | (0x9950 >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x9954 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9958 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x995c >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9960 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9964 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9968 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x996c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9970 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x9974 >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9978 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x997c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9980 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x9984 >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9988 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x998c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x8c00 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x8c14 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x8c04 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x8c08 >> 2),
195 	0x00000000,
196 	(0x8000 << 16) | (0x9b7c >> 2),
197 	0x00000000,
198 	(0x8040 << 16) | (0x9b7c >> 2),
199 	0x00000000,
200 	(0x8000 << 16) | (0xe84 >> 2),
201 	0x00000000,
202 	(0x8040 << 16) | (0xe84 >> 2),
203 	0x00000000,
204 	(0x8000 << 16) | (0x89c0 >> 2),
205 	0x00000000,
206 	(0x8040 << 16) | (0x89c0 >> 2),
207 	0x00000000,
208 	(0x8000 << 16) | (0x914c >> 2),
209 	0x00000000,
210 	(0x8040 << 16) | (0x914c >> 2),
211 	0x00000000,
212 	(0x8000 << 16) | (0x8c20 >> 2),
213 	0x00000000,
214 	(0x8040 << 16) | (0x8c20 >> 2),
215 	0x00000000,
216 	(0x8000 << 16) | (0x9354 >> 2),
217 	0x00000000,
218 	(0x8040 << 16) | (0x9354 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9060 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9364 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x9100 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x913c >> 2),
227 	0x00000000,
228 	(0x8000 << 16) | (0x90e0 >> 2),
229 	0x00000000,
230 	(0x8000 << 16) | (0x90e4 >> 2),
231 	0x00000000,
232 	(0x8000 << 16) | (0x90e8 >> 2),
233 	0x00000000,
234 	(0x8040 << 16) | (0x90e0 >> 2),
235 	0x00000000,
236 	(0x8040 << 16) | (0x90e4 >> 2),
237 	0x00000000,
238 	(0x8040 << 16) | (0x90e8 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x8bcc >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x8b24 >> 2),
243 	0x00000000,
244 	(0x9c00 << 16) | (0x88c4 >> 2),
245 	0x00000000,
246 	(0x9c00 << 16) | (0x8e50 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x8c0c >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x8e58 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x8e5c >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x9508 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x950c >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9494 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0xac0c >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0xac10 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0xac14 >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0xae00 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0xac08 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x88d4 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x88c8 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x88cc >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x89b0 >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x8b10 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x8a14 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x9830 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x9834 >> 2),
285 	0x00000000,
286 	(0x9c00 << 16) | (0x9838 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x9a10 >> 2),
289 	0x00000000,
290 	(0x8000 << 16) | (0x9870 >> 2),
291 	0x00000000,
292 	(0x8000 << 16) | (0x9874 >> 2),
293 	0x00000000,
294 	(0x8001 << 16) | (0x9870 >> 2),
295 	0x00000000,
296 	(0x8001 << 16) | (0x9874 >> 2),
297 	0x00000000,
298 	(0x8040 << 16) | (0x9870 >> 2),
299 	0x00000000,
300 	(0x8040 << 16) | (0x9874 >> 2),
301 	0x00000000,
302 	(0x8041 << 16) | (0x9870 >> 2),
303 	0x00000000,
304 	(0x8041 << 16) | (0x9874 >> 2),
305 	0x00000000,
306 	0x00000000
307 };
308 
/*
 * Golden (recommended) RLC register settings for Tahiti.
 * Each line is a {offset, mask, value} triplet -- layout inferred from
 * the data; consumed by register-programming code outside this chunk,
 * TODO(review): confirm against radeon_program_register_sequence().
 */
309 static const u32 tahiti_golden_rlc_registers[] =
310 {
311 	0xc424, 0xffffffff, 0x00601005,
312 	0xc47c, 0xffffffff, 0x10104040,
313 	0xc488, 0xffffffff, 0x0100000a,
314 	0xc314, 0xffffffff, 0x00000800,
315 	0xc30c, 0xffffffff, 0x800000f4,
316 	0xf4a8, 0xffffffff, 0x00000000
317 };
318 
/*
 * Golden register settings for Tahiti ({offset, mask, value} triplets;
 * layout inferred from the data -- confirm with the consumer, which is
 * not visible in this chunk).
 */
319 static const u32 tahiti_golden_registers[] =
320 {
321 	0x9a10, 0x00010000, 0x00018208,
322 	0x9830, 0xffffffff, 0x00000000,
323 	0x9834, 0xf00fffff, 0x00000400,
324 	0x9838, 0x0002021c, 0x00020200,
325 	0xc78, 0x00000080, 0x00000000,
326 	0xd030, 0x000300c0, 0x00800040,
327 	0xd830, 0x000300c0, 0x00800040,
328 	0x5bb0, 0x000000f0, 0x00000070,
329 	0x5bc0, 0x00200000, 0x50100000,
330 	0x7030, 0x31000311, 0x00000011,
331 	0x277c, 0x00000003, 0x000007ff,
332 	0x240c, 0x000007ff, 0x00000000,
333 	0x8a14, 0xf000001f, 0x00000007,
334 	0x8b24, 0xffffffff, 0x00ffffff,
335 	0x8b10, 0x0000ff0f, 0x00000000,
336 	0x28a4c, 0x07ffffff, 0x4e000000,
337 	0x28350, 0x3f3f3fff, 0x2a00126a,
338 	0x30, 0x000000ff, 0x0040,
339 	0x34, 0x00000040, 0x00004040,
340 	0x9100, 0x07ffffff, 0x03000000,
341 	0x8e88, 0x01ff1f3f, 0x00000000,
342 	0x8e84, 0x01ff1f3f, 0x00000000,
343 	0x9060, 0x0000007f, 0x00000020,
344 	0x9508, 0x00010000, 0x00010000,
345 	0xac14, 0x00000200, 0x000002fb,
346 	0xac10, 0xffffffff, 0x0000543b,
347 	0xac0c, 0xffffffff, 0xa9210876,
348 	0x88d0, 0xffffffff, 0x000fff40,
349 	0x88d4, 0x0000001f, 0x00000010,
350 	0x1410, 0x20000000, 0x20fffed8,
351 	0x15c0, 0x000c0fc0, 0x000c0400
352 };
353 
/*
 * Second golden register set for Tahiti (single {offset, mask, value}
 * triplet) -- kept separate from tahiti_golden_registers, presumably
 * applied under a different condition by the consumer; verify there.
 */
354 static const u32 tahiti_golden_registers2[] =
355 {
356 	0xc64, 0x00000001, 0x00000001
357 };
358 
/* Golden RLC register settings for Pitcairn ({offset, mask, value}). */
359 static const u32 pitcairn_golden_rlc_registers[] =
360 {
361 	0xc424, 0xffffffff, 0x00601004,
362 	0xc47c, 0xffffffff, 0x10102020,
363 	0xc488, 0xffffffff, 0x01000020,
364 	0xc314, 0xffffffff, 0x00000800,
365 	0xc30c, 0xffffffff, 0x800000a4
366 };
367 
/* Golden register settings for Pitcairn ({offset, mask, value}). */
368 static const u32 pitcairn_golden_registers[] =
369 {
370 	0x9a10, 0x00010000, 0x00018208,
371 	0x9830, 0xffffffff, 0x00000000,
372 	0x9834, 0xf00fffff, 0x00000400,
373 	0x9838, 0x0002021c, 0x00020200,
374 	0xc78, 0x00000080, 0x00000000,
375 	0xd030, 0x000300c0, 0x00800040,
376 	0xd830, 0x000300c0, 0x00800040,
377 	0x5bb0, 0x000000f0, 0x00000070,
378 	0x5bc0, 0x00200000, 0x50100000,
379 	0x7030, 0x31000311, 0x00000011,
380 	0x2ae4, 0x00073ffe, 0x000022a2,
381 	0x240c, 0x000007ff, 0x00000000,
382 	0x8a14, 0xf000001f, 0x00000007,
383 	0x8b24, 0xffffffff, 0x00ffffff,
384 	0x8b10, 0x0000ff0f, 0x00000000,
385 	0x28a4c, 0x07ffffff, 0x4e000000,
386 	0x28350, 0x3f3f3fff, 0x2a00126a,
387 	0x30, 0x000000ff, 0x0040,
388 	0x34, 0x00000040, 0x00004040,
389 	0x9100, 0x07ffffff, 0x03000000,
390 	0x9060, 0x0000007f, 0x00000020,
391 	0x9508, 0x00010000, 0x00010000,
392 	0xac14, 0x000003ff, 0x000000f7,
393 	0xac10, 0xffffffff, 0x00000000,
394 	0xac0c, 0xffffffff, 0x32761054,
395 	0x88d4, 0x0000001f, 0x00000010,
396 	0x15c0, 0x000c0fc0, 0x000c0400
397 };
398 
/* Golden RLC register settings for Verde ({offset, mask, value}). */
399 static const u32 verde_golden_rlc_registers[] =
400 {
401 	0xc424, 0xffffffff, 0x033f1005,
402 	0xc47c, 0xffffffff, 0x10808020,
403 	0xc488, 0xffffffff, 0x00800008,
404 	0xc314, 0xffffffff, 0x00001000,
405 	0xc30c, 0xffffffff, 0x80010014
406 };
407 
/*
 * Golden register settings for Verde ({offset, mask, value} triplets).
 * NOTE(review): several rows are repeated verbatim (0xd030, 0xd830,
 * 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x9100, 0x8e88, 0x8e84, 0xac14,
 * 0xac10, 0xac0c, 0x88d4).  Re-writing the same value through the same
 * mask is harmless but looks unintentional -- left untouched because
 * the consumer is not visible here; candidate for deduplication.
 */
408 static const u32 verde_golden_registers[] =
409 {
410 	0x9a10, 0x00010000, 0x00018208,
411 	0x9830, 0xffffffff, 0x00000000,
412 	0x9834, 0xf00fffff, 0x00000400,
413 	0x9838, 0x0002021c, 0x00020200,
414 	0xc78, 0x00000080, 0x00000000,
415 	0xd030, 0x000300c0, 0x00800040,
416 	0xd030, 0x000300c0, 0x00800040,
417 	0xd830, 0x000300c0, 0x00800040,
418 	0xd830, 0x000300c0, 0x00800040,
419 	0x5bb0, 0x000000f0, 0x00000070,
420 	0x5bc0, 0x00200000, 0x50100000,
421 	0x7030, 0x31000311, 0x00000011,
422 	0x2ae4, 0x00073ffe, 0x000022a2,
423 	0x2ae4, 0x00073ffe, 0x000022a2,
424 	0x2ae4, 0x00073ffe, 0x000022a2,
425 	0x240c, 0x000007ff, 0x00000000,
426 	0x240c, 0x000007ff, 0x00000000,
427 	0x240c, 0x000007ff, 0x00000000,
428 	0x8a14, 0xf000001f, 0x00000007,
429 	0x8a14, 0xf000001f, 0x00000007,
430 	0x8a14, 0xf000001f, 0x00000007,
431 	0x8b24, 0xffffffff, 0x00ffffff,
432 	0x8b10, 0x0000ff0f, 0x00000000,
433 	0x28a4c, 0x07ffffff, 0x4e000000,
434 	0x28350, 0x3f3f3fff, 0x0000124a,
435 	0x28350, 0x3f3f3fff, 0x0000124a,
436 	0x28350, 0x3f3f3fff, 0x0000124a,
437 	0x30, 0x000000ff, 0x0040,
438 	0x34, 0x00000040, 0x00004040,
439 	0x9100, 0x07ffffff, 0x03000000,
440 	0x9100, 0x07ffffff, 0x03000000,
441 	0x8e88, 0x01ff1f3f, 0x00000000,
442 	0x8e88, 0x01ff1f3f, 0x00000000,
443 	0x8e88, 0x01ff1f3f, 0x00000000,
444 	0x8e84, 0x01ff1f3f, 0x00000000,
445 	0x8e84, 0x01ff1f3f, 0x00000000,
446 	0x8e84, 0x01ff1f3f, 0x00000000,
447 	0x9060, 0x0000007f, 0x00000020,
448 	0x9508, 0x00010000, 0x00010000,
449 	0xac14, 0x000003ff, 0x00000003,
450 	0xac14, 0x000003ff, 0x00000003,
451 	0xac14, 0x000003ff, 0x00000003,
452 	0xac10, 0xffffffff, 0x00000000,
453 	0xac10, 0xffffffff, 0x00000000,
454 	0xac10, 0xffffffff, 0x00000000,
455 	0xac0c, 0xffffffff, 0x00001032,
456 	0xac0c, 0xffffffff, 0x00001032,
457 	0xac0c, 0xffffffff, 0x00001032,
458 	0x88d4, 0x0000001f, 0x00000010,
459 	0x88d4, 0x0000001f, 0x00000010,
460 	0x88d4, 0x0000001f, 0x00000010,
461 	0x15c0, 0x000c0fc0, 0x000c0400
462 };
463 
/* Golden RLC register settings for Oland ({offset, mask, value}). */
464 static const u32 oland_golden_rlc_registers[] =
465 {
466 	0xc424, 0xffffffff, 0x00601005,
467 	0xc47c, 0xffffffff, 0x10104040,
468 	0xc488, 0xffffffff, 0x0100000a,
469 	0xc314, 0xffffffff, 0x00000800,
470 	0xc30c, 0xffffffff, 0x800000f4
471 };
472 
/* Golden register settings for Oland ({offset, mask, value}). */
473 static const u32 oland_golden_registers[] =
474 {
475 	0x9a10, 0x00010000, 0x00018208,
476 	0x9830, 0xffffffff, 0x00000000,
477 	0x9834, 0xf00fffff, 0x00000400,
478 	0x9838, 0x0002021c, 0x00020200,
479 	0xc78, 0x00000080, 0x00000000,
480 	0xd030, 0x000300c0, 0x00800040,
481 	0xd830, 0x000300c0, 0x00800040,
482 	0x5bb0, 0x000000f0, 0x00000070,
483 	0x5bc0, 0x00200000, 0x50100000,
484 	0x7030, 0x31000311, 0x00000011,
485 	0x2ae4, 0x00073ffe, 0x000022a2,
486 	0x240c, 0x000007ff, 0x00000000,
487 	0x8a14, 0xf000001f, 0x00000007,
488 	0x8b24, 0xffffffff, 0x00ffffff,
489 	0x8b10, 0x0000ff0f, 0x00000000,
490 	0x28a4c, 0x07ffffff, 0x4e000000,
491 	0x28350, 0x3f3f3fff, 0x00000082,
492 	0x30, 0x000000ff, 0x0040,
493 	0x34, 0x00000040, 0x00004040,
494 	0x9100, 0x07ffffff, 0x03000000,
495 	0x9060, 0x0000007f, 0x00000020,
496 	0x9508, 0x00010000, 0x00010000,
497 	0xac14, 0x000003ff, 0x000000f3,
498 	0xac10, 0xffffffff, 0x00000000,
499 	0xac0c, 0xffffffff, 0x00003210,
500 	0x88d4, 0x0000001f, 0x00000010,
501 	0x15c0, 0x000c0fc0, 0x000c0400
502 };
503 
/* Golden register settings for Hainan ({offset, mask, value}). */
504 static const u32 hainan_golden_registers[] =
505 {
506 	0x9a10, 0x00010000, 0x00018208,
507 	0x9830, 0xffffffff, 0x00000000,
508 	0x9834, 0xf00fffff, 0x00000400,
509 	0x9838, 0x0002021c, 0x00020200,
510 	0xd0c0, 0xff000fff, 0x00000100,
511 	0xd030, 0x000300c0, 0x00800040,
512 	0xd8c0, 0xff000fff, 0x00000100,
513 	0xd830, 0x000300c0, 0x00800040,
514 	0x2ae4, 0x00073ffe, 0x000022a2,
515 	0x240c, 0x000007ff, 0x00000000,
516 	0x8a14, 0xf000001f, 0x00000007,
517 	0x8b24, 0xffffffff, 0x00ffffff,
518 	0x8b10, 0x0000ff0f, 0x00000000,
519 	0x28a4c, 0x07ffffff, 0x4e000000,
520 	0x28350, 0x3f3f3fff, 0x00000000,
521 	0x30, 0x000000ff, 0x0040,
522 	0x34, 0x00000040, 0x00004040,
523 	0x9100, 0x03e00000, 0x03600000,
524 	0x9060, 0x0000007f, 0x00000020,
525 	0x9508, 0x00010000, 0x00010000,
526 	0xac14, 0x000003ff, 0x000000f1,
527 	0xac10, 0xffffffff, 0x00000000,
528 	0xac0c, 0xffffffff, 0x00003210,
529 	0x88d4, 0x0000001f, 0x00000010,
530 	0x15c0, 0x000c0fc0, 0x000c0400
531 };
532 
/*
 * Second golden register set for Hainan (single {offset, mask, value}
 * triplet), applied separately by the consumer; verify there.
 */
533 static const u32 hainan_golden_registers2[] =
534 {
535 	0x98f8, 0xffffffff, 0x02010001
536 };
537 
/*
 * Clock-gating init sequence for Tahiti ({offset, mask, value}
 * triplets).  The name suggests medium-grain / coarse-grain clock
 * gating (mgcg/cgcg) setup -- TODO(review): confirm against the
 * clock-gating code that applies this table (not in this chunk).
 */
538 static const u32 tahiti_mgcg_cgcg_init[] =
539 {
540 	0xc400, 0xffffffff, 0xfffffffc,
541 	0x802c, 0xffffffff, 0xe0000000,
542 	0x9a60, 0xffffffff, 0x00000100,
543 	0x92a4, 0xffffffff, 0x00000100,
544 	0xc164, 0xffffffff, 0x00000100,
545 	0x9774, 0xffffffff, 0x00000100,
546 	0x8984, 0xffffffff, 0x06000100,
547 	0x8a18, 0xffffffff, 0x00000100,
548 	0x92a0, 0xffffffff, 0x00000100,
549 	0xc380, 0xffffffff, 0x00000100,
550 	0x8b28, 0xffffffff, 0x00000100,
551 	0x9144, 0xffffffff, 0x00000100,
552 	0x8d88, 0xffffffff, 0x00000100,
553 	0x8d8c, 0xffffffff, 0x00000100,
554 	0x9030, 0xffffffff, 0x00000100,
555 	0x9034, 0xffffffff, 0x00000100,
556 	0x9038, 0xffffffff, 0x00000100,
557 	0x903c, 0xffffffff, 0x00000100,
558 	0xad80, 0xffffffff, 0x00000100,
559 	0xac54, 0xffffffff, 0x00000100,
560 	0x897c, 0xffffffff, 0x06000100,
561 	0x9868, 0xffffffff, 0x00000100,
562 	0x9510, 0xffffffff, 0x00000100,
563 	0xaf04, 0xffffffff, 0x00000100,
564 	0xae04, 0xffffffff, 0x00000100,
565 	0x949c, 0xffffffff, 0x00000100,
566 	0x802c, 0xffffffff, 0xe0000000,
567 	0x9160, 0xffffffff, 0x00010000,
568 	0x9164, 0xffffffff, 0x00030002,
569 	0x9168, 0xffffffff, 0x00040007,
570 	0x916c, 0xffffffff, 0x00060005,
571 	0x9170, 0xffffffff, 0x00090008,
572 	0x9174, 0xffffffff, 0x00020001,
573 	0x9178, 0xffffffff, 0x00040003,
574 	0x917c, 0xffffffff, 0x00000007,
575 	0x9180, 0xffffffff, 0x00060005,
576 	0x9184, 0xffffffff, 0x00090008,
577 	0x9188, 0xffffffff, 0x00030002,
578 	0x918c, 0xffffffff, 0x00050004,
579 	0x9190, 0xffffffff, 0x00000008,
580 	0x9194, 0xffffffff, 0x00070006,
581 	0x9198, 0xffffffff, 0x000a0009,
582 	0x919c, 0xffffffff, 0x00040003,
583 	0x91a0, 0xffffffff, 0x00060005,
584 	0x91a4, 0xffffffff, 0x00000009,
585 	0x91a8, 0xffffffff, 0x00080007,
586 	0x91ac, 0xffffffff, 0x000b000a,
587 	0x91b0, 0xffffffff, 0x00050004,
588 	0x91b4, 0xffffffff, 0x00070006,
589 	0x91b8, 0xffffffff, 0x0008000b,
590 	0x91bc, 0xffffffff, 0x000a0009,
591 	0x91c0, 0xffffffff, 0x000d000c,
592 	0x91c4, 0xffffffff, 0x00060005,
593 	0x91c8, 0xffffffff, 0x00080007,
594 	0x91cc, 0xffffffff, 0x0000000b,
595 	0x91d0, 0xffffffff, 0x000a0009,
596 	0x91d4, 0xffffffff, 0x000d000c,
597 	0x91d8, 0xffffffff, 0x00070006,
598 	0x91dc, 0xffffffff, 0x00090008,
599 	0x91e0, 0xffffffff, 0x0000000c,
600 	0x91e4, 0xffffffff, 0x000b000a,
601 	0x91e8, 0xffffffff, 0x000e000d,
602 	0x91ec, 0xffffffff, 0x00080007,
603 	0x91f0, 0xffffffff, 0x000a0009,
604 	0x91f4, 0xffffffff, 0x0000000d,
605 	0x91f8, 0xffffffff, 0x000c000b,
606 	0x91fc, 0xffffffff, 0x000f000e,
607 	0x9200, 0xffffffff, 0x00090008,
608 	0x9204, 0xffffffff, 0x000b000a,
609 	0x9208, 0xffffffff, 0x000c000f,
610 	0x920c, 0xffffffff, 0x000e000d,
611 	0x9210, 0xffffffff, 0x00110010,
612 	0x9214, 0xffffffff, 0x000a0009,
613 	0x9218, 0xffffffff, 0x000c000b,
614 	0x921c, 0xffffffff, 0x0000000f,
615 	0x9220, 0xffffffff, 0x000e000d,
616 	0x9224, 0xffffffff, 0x00110010,
617 	0x9228, 0xffffffff, 0x000b000a,
618 	0x922c, 0xffffffff, 0x000d000c,
619 	0x9230, 0xffffffff, 0x00000010,
620 	0x9234, 0xffffffff, 0x000f000e,
621 	0x9238, 0xffffffff, 0x00120011,
622 	0x923c, 0xffffffff, 0x000c000b,
623 	0x9240, 0xffffffff, 0x000e000d,
624 	0x9244, 0xffffffff, 0x00000011,
625 	0x9248, 0xffffffff, 0x0010000f,
626 	0x924c, 0xffffffff, 0x00130012,
627 	0x9250, 0xffffffff, 0x000d000c,
628 	0x9254, 0xffffffff, 0x000f000e,
629 	0x9258, 0xffffffff, 0x00100013,
630 	0x925c, 0xffffffff, 0x00120011,
631 	0x9260, 0xffffffff, 0x00150014,
632 	0x9264, 0xffffffff, 0x000e000d,
633 	0x9268, 0xffffffff, 0x0010000f,
634 	0x926c, 0xffffffff, 0x00000013,
635 	0x9270, 0xffffffff, 0x00120011,
636 	0x9274, 0xffffffff, 0x00150014,
637 	0x9278, 0xffffffff, 0x000f000e,
638 	0x927c, 0xffffffff, 0x00110010,
639 	0x9280, 0xffffffff, 0x00000014,
640 	0x9284, 0xffffffff, 0x00130012,
641 	0x9288, 0xffffffff, 0x00160015,
642 	0x928c, 0xffffffff, 0x0010000f,
643 	0x9290, 0xffffffff, 0x00120011,
644 	0x9294, 0xffffffff, 0x00000015,
645 	0x9298, 0xffffffff, 0x00140013,
646 	0x929c, 0xffffffff, 0x00170016,
647 	0x9150, 0xffffffff, 0x96940200,
648 	0x8708, 0xffffffff, 0x00900100,
649 	0xc478, 0xffffffff, 0x00000080,
650 	0xc404, 0xffffffff, 0x0020003f,
651 	0x30, 0xffffffff, 0x0000001c,
652 	0x34, 0x000f0000, 0x000f0000,
653 	0x160c, 0xffffffff, 0x00000100,
654 	0x1024, 0xffffffff, 0x00000100,
655 	0x102c, 0x00000101, 0x00000000,
656 	0x20a8, 0xffffffff, 0x00000104,
657 	0x264c, 0x000c0000, 0x000c0000,
658 	0x2648, 0x000c0000, 0x000c0000,
659 	0x55e4, 0xff000fff, 0x00000100,
660 	0x55e8, 0x00000001, 0x00000001,
661 	0x2f50, 0x00000001, 0x00000001,
662 	0x30cc, 0xc0000fff, 0x00000104,
663 	0xc1e4, 0x00000001, 0x00000001,
664 	0xd0c0, 0xfffffff0, 0x00000100,
665 	0xd8c0, 0xfffffff0, 0x00000100
666 };
667 
/*
 * Clock-gating init sequence for Pitcairn ({offset, mask, value}
 * triplets; same format as tahiti_mgcg_cgcg_init).
 */
668 static const u32 pitcairn_mgcg_cgcg_init[] =
669 {
670 	0xc400, 0xffffffff, 0xfffffffc,
671 	0x802c, 0xffffffff, 0xe0000000,
672 	0x9a60, 0xffffffff, 0x00000100,
673 	0x92a4, 0xffffffff, 0x00000100,
674 	0xc164, 0xffffffff, 0x00000100,
675 	0x9774, 0xffffffff, 0x00000100,
676 	0x8984, 0xffffffff, 0x06000100,
677 	0x8a18, 0xffffffff, 0x00000100,
678 	0x92a0, 0xffffffff, 0x00000100,
679 	0xc380, 0xffffffff, 0x00000100,
680 	0x8b28, 0xffffffff, 0x00000100,
681 	0x9144, 0xffffffff, 0x00000100,
682 	0x8d88, 0xffffffff, 0x00000100,
683 	0x8d8c, 0xffffffff, 0x00000100,
684 	0x9030, 0xffffffff, 0x00000100,
685 	0x9034, 0xffffffff, 0x00000100,
686 	0x9038, 0xffffffff, 0x00000100,
687 	0x903c, 0xffffffff, 0x00000100,
688 	0xad80, 0xffffffff, 0x00000100,
689 	0xac54, 0xffffffff, 0x00000100,
690 	0x897c, 0xffffffff, 0x06000100,
691 	0x9868, 0xffffffff, 0x00000100,
692 	0x9510, 0xffffffff, 0x00000100,
693 	0xaf04, 0xffffffff, 0x00000100,
694 	0xae04, 0xffffffff, 0x00000100,
695 	0x949c, 0xffffffff, 0x00000100,
696 	0x802c, 0xffffffff, 0xe0000000,
697 	0x9160, 0xffffffff, 0x00010000,
698 	0x9164, 0xffffffff, 0x00030002,
699 	0x9168, 0xffffffff, 0x00040007,
700 	0x916c, 0xffffffff, 0x00060005,
701 	0x9170, 0xffffffff, 0x00090008,
702 	0x9174, 0xffffffff, 0x00020001,
703 	0x9178, 0xffffffff, 0x00040003,
704 	0x917c, 0xffffffff, 0x00000007,
705 	0x9180, 0xffffffff, 0x00060005,
706 	0x9184, 0xffffffff, 0x00090008,
707 	0x9188, 0xffffffff, 0x00030002,
708 	0x918c, 0xffffffff, 0x00050004,
709 	0x9190, 0xffffffff, 0x00000008,
710 	0x9194, 0xffffffff, 0x00070006,
711 	0x9198, 0xffffffff, 0x000a0009,
712 	0x919c, 0xffffffff, 0x00040003,
713 	0x91a0, 0xffffffff, 0x00060005,
714 	0x91a4, 0xffffffff, 0x00000009,
715 	0x91a8, 0xffffffff, 0x00080007,
716 	0x91ac, 0xffffffff, 0x000b000a,
717 	0x91b0, 0xffffffff, 0x00050004,
718 	0x91b4, 0xffffffff, 0x00070006,
719 	0x91b8, 0xffffffff, 0x0008000b,
720 	0x91bc, 0xffffffff, 0x000a0009,
721 	0x91c0, 0xffffffff, 0x000d000c,
722 	0x9200, 0xffffffff, 0x00090008,
723 	0x9204, 0xffffffff, 0x000b000a,
724 	0x9208, 0xffffffff, 0x000c000f,
725 	0x920c, 0xffffffff, 0x000e000d,
726 	0x9210, 0xffffffff, 0x00110010,
727 	0x9214, 0xffffffff, 0x000a0009,
728 	0x9218, 0xffffffff, 0x000c000b,
729 	0x921c, 0xffffffff, 0x0000000f,
730 	0x9220, 0xffffffff, 0x000e000d,
731 	0x9224, 0xffffffff, 0x00110010,
732 	0x9228, 0xffffffff, 0x000b000a,
733 	0x922c, 0xffffffff, 0x000d000c,
734 	0x9230, 0xffffffff, 0x00000010,
735 	0x9234, 0xffffffff, 0x000f000e,
736 	0x9238, 0xffffffff, 0x00120011,
737 	0x923c, 0xffffffff, 0x000c000b,
738 	0x9240, 0xffffffff, 0x000e000d,
739 	0x9244, 0xffffffff, 0x00000011,
740 	0x9248, 0xffffffff, 0x0010000f,
741 	0x924c, 0xffffffff, 0x00130012,
742 	0x9250, 0xffffffff, 0x000d000c,
743 	0x9254, 0xffffffff, 0x000f000e,
744 	0x9258, 0xffffffff, 0x00100013,
745 	0x925c, 0xffffffff, 0x00120011,
746 	0x9260, 0xffffffff, 0x00150014,
747 	0x9150, 0xffffffff, 0x96940200,
748 	0x8708, 0xffffffff, 0x00900100,
749 	0xc478, 0xffffffff, 0x00000080,
750 	0xc404, 0xffffffff, 0x0020003f,
751 	0x30, 0xffffffff, 0x0000001c,
752 	0x34, 0x000f0000, 0x000f0000,
753 	0x160c, 0xffffffff, 0x00000100,
754 	0x1024, 0xffffffff, 0x00000100,
755 	0x102c, 0x00000101, 0x00000000,
756 	0x20a8, 0xffffffff, 0x00000104,
757 	0x55e4, 0xff000fff, 0x00000100,
758 	0x55e8, 0x00000001, 0x00000001,
759 	0x2f50, 0x00000001, 0x00000001,
760 	0x30cc, 0xc0000fff, 0x00000104,
761 	0xc1e4, 0x00000001, 0x00000001,
762 	0xd0c0, 0xfffffff0, 0x00000100,
763 	0xd8c0, 0xfffffff0, 0x00000100
764 };
765 
/*
 * Clock-gating init sequence for Verde ({offset, mask, value}
 * triplets; same format as tahiti_mgcg_cgcg_init).
 */
766 static const u32 verde_mgcg_cgcg_init[] =
767 {
768 	0xc400, 0xffffffff, 0xfffffffc,
769 	0x802c, 0xffffffff, 0xe0000000,
770 	0x9a60, 0xffffffff, 0x00000100,
771 	0x92a4, 0xffffffff, 0x00000100,
772 	0xc164, 0xffffffff, 0x00000100,
773 	0x9774, 0xffffffff, 0x00000100,
774 	0x8984, 0xffffffff, 0x06000100,
775 	0x8a18, 0xffffffff, 0x00000100,
776 	0x92a0, 0xffffffff, 0x00000100,
777 	0xc380, 0xffffffff, 0x00000100,
778 	0x8b28, 0xffffffff, 0x00000100,
779 	0x9144, 0xffffffff, 0x00000100,
780 	0x8d88, 0xffffffff, 0x00000100,
781 	0x8d8c, 0xffffffff, 0x00000100,
782 	0x9030, 0xffffffff, 0x00000100,
783 	0x9034, 0xffffffff, 0x00000100,
784 	0x9038, 0xffffffff, 0x00000100,
785 	0x903c, 0xffffffff, 0x00000100,
786 	0xad80, 0xffffffff, 0x00000100,
787 	0xac54, 0xffffffff, 0x00000100,
788 	0x897c, 0xffffffff, 0x06000100,
789 	0x9868, 0xffffffff, 0x00000100,
790 	0x9510, 0xffffffff, 0x00000100,
791 	0xaf04, 0xffffffff, 0x00000100,
792 	0xae04, 0xffffffff, 0x00000100,
793 	0x949c, 0xffffffff, 0x00000100,
794 	0x802c, 0xffffffff, 0xe0000000,
795 	0x9160, 0xffffffff, 0x00010000,
796 	0x9164, 0xffffffff, 0x00030002,
797 	0x9168, 0xffffffff, 0x00040007,
798 	0x916c, 0xffffffff, 0x00060005,
799 	0x9170, 0xffffffff, 0x00090008,
800 	0x9174, 0xffffffff, 0x00020001,
801 	0x9178, 0xffffffff, 0x00040003,
802 	0x917c, 0xffffffff, 0x00000007,
803 	0x9180, 0xffffffff, 0x00060005,
804 	0x9184, 0xffffffff, 0x00090008,
805 	0x9188, 0xffffffff, 0x00030002,
806 	0x918c, 0xffffffff, 0x00050004,
807 	0x9190, 0xffffffff, 0x00000008,
808 	0x9194, 0xffffffff, 0x00070006,
809 	0x9198, 0xffffffff, 0x000a0009,
810 	0x919c, 0xffffffff, 0x00040003,
811 	0x91a0, 0xffffffff, 0x00060005,
812 	0x91a4, 0xffffffff, 0x00000009,
813 	0x91a8, 0xffffffff, 0x00080007,
814 	0x91ac, 0xffffffff, 0x000b000a,
815 	0x91b0, 0xffffffff, 0x00050004,
816 	0x91b4, 0xffffffff, 0x00070006,
817 	0x91b8, 0xffffffff, 0x0008000b,
818 	0x91bc, 0xffffffff, 0x000a0009,
819 	0x91c0, 0xffffffff, 0x000d000c,
820 	0x9200, 0xffffffff, 0x00090008,
821 	0x9204, 0xffffffff, 0x000b000a,
822 	0x9208, 0xffffffff, 0x000c000f,
823 	0x920c, 0xffffffff, 0x000e000d,
824 	0x9210, 0xffffffff, 0x00110010,
825 	0x9214, 0xffffffff, 0x000a0009,
826 	0x9218, 0xffffffff, 0x000c000b,
827 	0x921c, 0xffffffff, 0x0000000f,
828 	0x9220, 0xffffffff, 0x000e000d,
829 	0x9224, 0xffffffff, 0x00110010,
830 	0x9228, 0xffffffff, 0x000b000a,
831 	0x922c, 0xffffffff, 0x000d000c,
832 	0x9230, 0xffffffff, 0x00000010,
833 	0x9234, 0xffffffff, 0x000f000e,
834 	0x9238, 0xffffffff, 0x00120011,
835 	0x923c, 0xffffffff, 0x000c000b,
836 	0x9240, 0xffffffff, 0x000e000d,
837 	0x9244, 0xffffffff, 0x00000011,
838 	0x9248, 0xffffffff, 0x0010000f,
839 	0x924c, 0xffffffff, 0x00130012,
840 	0x9250, 0xffffffff, 0x000d000c,
841 	0x9254, 0xffffffff, 0x000f000e,
842 	0x9258, 0xffffffff, 0x00100013,
843 	0x925c, 0xffffffff, 0x00120011,
844 	0x9260, 0xffffffff, 0x00150014,
845 	0x9150, 0xffffffff, 0x96940200,
846 	0x8708, 0xffffffff, 0x00900100,
847 	0xc478, 0xffffffff, 0x00000080,
848 	0xc404, 0xffffffff, 0x0020003f,
849 	0x30, 0xffffffff, 0x0000001c,
850 	0x34, 0x000f0000, 0x000f0000,
851 	0x160c, 0xffffffff, 0x00000100,
852 	0x1024, 0xffffffff, 0x00000100,
853 	0x102c, 0x00000101, 0x00000000,
854 	0x20a8, 0xffffffff, 0x00000104,
855 	0x264c, 0x000c0000, 0x000c0000,
856 	0x2648, 0x000c0000, 0x000c0000,
857 	0x55e4, 0xff000fff, 0x00000100,
858 	0x55e8, 0x00000001, 0x00000001,
859 	0x2f50, 0x00000001, 0x00000001,
860 	0x30cc, 0xc0000fff, 0x00000104,
861 	0xc1e4, 0x00000001, 0x00000001,
862 	0xd0c0, 0xfffffff0, 0x00000100,
863 	0xd8c0, 0xfffffff0, 0x00000100
864 };
865 
/*
 * Clock-gating init sequence for Oland ({offset, mask, value}
 * triplets; same format as tahiti_mgcg_cgcg_init).
 */
866 static const u32 oland_mgcg_cgcg_init[] =
867 {
868 	0xc400, 0xffffffff, 0xfffffffc,
869 	0x802c, 0xffffffff, 0xe0000000,
870 	0x9a60, 0xffffffff, 0x00000100,
871 	0x92a4, 0xffffffff, 0x00000100,
872 	0xc164, 0xffffffff, 0x00000100,
873 	0x9774, 0xffffffff, 0x00000100,
874 	0x8984, 0xffffffff, 0x06000100,
875 	0x8a18, 0xffffffff, 0x00000100,
876 	0x92a0, 0xffffffff, 0x00000100,
877 	0xc380, 0xffffffff, 0x00000100,
878 	0x8b28, 0xffffffff, 0x00000100,
879 	0x9144, 0xffffffff, 0x00000100,
880 	0x8d88, 0xffffffff, 0x00000100,
881 	0x8d8c, 0xffffffff, 0x00000100,
882 	0x9030, 0xffffffff, 0x00000100,
883 	0x9034, 0xffffffff, 0x00000100,
884 	0x9038, 0xffffffff, 0x00000100,
885 	0x903c, 0xffffffff, 0x00000100,
886 	0xad80, 0xffffffff, 0x00000100,
887 	0xac54, 0xffffffff, 0x00000100,
888 	0x897c, 0xffffffff, 0x06000100,
889 	0x9868, 0xffffffff, 0x00000100,
890 	0x9510, 0xffffffff, 0x00000100,
891 	0xaf04, 0xffffffff, 0x00000100,
892 	0xae04, 0xffffffff, 0x00000100,
893 	0x949c, 0xffffffff, 0x00000100,
894 	0x802c, 0xffffffff, 0xe0000000,
895 	0x9160, 0xffffffff, 0x00010000,
896 	0x9164, 0xffffffff, 0x00030002,
897 	0x9168, 0xffffffff, 0x00040007,
898 	0x916c, 0xffffffff, 0x00060005,
899 	0x9170, 0xffffffff, 0x00090008,
900 	0x9174, 0xffffffff, 0x00020001,
901 	0x9178, 0xffffffff, 0x00040003,
902 	0x917c, 0xffffffff, 0x00000007,
903 	0x9180, 0xffffffff, 0x00060005,
904 	0x9184, 0xffffffff, 0x00090008,
905 	0x9188, 0xffffffff, 0x00030002,
906 	0x918c, 0xffffffff, 0x00050004,
907 	0x9190, 0xffffffff, 0x00000008,
908 	0x9194, 0xffffffff, 0x00070006,
909 	0x9198, 0xffffffff, 0x000a0009,
910 	0x919c, 0xffffffff, 0x00040003,
911 	0x91a0, 0xffffffff, 0x00060005,
912 	0x91a4, 0xffffffff, 0x00000009,
913 	0x91a8, 0xffffffff, 0x00080007,
914 	0x91ac, 0xffffffff, 0x000b000a,
915 	0x91b0, 0xffffffff, 0x00050004,
916 	0x91b4, 0xffffffff, 0x00070006,
917 	0x91b8, 0xffffffff, 0x0008000b,
918 	0x91bc, 0xffffffff, 0x000a0009,
919 	0x91c0, 0xffffffff, 0x000d000c,
920 	0x91c4, 0xffffffff, 0x00060005,
921 	0x91c8, 0xffffffff, 0x00080007,
922 	0x91cc, 0xffffffff, 0x0000000b,
923 	0x91d0, 0xffffffff, 0x000a0009,
924 	0x91d4, 0xffffffff, 0x000d000c,
925 	0x9150, 0xffffffff, 0x96940200,
926 	0x8708, 0xffffffff, 0x00900100,
927 	0xc478, 0xffffffff, 0x00000080,
928 	0xc404, 0xffffffff, 0x0020003f,
929 	0x30, 0xffffffff, 0x0000001c,
930 	0x34, 0x000f0000, 0x000f0000,
931 	0x160c, 0xffffffff, 0x00000100,
932 	0x1024, 0xffffffff, 0x00000100,
933 	0x102c, 0x00000101, 0x00000000,
934 	0x20a8, 0xffffffff, 0x00000104,
935 	0x264c, 0x000c0000, 0x000c0000,
936 	0x2648, 0x000c0000, 0x000c0000,
937 	0x55e4, 0xff000fff, 0x00000100,
938 	0x55e8, 0x00000001, 0x00000001,
939 	0x2f50, 0x00000001, 0x00000001,
940 	0x30cc, 0xc0000fff, 0x00000104,
941 	0xc1e4, 0x00000001, 0x00000001,
942 	0xd0c0, 0xfffffff0, 0x00000100,
943 	0xd8c0, 0xfffffff0, 0x00000100
944 };
945 
/*
 * Hainan medium/coarse grain clock gating (MGCG/CGCG) init sequence.
 * Flat array of {register offset, mask, value} triplets consumed by
 * radeon_program_register_sequence() in si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1022 
1023 static u32 verde_pg_init[] =
1024 {
1025 	0x353c, 0xffffffff, 0x40000,
1026 	0x3538, 0xffffffff, 0x200010ff,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x0,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x7007,
1033 	0x3538, 0xffffffff, 0x300010ff,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x400000,
1040 	0x3538, 0xffffffff, 0x100010ff,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x120200,
1047 	0x3538, 0xffffffff, 0x500010ff,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x1e1e16,
1054 	0x3538, 0xffffffff, 0x600010ff,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x171f1e,
1061 	0x3538, 0xffffffff, 0x700010ff,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x0,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x3538, 0xffffffff, 0x9ff,
1069 	0x3500, 0xffffffff, 0x0,
1070 	0x3504, 0xffffffff, 0x10000800,
1071 	0x3504, 0xffffffff, 0xf,
1072 	0x3504, 0xffffffff, 0xf,
1073 	0x3500, 0xffffffff, 0x4,
1074 	0x3504, 0xffffffff, 0x1000051e,
1075 	0x3504, 0xffffffff, 0xffff,
1076 	0x3504, 0xffffffff, 0xffff,
1077 	0x3500, 0xffffffff, 0x8,
1078 	0x3504, 0xffffffff, 0x80500,
1079 	0x3500, 0xffffffff, 0x12,
1080 	0x3504, 0xffffffff, 0x9050c,
1081 	0x3500, 0xffffffff, 0x1d,
1082 	0x3504, 0xffffffff, 0xb052c,
1083 	0x3500, 0xffffffff, 0x2a,
1084 	0x3504, 0xffffffff, 0x1053e,
1085 	0x3500, 0xffffffff, 0x2d,
1086 	0x3504, 0xffffffff, 0x10546,
1087 	0x3500, 0xffffffff, 0x30,
1088 	0x3504, 0xffffffff, 0xa054e,
1089 	0x3500, 0xffffffff, 0x3c,
1090 	0x3504, 0xffffffff, 0x1055f,
1091 	0x3500, 0xffffffff, 0x3f,
1092 	0x3504, 0xffffffff, 0x10567,
1093 	0x3500, 0xffffffff, 0x42,
1094 	0x3504, 0xffffffff, 0x1056f,
1095 	0x3500, 0xffffffff, 0x45,
1096 	0x3504, 0xffffffff, 0x10572,
1097 	0x3500, 0xffffffff, 0x48,
1098 	0x3504, 0xffffffff, 0x20575,
1099 	0x3500, 0xffffffff, 0x4c,
1100 	0x3504, 0xffffffff, 0x190801,
1101 	0x3500, 0xffffffff, 0x67,
1102 	0x3504, 0xffffffff, 0x1082a,
1103 	0x3500, 0xffffffff, 0x6a,
1104 	0x3504, 0xffffffff, 0x1b082d,
1105 	0x3500, 0xffffffff, 0x87,
1106 	0x3504, 0xffffffff, 0x310851,
1107 	0x3500, 0xffffffff, 0xba,
1108 	0x3504, 0xffffffff, 0x891,
1109 	0x3500, 0xffffffff, 0xbc,
1110 	0x3504, 0xffffffff, 0x893,
1111 	0x3500, 0xffffffff, 0xbe,
1112 	0x3504, 0xffffffff, 0x20895,
1113 	0x3500, 0xffffffff, 0xc2,
1114 	0x3504, 0xffffffff, 0x20899,
1115 	0x3500, 0xffffffff, 0xc6,
1116 	0x3504, 0xffffffff, 0x2089d,
1117 	0x3500, 0xffffffff, 0xca,
1118 	0x3504, 0xffffffff, 0x8a1,
1119 	0x3500, 0xffffffff, 0xcc,
1120 	0x3504, 0xffffffff, 0x8a3,
1121 	0x3500, 0xffffffff, 0xce,
1122 	0x3504, 0xffffffff, 0x308a5,
1123 	0x3500, 0xffffffff, 0xd3,
1124 	0x3504, 0xffffffff, 0x6d08cd,
1125 	0x3500, 0xffffffff, 0x142,
1126 	0x3504, 0xffffffff, 0x2000095a,
1127 	0x3504, 0xffffffff, 0x1,
1128 	0x3500, 0xffffffff, 0x144,
1129 	0x3504, 0xffffffff, 0x301f095b,
1130 	0x3500, 0xffffffff, 0x165,
1131 	0x3504, 0xffffffff, 0xc094d,
1132 	0x3500, 0xffffffff, 0x173,
1133 	0x3504, 0xffffffff, 0xf096d,
1134 	0x3500, 0xffffffff, 0x184,
1135 	0x3504, 0xffffffff, 0x15097f,
1136 	0x3500, 0xffffffff, 0x19b,
1137 	0x3504, 0xffffffff, 0xc0998,
1138 	0x3500, 0xffffffff, 0x1a9,
1139 	0x3504, 0xffffffff, 0x409a7,
1140 	0x3500, 0xffffffff, 0x1af,
1141 	0x3504, 0xffffffff, 0xcdc,
1142 	0x3500, 0xffffffff, 0x1b1,
1143 	0x3504, 0xffffffff, 0x800,
1144 	0x3508, 0xffffffff, 0x6c9b2000,
1145 	0x3510, 0xfc00, 0x2000,
1146 	0x3544, 0xffffffff, 0xfc0,
1147 	0x28d4, 0x00000100, 0x100
1148 };
1149 
1150 static void si_init_golden_registers(struct radeon_device *rdev)
1151 {
1152 	switch (rdev->family) {
1153 	case CHIP_TAHITI:
1154 		radeon_program_register_sequence(rdev,
1155 						 tahiti_golden_registers,
1156 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1157 		radeon_program_register_sequence(rdev,
1158 						 tahiti_golden_rlc_registers,
1159 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1160 		radeon_program_register_sequence(rdev,
1161 						 tahiti_mgcg_cgcg_init,
1162 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1163 		radeon_program_register_sequence(rdev,
1164 						 tahiti_golden_registers2,
1165 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1166 		break;
1167 	case CHIP_PITCAIRN:
1168 		radeon_program_register_sequence(rdev,
1169 						 pitcairn_golden_registers,
1170 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1171 		radeon_program_register_sequence(rdev,
1172 						 pitcairn_golden_rlc_registers,
1173 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1174 		radeon_program_register_sequence(rdev,
1175 						 pitcairn_mgcg_cgcg_init,
1176 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1177 		break;
1178 	case CHIP_VERDE:
1179 		radeon_program_register_sequence(rdev,
1180 						 verde_golden_registers,
1181 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1182 		radeon_program_register_sequence(rdev,
1183 						 verde_golden_rlc_registers,
1184 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1185 		radeon_program_register_sequence(rdev,
1186 						 verde_mgcg_cgcg_init,
1187 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1188 		radeon_program_register_sequence(rdev,
1189 						 verde_pg_init,
1190 						 (const u32)ARRAY_SIZE(verde_pg_init));
1191 		break;
1192 	case CHIP_OLAND:
1193 		radeon_program_register_sequence(rdev,
1194 						 oland_golden_registers,
1195 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1196 		radeon_program_register_sequence(rdev,
1197 						 oland_golden_rlc_registers,
1198 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1199 		radeon_program_register_sequence(rdev,
1200 						 oland_mgcg_cgcg_init,
1201 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1202 		break;
1203 	case CHIP_HAINAN:
1204 		radeon_program_register_sequence(rdev,
1205 						 hainan_golden_registers,
1206 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1207 		radeon_program_register_sequence(rdev,
1208 						 hainan_golden_registers2,
1209 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1210 		radeon_program_register_sequence(rdev,
1211 						 hainan_mgcg_cgcg_init,
1212 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1213 		break;
1214 	default:
1215 		break;
1216 	}
1217 }
1218 
1219 #define PCIE_BUS_CLK                10000
1220 #define TCLK                        (PCIE_BUS_CLK / 10)
1221 
1222 /**
1223  * si_get_xclk - get the xclk
1224  *
1225  * @rdev: radeon_device pointer
1226  *
1227  * Returns the reference clock used by the gfx engine
1228  * (SI).
1229  */
1230 u32 si_get_xclk(struct radeon_device *rdev)
1231 {
1232         u32 reference_clock = rdev->clock.spll.reference_freq;
1233 	u32 tmp;
1234 
1235 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1236 	if (tmp & MUX_TCLK_TO_XCLK)
1237 		return TCLK;
1238 
1239 	tmp = RREG32(CG_CLKPIN_CNTL);
1240 	if (tmp & XTALIN_DIVIDE)
1241 		return reference_clock / 4;
1242 
1243 	return reference_clock;
1244 }
1245 
1246 /* get temperature in millidegrees */
1247 int si_get_temp(struct radeon_device *rdev)
1248 {
1249 	u32 temp;
1250 	int actual_temp = 0;
1251 
1252 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1253 		CTF_TEMP_SHIFT;
1254 
1255 	if (temp & 0x200)
1256 		actual_temp = 255;
1257 	else
1258 		actual_temp = temp & 0x1ff;
1259 
1260 	actual_temp = (actual_temp * 1000);
1261 
1262 	return actual_temp;
1263 }
1264 
/* number of {index, data} pairs in each *_io_mc_regs table below */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC SEQ_IO_DEBUG {index, data} pairs written before loading the MC ucode
 * (see si_mc_load_microcode()).  The per-ASIC tables differ only in the
 * final (index 0x9f) entry.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1305 
/* Pitcairn MC SEQ_IO_DEBUG {index, data} pairs; only the 0x9f entry differs
 * from the Tahiti table above. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1344 
/* Verde MC SEQ_IO_DEBUG {index, data} pairs; only the 0x9f entry differs
 * from the Tahiti table above. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1383 
/* Oland MC SEQ_IO_DEBUG {index, data} pairs; only the 0x9f entry differs
 * from the Tahiti table above. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1422 
/* Hainan MC SEQ_IO_DEBUG {index, data} pairs; only the 0x9f entry differs
 * from the Tahiti table above. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1461 
1462 /* ucode loading */
1463 static int si_mc_load_microcode(struct radeon_device *rdev)
1464 {
1465 	const __be32 *fw_data;
1466 	u32 running, blackout = 0;
1467 	u32 *io_mc_regs;
1468 	int i, ucode_size, regs_size;
1469 
1470 	if (!rdev->mc_fw)
1471 		return -EINVAL;
1472 
1473 	switch (rdev->family) {
1474 	case CHIP_TAHITI:
1475 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1476 		ucode_size = SI_MC_UCODE_SIZE;
1477 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1478 		break;
1479 	case CHIP_PITCAIRN:
1480 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1481 		ucode_size = SI_MC_UCODE_SIZE;
1482 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1483 		break;
1484 	case CHIP_VERDE:
1485 	default:
1486 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1487 		ucode_size = SI_MC_UCODE_SIZE;
1488 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1489 		break;
1490 	case CHIP_OLAND:
1491 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1492 		ucode_size = OLAND_MC_UCODE_SIZE;
1493 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1494 		break;
1495 	case CHIP_HAINAN:
1496 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1497 		ucode_size = OLAND_MC_UCODE_SIZE;
1498 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1499 		break;
1500 	}
1501 
1502 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1503 
1504 	if (running == 0) {
1505 		if (running) {
1506 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1507 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1508 		}
1509 
1510 		/* reset the engine and set to writable */
1511 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1512 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1513 
1514 		/* load mc io regs */
1515 		for (i = 0; i < regs_size; i++) {
1516 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1517 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1518 		}
1519 		/* load the MC ucode */
1520 		fw_data = (const __be32 *)rdev->mc_fw->data;
1521 		for (i = 0; i < ucode_size; i++)
1522 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1523 
1524 		/* put the engine back into the active state */
1525 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1526 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1527 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1528 
1529 		/* wait for training to complete */
1530 		for (i = 0; i < rdev->usec_timeout; i++) {
1531 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1532 				break;
1533 			udelay(1);
1534 		}
1535 		for (i = 0; i < rdev->usec_timeout; i++) {
1536 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537 				break;
1538 			udelay(1);
1539 		}
1540 
1541 		if (running)
1542 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1543 	}
1544 
1545 	return 0;
1546 }
1547 
1548 static int si_init_microcode(struct radeon_device *rdev)
1549 {
1550 	const char *chip_name;
1551 	const char *rlc_chip_name;
1552 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1553 	size_t smc_req_size;
1554 	char fw_name[30];
1555 	int err;
1556 
1557 	DRM_DEBUG("\n");
1558 
1559 	switch (rdev->family) {
1560 	case CHIP_TAHITI:
1561 		chip_name = "TAHITI";
1562 		rlc_chip_name = "TAHITI";
1563 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1565 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1566 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1568 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1569 		break;
1570 	case CHIP_PITCAIRN:
1571 		chip_name = "PITCAIRN";
1572 		rlc_chip_name = "PITCAIRN";
1573 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1574 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1575 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1576 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1577 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1578 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1579 		break;
1580 	case CHIP_VERDE:
1581 		chip_name = "VERDE";
1582 		rlc_chip_name = "VERDE";
1583 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1584 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1585 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1586 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1587 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1588 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1589 		break;
1590 	case CHIP_OLAND:
1591 		chip_name = "OLAND";
1592 		rlc_chip_name = "OLAND";
1593 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1594 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1595 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1596 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1597 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1598 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1599 		break;
1600 	case CHIP_HAINAN:
1601 		chip_name = "HAINAN";
1602 		rlc_chip_name = "HAINAN";
1603 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1604 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1605 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1606 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1607 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1608 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1609 		break;
1610 	default: BUG();
1611 	}
1612 
1613 	DRM_INFO("Loading %s Microcode\n", chip_name);
1614 
1615 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1616 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1617 	if (err)
1618 		goto out;
1619 	if (rdev->pfp_fw->size != pfp_req_size) {
1620 		printk(KERN_ERR
1621 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1622 		       rdev->pfp_fw->size, fw_name);
1623 		err = -EINVAL;
1624 		goto out;
1625 	}
1626 
1627 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1628 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1629 	if (err)
1630 		goto out;
1631 	if (rdev->me_fw->size != me_req_size) {
1632 		printk(KERN_ERR
1633 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1634 		       rdev->me_fw->size, fw_name);
1635 		err = -EINVAL;
1636 	}
1637 
1638 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1639 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1640 	if (err)
1641 		goto out;
1642 	if (rdev->ce_fw->size != ce_req_size) {
1643 		printk(KERN_ERR
1644 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1645 		       rdev->ce_fw->size, fw_name);
1646 		err = -EINVAL;
1647 	}
1648 
1649 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1650 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651 	if (err)
1652 		goto out;
1653 	if (rdev->rlc_fw->size != rlc_req_size) {
1654 		printk(KERN_ERR
1655 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1656 		       rdev->rlc_fw->size, fw_name);
1657 		err = -EINVAL;
1658 	}
1659 
1660 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662 	if (err)
1663 		goto out;
1664 	if (rdev->mc_fw->size != mc_req_size) {
1665 		printk(KERN_ERR
1666 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1667 		       rdev->mc_fw->size, fw_name);
1668 		err = -EINVAL;
1669 	}
1670 
1671 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673 	if (err) {
1674 		printk(KERN_ERR
1675 		       "smc: error loading firmware \"%s\"\n",
1676 		       fw_name);
1677 		release_firmware(rdev->smc_fw);
1678 		rdev->smc_fw = NULL;
1679 		err = 0;
1680 	} else if (rdev->smc_fw->size != smc_req_size) {
1681 		printk(KERN_ERR
1682 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1683 		       rdev->smc_fw->size, fw_name);
1684 		err = -EINVAL;
1685 	}
1686 
1687 out:
1688 	if (err) {
1689 		if (err != -EINVAL)
1690 			printk(KERN_ERR
1691 			       "si_cp: Failed to load firmware \"%s\"\n",
1692 			       fw_name);
1693 		release_firmware(rdev->pfp_fw);
1694 		rdev->pfp_fw = NULL;
1695 		release_firmware(rdev->me_fw);
1696 		rdev->me_fw = NULL;
1697 		release_firmware(rdev->ce_fw);
1698 		rdev->ce_fw = NULL;
1699 		release_firmware(rdev->rlc_fw);
1700 		rdev->rlc_fw = NULL;
1701 		release_firmware(rdev->mc_fw);
1702 		rdev->mc_fw = NULL;
1703 		release_firmware(rdev->smc_fw);
1704 		rdev->smc_fw = NULL;
1705 	}
1706 	return err;
1707 }
1708 
1709 /* watermark setup */
1710 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1711 				   struct radeon_crtc *radeon_crtc,
1712 				   struct drm_display_mode *mode,
1713 				   struct drm_display_mode *other_mode)
1714 {
1715 	u32 tmp, buffer_alloc, i;
1716 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1717 	/*
1718 	 * Line Buffer Setup
1719 	 * There are 3 line buffers, each one shared by 2 display controllers.
1720 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1721 	 * the display controllers.  The paritioning is done via one of four
1722 	 * preset allocations specified in bits 21:20:
1723 	 *  0 - half lb
1724 	 *  2 - whole lb, other crtc must be disabled
1725 	 */
1726 	/* this can get tricky if we have two large displays on a paired group
1727 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1728 	 * non-linked crtcs for maximum line buffer allocation.
1729 	 */
1730 	if (radeon_crtc->base.enabled && mode) {
1731 		if (other_mode) {
1732 			tmp = 0; /* 1/2 */
1733 			buffer_alloc = 1;
1734 		} else {
1735 			tmp = 2; /* whole */
1736 			buffer_alloc = 2;
1737 		}
1738 	} else {
1739 		tmp = 0;
1740 		buffer_alloc = 0;
1741 	}
1742 
1743 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1744 	       DC_LB_MEMORY_CONFIG(tmp));
1745 
1746 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1747 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1748 	for (i = 0; i < rdev->usec_timeout; i++) {
1749 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1750 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1751 			break;
1752 		udelay(1);
1753 	}
1754 
1755 	if (radeon_crtc->base.enabled && mode) {
1756 		switch (tmp) {
1757 		case 0:
1758 		default:
1759 			return 4096 * 2;
1760 		case 2:
1761 			return 8192 * 2;
1762 		}
1763 	}
1764 
1765 	/* controller not enabled, so no lb used */
1766 	return 0;
1767 }
1768 
1769 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1770 {
1771 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1772 
1773 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1774 	case 0:
1775 	default:
1776 		return 1;
1777 	case 1:
1778 		return 2;
1779 	case 2:
1780 		return 4;
1781 	case 3:
1782 		return 8;
1783 	case 4:
1784 		return 3;
1785 	case 5:
1786 		return 6;
1787 	case 6:
1788 		return 10;
1789 	case 7:
1790 		return 12;
1791 	case 8:
1792 		return 16;
1793 	}
1794 }
1795 
/* Input parameters for the DCE6 display watermark calculations below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1811 
1812 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1813 {
1814 	/* Calculate raw DRAM Bandwidth */
1815 	fixed20_12 dram_efficiency; /* 0.7 */
1816 	fixed20_12 yclk, dram_channels, bandwidth;
1817 	fixed20_12 a;
1818 
1819 	a.full = dfixed_const(1000);
1820 	yclk.full = dfixed_const(wm->yclk);
1821 	yclk.full = dfixed_div(yclk, a);
1822 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1823 	a.full = dfixed_const(10);
1824 	dram_efficiency.full = dfixed_const(7);
1825 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1826 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1827 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1828 
1829 	return dfixed_trunc(bandwidth);
1830 }
1831 
1832 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1833 {
1834 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1835 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1836 	fixed20_12 yclk, dram_channels, bandwidth;
1837 	fixed20_12 a;
1838 
1839 	a.full = dfixed_const(1000);
1840 	yclk.full = dfixed_const(wm->yclk);
1841 	yclk.full = dfixed_div(yclk, a);
1842 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1843 	a.full = dfixed_const(10);
1844 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1845 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1846 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1847 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1848 
1849 	return dfixed_trunc(bandwidth);
1850 }
1851 
1852 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1853 {
1854 	/* Calculate the display Data return Bandwidth */
1855 	fixed20_12 return_efficiency; /* 0.8 */
1856 	fixed20_12 sclk, bandwidth;
1857 	fixed20_12 a;
1858 
1859 	a.full = dfixed_const(1000);
1860 	sclk.full = dfixed_const(wm->sclk);
1861 	sclk.full = dfixed_div(sclk, a);
1862 	a.full = dfixed_const(10);
1863 	return_efficiency.full = dfixed_const(8);
1864 	return_efficiency.full = dfixed_div(return_efficiency, a);
1865 	a.full = dfixed_const(32);
1866 	bandwidth.full = dfixed_mul(a, sclk);
1867 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1868 
1869 	return dfixed_trunc(bandwidth);
1870 }
1871 
/* DMIF request size in bytes; fixed at 32 on DCE6 regardless of wm params */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1876 
1877 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1878 {
1879 	/* Calculate the DMIF Request Bandwidth */
1880 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1881 	fixed20_12 disp_clk, sclk, bandwidth;
1882 	fixed20_12 a, b1, b2;
1883 	u32 min_bandwidth;
1884 
1885 	a.full = dfixed_const(1000);
1886 	disp_clk.full = dfixed_const(wm->disp_clk);
1887 	disp_clk.full = dfixed_div(disp_clk, a);
1888 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1889 	b1.full = dfixed_mul(a, disp_clk);
1890 
1891 	a.full = dfixed_const(1000);
1892 	sclk.full = dfixed_const(wm->sclk);
1893 	sclk.full = dfixed_div(sclk, a);
1894 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1895 	b2.full = dfixed_mul(a, sclk);
1896 
1897 	a.full = dfixed_const(10);
1898 	disp_clk_request_efficiency.full = dfixed_const(8);
1899 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1900 
1901 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1902 
1903 	a.full = dfixed_const(min_bandwidth);
1904 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1905 
1906 	return dfixed_trunc(bandwidth);
1907 }
1908 
1909 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1910 {
1911 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1912 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1913 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1914 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1915 
1916 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1917 }
1918 
1919 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1920 {
1921 	/* Calculate the display mode Average Bandwidth
1922 	 * DisplayMode should contain the source and destination dimensions,
1923 	 * timing, etc.
1924 	 */
1925 	fixed20_12 bpp;
1926 	fixed20_12 line_time;
1927 	fixed20_12 src_width;
1928 	fixed20_12 bandwidth;
1929 	fixed20_12 a;
1930 
1931 	a.full = dfixed_const(1000);
1932 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1933 	line_time.full = dfixed_div(line_time, a);
1934 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1935 	src_width.full = dfixed_const(wm->src_width);
1936 	bandwidth.full = dfixed_mul(src_width, bpp);
1937 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1938 	bandwidth.full = dfixed_div(bandwidth, line_time);
1939 
1940 	return dfixed_trunc(bandwidth);
1941 }
1942 
/* dce6_latency_watermark - worst-case latency (ns) to program as the
 * watermark for this set of parameters.
 *
 * Starts from a fixed memory-controller latency, adds the time other
 * heads can occupy the data-return path plus the dc pipe latency, then
 * extends the result if filling one line buffer line takes longer than
 * the active display time.  Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for a worst-case 512-byte * 8 chunk at the available bandwidth */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time for a 128-byte * 4 cursor line pair */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy vertical downscaling / many taps / interlace need up to 4
	 * source lines per destination line, otherwise 2 suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* also cap by what the display pipe consumes:
	 * (disp_clk / 1000) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one line buffer line at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills faster than it is scanned out, the base
	 * latency stands; otherwise add the shortfall */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2005 
2006 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2007 {
2008 	if (dce6_average_bandwidth(wm) <=
2009 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2010 		return true;
2011 	else
2012 		return false;
2013 };
2014 
2015 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2016 {
2017 	if (dce6_average_bandwidth(wm) <=
2018 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2019 		return true;
2020 	else
2021 		return false;
2022 };
2023 
2024 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2025 {
2026 	u32 lb_partitions = wm->lb_size / wm->src_width;
2027 	u32 line_time = wm->active_time + wm->blank_time;
2028 	u32 latency_tolerant_lines;
2029 	u32 latency_hiding;
2030 	fixed20_12 a;
2031 
2032 	a.full = dfixed_const(1);
2033 	if (wm->vsc.full > a.full)
2034 		latency_tolerant_lines = 1;
2035 	else {
2036 		if (lb_partitions <= (wm->vtaps + 1))
2037 			latency_tolerant_lines = 1;
2038 		else
2039 			latency_tolerant_lines = 2;
2040 	}
2041 
2042 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2043 
2044 	if (dce6_latency_watermark(wm) <= latency_hiding)
2045 		return true;
2046 	else
2047 		return false;
2048 }
2049 
/* dce6_program_watermarks - program display watermarks for a crtc
 *
 * Builds wm parameter sets for high and low clocks from the crtc's
 * current mode, computes the latency watermarks, forces display
 * priority high when bandwidth is marginal, then writes the watermark
 * and priority-mark registers.  The computed values are also cached on
 * the crtc (line_time, wm_high, wm_low) for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line_time is clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A:
		 * watermark * (mode->clock / 1000) * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with the low-clock watermark */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2209 
2210 void dce6_bandwidth_update(struct radeon_device *rdev)
2211 {
2212 	struct drm_display_mode *mode0 = NULL;
2213 	struct drm_display_mode *mode1 = NULL;
2214 	u32 num_heads = 0, lb_size;
2215 	int i;
2216 
2217 	radeon_update_display_priority(rdev);
2218 
2219 	for (i = 0; i < rdev->num_crtc; i++) {
2220 		if (rdev->mode_info.crtcs[i]->base.enabled)
2221 			num_heads++;
2222 	}
2223 	for (i = 0; i < rdev->num_crtc; i += 2) {
2224 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2225 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2226 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2227 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2228 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2229 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2230 	}
2231 }
2232 
2233 /*
2234  * Core functions
2235  */
2236 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2237 {
2238 	const u32 num_tile_mode_states = 32;
2239 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2240 
2241 	switch (rdev->config.si.mem_row_size_in_kb) {
2242 	case 1:
2243 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2244 		break;
2245 	case 2:
2246 	default:
2247 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2248 		break;
2249 	case 4:
2250 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2251 		break;
2252 	}
2253 
2254 	if ((rdev->family == CHIP_TAHITI) ||
2255 	    (rdev->family == CHIP_PITCAIRN)) {
2256 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2257 			switch (reg_offset) {
2258 			case 0:  /* non-AA compressed depth or any compressed stencil */
2259 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2261 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2262 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2264 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2267 				break;
2268 			case 1:  /* 2xAA/4xAA compressed depth only */
2269 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2271 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2272 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2273 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2274 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2277 				break;
2278 			case 2:  /* 8xAA compressed depth only */
2279 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2283 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2284 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287 				break;
2288 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2289 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2293 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2294 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297 				break;
2298 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2299 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2300 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2303 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2304 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307 				break;
2308 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2309 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312 						 TILE_SPLIT(split_equal_to_row_size) |
2313 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2314 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317 				break;
2318 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2319 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322 						 TILE_SPLIT(split_equal_to_row_size) |
2323 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2324 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2327 				break;
2328 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2329 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332 						 TILE_SPLIT(split_equal_to_row_size) |
2333 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2334 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337 				break;
2338 			case 8:  /* 1D and 1D Array Surfaces */
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2343 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2344 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2346 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2347 				break;
2348 			case 9:  /* Displayable maps. */
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2353 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2354 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357 				break;
2358 			case 10:  /* Display 8bpp. */
2359 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2363 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2364 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367 				break;
2368 			case 11:  /* Display 16bpp. */
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2373 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2374 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377 				break;
2378 			case 12:  /* Display 32bpp. */
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2383 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2384 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2387 				break;
2388 			case 13:  /* Thin. */
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2393 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2394 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397 				break;
2398 			case 14:  /* Thin 8 bpp. */
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2403 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2404 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2407 				break;
2408 			case 15:  /* Thin 16 bpp. */
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2414 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2417 				break;
2418 			case 16:  /* Thin 32 bpp. */
2419 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2423 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2424 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2427 				break;
2428 			case 17:  /* Thin 64 bpp. */
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432 						 TILE_SPLIT(split_equal_to_row_size) |
2433 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2434 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437 				break;
2438 			case 21:  /* 8 bpp PRT. */
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2443 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2444 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2447 				break;
2448 			case 22:  /* 16 bpp PRT */
2449 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2454 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2457 				break;
2458 			case 23:  /* 32 bpp PRT */
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2464 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467 				break;
2468 			case 24:  /* 64 bpp PRT */
2469 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2474 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2477 				break;
2478 			case 25:  /* 128 bpp PRT */
2479 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2483 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2484 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2487 				break;
2488 			default:
2489 				gb_tile_moden = 0;
2490 				break;
2491 			}
2492 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2493 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2494 		}
2495 	} else if ((rdev->family == CHIP_VERDE) ||
2496 		   (rdev->family == CHIP_OLAND) ||
2497 		   (rdev->family == CHIP_HAINAN)) {
2498 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2499 			switch (reg_offset) {
2500 			case 0:  /* non-AA compressed depth or any compressed stencil */
2501 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2504 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2505 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2506 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2508 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2509 				break;
2510 			case 1:  /* 2xAA/4xAA compressed depth only */
2511 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2514 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2516 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2519 				break;
2520 			case 2:  /* 8xAA compressed depth only */
2521 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2525 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2526 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2529 				break;
2530 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2536 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539 				break;
2540 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2546 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549 				break;
2550 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554 						 TILE_SPLIT(split_equal_to_row_size) |
2555 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2556 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559 				break;
2560 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564 						 TILE_SPLIT(split_equal_to_row_size) |
2565 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2566 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569 				break;
2570 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2571 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574 						 TILE_SPLIT(split_equal_to_row_size) |
2575 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2576 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2579 				break;
2580 			case 8:  /* 1D and 1D Array Surfaces */
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2582 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2585 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2586 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2588 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589 				break;
2590 			case 9:  /* Displayable maps. */
2591 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2596 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599 				break;
2600 			case 10:  /* Display 8bpp. */
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2606 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2609 				break;
2610 			case 11:  /* Display 16bpp. */
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2616 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619 				break;
2620 			case 12:  /* Display 32bpp. */
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2625 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2626 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2629 				break;
2630 			case 13:  /* Thin. */
2631 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2632 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2636 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639 				break;
2640 			case 14:  /* Thin 8 bpp. */
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2646 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649 				break;
2650 			case 15:  /* Thin 16 bpp. */
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2656 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659 				break;
2660 			case 16:  /* Thin 32 bpp. */
2661 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2665 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2666 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669 				break;
2670 			case 17:  /* Thin 64 bpp. */
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 						 TILE_SPLIT(split_equal_to_row_size) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2676 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679 				break;
2680 			case 21:  /* 8 bpp PRT. */
2681 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2685 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2686 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689 				break;
2690 			case 22:  /* 16 bpp PRT */
2691 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2696 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699 				break;
2700 			case 23:  /* 32 bpp PRT */
2701 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2706 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709 				break;
2710 			case 24:  /* 64 bpp PRT */
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2715 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2716 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2719 				break;
2720 			case 25:  /* 128 bpp PRT */
2721 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2725 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2726 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2729 				break;
2730 			default:
2731 				gb_tile_moden = 0;
2732 				break;
2733 			}
2734 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2735 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2736 		}
2737 	} else
2738 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2739 }
2740 
2741 static void si_select_se_sh(struct radeon_device *rdev,
2742 			    u32 se_num, u32 sh_num)
2743 {
2744 	u32 data = INSTANCE_BROADCAST_WRITES;
2745 
2746 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2747 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2748 	else if (se_num == 0xffffffff)
2749 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2750 	else if (sh_num == 0xffffffff)
2751 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2752 	else
2753 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2754 	WREG32(GRBM_GFX_INDEX, data);
2755 }
2756 
2757 static u32 si_create_bitmask(u32 bit_width)
2758 {
2759 	u32 i, mask = 0;
2760 
2761 	for (i = 0; i < bit_width; i++) {
2762 		mask <<= 1;
2763 		mask |= 1;
2764 	}
2765 	return mask;
2766 }
2767 
2768 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2769 {
2770 	u32 data, mask;
2771 
2772 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2773 	if (data & 1)
2774 		data &= INACTIVE_CUS_MASK;
2775 	else
2776 		data = 0;
2777 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2778 
2779 	data >>= INACTIVE_CUS_SHIFT;
2780 
2781 	mask = si_create_bitmask(cu_per_sh);
2782 
2783 	return ~data & mask;
2784 }
2785 
2786 static void si_setup_spi(struct radeon_device *rdev,
2787 			 u32 se_num, u32 sh_per_se,
2788 			 u32 cu_per_sh)
2789 {
2790 	int i, j, k;
2791 	u32 data, mask, active_cu;
2792 
2793 	for (i = 0; i < se_num; i++) {
2794 		for (j = 0; j < sh_per_se; j++) {
2795 			si_select_se_sh(rdev, i, j);
2796 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2797 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2798 
2799 			mask = 1;
2800 			for (k = 0; k < 16; k++) {
2801 				mask <<= k;
2802 				if (active_cu & mask) {
2803 					data &= ~mask;
2804 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2805 					break;
2806 				}
2807 			}
2808 		}
2809 	}
2810 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2811 }
2812 
2813 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2814 			      u32 max_rb_num, u32 se_num,
2815 			      u32 sh_per_se)
2816 {
2817 	u32 data, mask;
2818 
2819 	data = RREG32(CC_RB_BACKEND_DISABLE);
2820 	if (data & 1)
2821 		data &= BACKEND_DISABLE_MASK;
2822 	else
2823 		data = 0;
2824 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2825 
2826 	data >>= BACKEND_DISABLE_SHIFT;
2827 
2828 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2829 
2830 	return data & mask;
2831 }
2832 
/*
 * si_setup_rb - program the raster configuration from the usable RB mask
 *
 * Reads the per-SE/SH render-backend disable state, derives a bitmask of
 * the RBs that are actually usable, and writes a matching RB mapping to
 * PA_SC_RASTER_CONFIG for each shader engine.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the disabled-RB bits of every SE/SH pair into one mask,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: a set bit in enabled_rbs means the RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Translate each 2-bit group of enabled_rbs into an RB_MAP field
	 * and write the result per shader engine. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			/* NOTE(review): the shift uses the global SH index
			 * (i * sh_per_se + j) even though data is rebuilt
			 * per SE -- confirm against the
			 * PA_SC_RASTER_CONFIG field layout. */
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2880 
/*
 * si_gpu_init - one-time graphics engine configuration
 *
 * Fills in rdev->config.si from per-ASIC limits and the memory controller
 * fuses, programs the global tiling/addressing registers, initializes the
 * tiling mode table and RB/SPI setup, and applies 3D engine defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC shader topology and scan-converter FIFO sizes. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:	/* unrecognized SI parts fall back to the VERDE limits */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not used below. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Derive the DRAM row size (in KB) from the column-count fuses,
	 * clamped to 4 KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the final addressing config to every block that needs
	 * to agree on the memory layout. */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* Read-modify-write with no change: keeps the power-on value. */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* Zero all CB performance counter selects. */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	/* Enable HDP flush/invalidate-cache behavior. */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* Let the register writes settle before continuing. */
	udelay(50);
}
3137 
3138 /*
3139  * GPU scratch registers helpers function.
3140  */
3141 static void si_scratch_init(struct radeon_device *rdev)
3142 {
3143 	int i;
3144 
3145 	rdev->scratch.num_reg = 7;
3146 	rdev->scratch.reg_base = SCRATCH_REG0;
3147 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3148 		rdev->scratch.free[i] = true;
3149 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3150 	}
3151 }
3152 
/*
 * si_fence_ring_emit - emit a fence on a CP ring
 *
 * Emits a cache flush (TCL1/TC/KCACHE/ICACHE over the whole address range)
 * followed by an EVENT_WRITE_EOP that writes fence->seq to the fence
 * driver's GPU address and raises an interrupt when it completes.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3179 
3180 /*
3181  * IB stuff
3182  */
/*
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 *
 * Emits the INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at ib->gpu_addr, preceded by either a rptr-save write
 * or a write-back of the predicted next rptr, and followed by a GART
 * cache flush for non-const IBs.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below
			 * + 8 reserved; presumably sized to land next_rptr
			 * just past the emitted packets -- see the flush
			 * block at the end. */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* Write the predicted rptr to the write-back slot. */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* Upper byte carries the VM id the IB executes in (0 = no VM). */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3239 
3240 /*
3241  * CP.
3242  */
3243 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3244 {
3245 	if (enable)
3246 		WREG32(CP_ME_CNTL, 0);
3247 	else {
3248 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3249 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3250 		WREG32(SCRATCH_UMSK, 0);
3251 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3252 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3253 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3254 	}
3255 	udelay(50);
3256 }
3257 
3258 static int si_cp_load_microcode(struct radeon_device *rdev)
3259 {
3260 	const __be32 *fw_data;
3261 	int i;
3262 
3263 	if (!rdev->me_fw || !rdev->pfp_fw)
3264 		return -EINVAL;
3265 
3266 	si_cp_enable(rdev, false);
3267 
3268 	/* PFP */
3269 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3270 	WREG32(CP_PFP_UCODE_ADDR, 0);
3271 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3272 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3273 	WREG32(CP_PFP_UCODE_ADDR, 0);
3274 
3275 	/* CE */
3276 	fw_data = (const __be32 *)rdev->ce_fw->data;
3277 	WREG32(CP_CE_UCODE_ADDR, 0);
3278 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3279 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3280 	WREG32(CP_CE_UCODE_ADDR, 0);
3281 
3282 	/* ME */
3283 	fw_data = (const __be32 *)rdev->me_fw->data;
3284 	WREG32(CP_ME_RAM_WADDR, 0);
3285 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3286 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3287 	WREG32(CP_ME_RAM_WADDR, 0);
3288 
3289 	WREG32(CP_PFP_UCODE_ADDR, 0);
3290 	WREG32(CP_CE_UCODE_ADDR, 0);
3291 	WREG32(CP_ME_RAM_WADDR, 0);
3292 	WREG32(CP_ME_RAM_RADDR, 0);
3293 	return 0;
3294 }
3295 
3296 static int si_cp_start(struct radeon_device *rdev)
3297 {
3298 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3299 	int r, i;
3300 
3301 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3302 	if (r) {
3303 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3304 		return r;
3305 	}
3306 	/* init the CP */
3307 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3308 	radeon_ring_write(ring, 0x1);
3309 	radeon_ring_write(ring, 0x0);
3310 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3311 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3312 	radeon_ring_write(ring, 0);
3313 	radeon_ring_write(ring, 0);
3314 
3315 	/* init the CE partitions */
3316 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3317 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3318 	radeon_ring_write(ring, 0xc000);
3319 	radeon_ring_write(ring, 0xe000);
3320 	radeon_ring_unlock_commit(rdev, ring);
3321 
3322 	si_cp_enable(rdev, true);
3323 
3324 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3325 	if (r) {
3326 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3327 		return r;
3328 	}
3329 
3330 	/* setup clear context state */
3331 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3332 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3333 
3334 	for (i = 0; i < si_default_size; i++)
3335 		radeon_ring_write(ring, si_default_state[i]);
3336 
3337 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3338 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3339 
3340 	/* set clear context state */
3341 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3342 	radeon_ring_write(ring, 0);
3343 
3344 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3345 	radeon_ring_write(ring, 0x00000316);
3346 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3347 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3348 
3349 	radeon_ring_unlock_commit(rdev, ring);
3350 
3351 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3352 		ring = &rdev->ring[i];
3353 		r = radeon_ring_lock(rdev, ring, 2);
3354 
3355 		/* clear the compute context state */
3356 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3357 		radeon_ring_write(ring, 0);
3358 
3359 		radeon_ring_unlock_commit(rdev, ring);
3360 	}
3361 
3362 	return 0;
3363 }
3364 
3365 static void si_cp_fini(struct radeon_device *rdev)
3366 {
3367 	struct radeon_ring *ring;
3368 	si_cp_enable(rdev, false);
3369 
3370 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3371 	radeon_ring_fini(rdev, ring);
3372 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3373 
3374 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3375 	radeon_ring_fini(rdev, ring);
3376 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3377 
3378 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3379 	radeon_ring_fini(rdev, ring);
3380 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3381 }
3382 
/*
 * si_cp_resume - program and start the three CP ring buffers
 *
 * Configures size, read/write pointers, write-back addresses and base for
 * the gfx ring (CP_RB0) and the two compute rings (CP_RB1/CP_RB2), kicks
 * off si_cp_start(), and ring-tests each ring.  A gfx ring-test failure is
 * fatal; compute ring-test failures only mark the affected ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no write-back: stop rptr updates and scratch writes */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* Compute ring failures are non-fatal: just mark the ring down. */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3511 
/*
 * si_gpu_check_soft_reset - determine which GPU blocks need a soft reset
 *
 * Samples the GRBM/SRBM/DMA/VM status registers and translates any busy or
 * pending condition into the corresponding RADEON_RESET_* flag.  MC-busy is
 * deliberately dropped from the result (see comment below).
 * Returns the accumulated reset mask (0 means the GPU looks idle).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3592 
/**
 * si_gpu_soft_reset - soft-reset the engines named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask (from si_gpu_check_soft_reset())
 *
 * Halts the CP/RLC/DMA engines, stops the memory controller clients,
 * pulses the GRBM/SRBM soft-reset bits for the requested blocks and
 * then restores the MC state (SI).  No-op when @reset_mask is zero.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state for post-mortem debugging before we touch anything */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0: stop the ring buffer */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1: stop the ring buffer */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* let in-flight work drain */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: assert, wait, deassert.  The extra
	 * reads after each write flush the posted write. */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3724 
3725 int si_asic_reset(struct radeon_device *rdev)
3726 {
3727 	u32 reset_mask;
3728 
3729 	reset_mask = si_gpu_check_soft_reset(rdev);
3730 
3731 	if (reset_mask)
3732 		r600_set_bios_scratch_engine_hung(rdev, true);
3733 
3734 	si_gpu_soft_reset(rdev, reset_mask);
3735 
3736 	reset_mask = si_gpu_check_soft_reset(rdev);
3737 
3738 	if (!reset_mask)
3739 		r600_set_bios_scratch_engine_hung(rdev, false);
3740 
3741 	return 0;
3742 }
3743 
3744 /**
3745  * si_gfx_is_lockup - Check if the GFX engine is locked up
3746  *
3747  * @rdev: radeon_device pointer
3748  * @ring: radeon_ring structure holding ring information
3749  *
3750  * Check if the GFX engine is locked up.
3751  * Returns true if the engine appears to be locked up, false if not.
3752  */
3753 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3754 {
3755 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3756 
3757 	if (!(reset_mask & (RADEON_RESET_GFX |
3758 			    RADEON_RESET_COMPUTE |
3759 			    RADEON_RESET_CP))) {
3760 		radeon_ring_lockup_update(ring);
3761 		return false;
3762 	}
3763 	/* force CP activities */
3764 	radeon_ring_force_activity(rdev, ring);
3765 	return radeon_ring_test_lockup(rdev, ring);
3766 }
3767 
3768 /* MC */
/**
 * si_mc_program - program the memory controller address ranges
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP surface registers, stops the MC clients, programs
 * the system aperture / FB location from rdev->mc and restarts the
 * MC (SI).
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 appear to be the per-surface HDP
	 * registers, zeroed for all 32 surfaces — confirm against spec */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce MC clients before changing the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: high 16 bits = end >> 24, low 16 bits = start >> 24 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on SI; park the AGP aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3819 
/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding VRAM/GTT sizes
 *
 * Caps the reported VRAM size so at least 1GB of address space is
 * left for the GTT, then lets the core place both apertures.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): this passes &rdev->mc rather than the @mc
	 * argument; callers appear to pass &rdev->mc so the two are the
	 * same object, but the asymmetry with radeon_gtt_location()
	 * below is worth confirming. */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3833 
3834 static int si_mc_init(struct radeon_device *rdev)
3835 {
3836 	u32 tmp;
3837 	int chansize, numchan;
3838 
3839 	/* Get VRAM informations */
3840 	rdev->mc.vram_is_ddr = true;
3841 	tmp = RREG32(MC_ARB_RAMCFG);
3842 	if (tmp & CHANSIZE_OVERRIDE) {
3843 		chansize = 16;
3844 	} else if (tmp & CHANSIZE_MASK) {
3845 		chansize = 64;
3846 	} else {
3847 		chansize = 32;
3848 	}
3849 	tmp = RREG32(MC_SHARED_CHMAP);
3850 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3851 	case 0:
3852 	default:
3853 		numchan = 1;
3854 		break;
3855 	case 1:
3856 		numchan = 2;
3857 		break;
3858 	case 2:
3859 		numchan = 4;
3860 		break;
3861 	case 3:
3862 		numchan = 8;
3863 		break;
3864 	case 4:
3865 		numchan = 3;
3866 		break;
3867 	case 5:
3868 		numchan = 6;
3869 		break;
3870 	case 6:
3871 		numchan = 10;
3872 		break;
3873 	case 7:
3874 		numchan = 12;
3875 		break;
3876 	case 8:
3877 		numchan = 16;
3878 		break;
3879 	}
3880 	rdev->mc.vram_width = numchan * chansize;
3881 	/* Could aper size report 0 ? */
3882 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3883 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3884 	/* size in MB on si */
3885 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3886 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3887 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3888 	si_vram_gtt_location(rdev, &rdev->mc);
3889 	radeon_update_bandwidth_info(rdev);
3890 
3891 	return 0;
3892 }
3893 
3894 /*
3895  * GART
3896  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then invalidates the TLB for VM context 0
 * (the GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3905 
/**
 * si_pcie_gart_enable - set up the PCIE GART (VM context 0)
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * points VM context 0 at the GART table and enables VM contexts 1-15
 * for per-process page tables.  Returns 0 on success or a negative
 * error code.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	/* NOTE(review): the meaning of (0xA << 7) is not visible here —
	 * confirm against the SI register spec */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT range; faults are redirected to
	 * the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, zeroed here — confirm
	 * against the SI register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 have separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3990 
/**
 * si_pcie_gart_disable - shut down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, puts the TLB/L2 into pass-through mode
 * and unpins the GART page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4009 
/* Tear down the GART: disable the hardware path, free the page table
 * in VRAM, then release the core GART state. */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4016 
4017 /* vm parser */
/* Whitelist check for registers a VM client may write from an IB.
 * Context registers (>= 0x28000) are always allowed; below that only
 * the config registers listed here pass.  Returns false (and logs)
 * for anything else. */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4058 
/* Validate a packet3 destined for the constant engine (CE) IB: only
 * the small whitelist of CE opcodes below is permitted.  @rdev and
 * @ib are currently unused but keep the signature parallel to the
 * gfx/compute checkers.  Returns 0 or -EINVAL. */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4080 
4081 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4082 {
4083 	u32 start_reg, reg, i;
4084 	u32 command = ib[idx + 4];
4085 	u32 info = ib[idx + 1];
4086 	u32 idx_value = ib[idx];
4087 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4088 		/* src address space is register */
4089 		if (((info & 0x60000000) >> 29) == 0) {
4090 			start_reg = idx_value << 2;
4091 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4092 				reg = start_reg;
4093 				if (!si_vm_reg_valid(reg)) {
4094 					DRM_ERROR("CP DMA Bad SRC register\n");
4095 					return -EINVAL;
4096 				}
4097 			} else {
4098 				for (i = 0; i < (command & 0x1fffff); i++) {
4099 					reg = start_reg + (4 * i);
4100 					if (!si_vm_reg_valid(reg)) {
4101 						DRM_ERROR("CP DMA Bad SRC register\n");
4102 						return -EINVAL;
4103 					}
4104 				}
4105 			}
4106 		}
4107 	}
4108 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4109 		/* dst address space is register */
4110 		if (((info & 0x00300000) >> 20) == 0) {
4111 			start_reg = ib[idx + 2];
4112 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4113 				reg = start_reg;
4114 				if (!si_vm_reg_valid(reg)) {
4115 					DRM_ERROR("CP DMA Bad DST register\n");
4116 					return -EINVAL;
4117 				}
4118 			} else {
4119 				for (i = 0; i < (command & 0x1fffff); i++) {
4120 					reg = start_reg + (4 * i);
4121 				if (!si_vm_reg_valid(reg)) {
4122 						DRM_ERROR("CP DMA Bad DST register\n");
4123 						return -EINVAL;
4124 					}
4125 				}
4126 			}
4127 		}
4128 	}
4129 	return 0;
4130 }
4131 
/* Validate a packet3 on the GFX ring from a VM client.  Most opcodes
 * are allowed outright; packets that can write registers are checked
 * against the si_vm_reg_valid() whitelist.  Returns 0 or -EINVAL. */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register side effects: allowed as-is */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* NOTE(review): (idx_value & 0xf00) == 0 appears to select a
		 * register destination — confirm bit meaning in the PM4 spec */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register (one_reg_wr style) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole run of config registers */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4249 
/* Validate a packet3 on a compute ring from a VM client.  Same scheme
 * as si_vm_packet3_gfx_check() with the draw/index opcodes removed;
 * register-writing packets are checked against si_vm_reg_valid().
 * Returns 0 or -EINVAL. */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register side effects: allowed as-is */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* register destination: validate target */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4337 
4338 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4339 {
4340 	int ret = 0;
4341 	u32 idx = 0;
4342 	struct radeon_cs_packet pkt;
4343 
4344 	do {
4345 		pkt.idx = idx;
4346 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4347 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4348 		pkt.one_reg_wr = 0;
4349 		switch (pkt.type) {
4350 		case RADEON_PACKET_TYPE0:
4351 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4352 			ret = -EINVAL;
4353 			break;
4354 		case RADEON_PACKET_TYPE2:
4355 			idx += 1;
4356 			break;
4357 		case RADEON_PACKET_TYPE3:
4358 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4359 			if (ib->is_const_ib)
4360 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4361 			else {
4362 				switch (ib->ring) {
4363 				case RADEON_RING_TYPE_GFX_INDEX:
4364 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4365 					break;
4366 				case CAYMAN_RING_TYPE_CP1_INDEX:
4367 				case CAYMAN_RING_TYPE_CP2_INDEX:
4368 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4369 					break;
4370 				default:
4371 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4372 					ret = -EINVAL;
4373 					break;
4374 				}
4375 			}
4376 			idx += pkt.count + 2;
4377 			break;
4378 		default:
4379 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4380 			ret = -EINVAL;
4381 			break;
4382 		}
4383 		if (ret)
4384 			break;
4385 	} while (idx < ib->length_dw);
4386 
4387 	return ret;
4388 }
4389 
4390 /*
4391  * vm
4392  */
/**
 * si_vm_init - initialize the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * Records the number of hardware VM contexts and the VRAM base offset
 * used by the VM code.  Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4402 
/* Nothing to tear down: si_vm_init() allocates no resources. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4406 
4407 /**
4408  * si_vm_decode_fault - print human readable fault info
4409  *
4410  * @rdev: radeon_device pointer
4411  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4412  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4413  *
4414  * Print human readable fault information (SI).
4415  */
4416 static void si_vm_decode_fault(struct radeon_device *rdev,
4417 			       u32 status, u32 addr)
4418 {
4419 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4420 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4421 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4422 	char *block;
4423 
4424 	if (rdev->family == CHIP_TAHITI) {
4425 		switch (mc_id) {
4426 		case 160:
4427 		case 144:
4428 		case 96:
4429 		case 80:
4430 		case 224:
4431 		case 208:
4432 		case 32:
4433 		case 16:
4434 			block = "CB";
4435 			break;
4436 		case 161:
4437 		case 145:
4438 		case 97:
4439 		case 81:
4440 		case 225:
4441 		case 209:
4442 		case 33:
4443 		case 17:
4444 			block = "CB_FMASK";
4445 			break;
4446 		case 162:
4447 		case 146:
4448 		case 98:
4449 		case 82:
4450 		case 226:
4451 		case 210:
4452 		case 34:
4453 		case 18:
4454 			block = "CB_CMASK";
4455 			break;
4456 		case 163:
4457 		case 147:
4458 		case 99:
4459 		case 83:
4460 		case 227:
4461 		case 211:
4462 		case 35:
4463 		case 19:
4464 			block = "CB_IMMED";
4465 			break;
4466 		case 164:
4467 		case 148:
4468 		case 100:
4469 		case 84:
4470 		case 228:
4471 		case 212:
4472 		case 36:
4473 		case 20:
4474 			block = "DB";
4475 			break;
4476 		case 165:
4477 		case 149:
4478 		case 101:
4479 		case 85:
4480 		case 229:
4481 		case 213:
4482 		case 37:
4483 		case 21:
4484 			block = "DB_HTILE";
4485 			break;
4486 		case 167:
4487 		case 151:
4488 		case 103:
4489 		case 87:
4490 		case 231:
4491 		case 215:
4492 		case 39:
4493 		case 23:
4494 			block = "DB_STEN";
4495 			break;
4496 		case 72:
4497 		case 68:
4498 		case 64:
4499 		case 8:
4500 		case 4:
4501 		case 0:
4502 		case 136:
4503 		case 132:
4504 		case 128:
4505 		case 200:
4506 		case 196:
4507 		case 192:
4508 			block = "TC";
4509 			break;
4510 		case 112:
4511 		case 48:
4512 			block = "CP";
4513 			break;
4514 		case 49:
4515 		case 177:
4516 		case 50:
4517 		case 178:
4518 			block = "SH";
4519 			break;
4520 		case 53:
4521 		case 190:
4522 			block = "VGT";
4523 			break;
4524 		case 117:
4525 			block = "IH";
4526 			break;
4527 		case 51:
4528 		case 115:
4529 			block = "RLC";
4530 			break;
4531 		case 119:
4532 		case 183:
4533 			block = "DMA0";
4534 			break;
4535 		case 61:
4536 			block = "DMA1";
4537 			break;
4538 		case 248:
4539 		case 120:
4540 			block = "HDP";
4541 			break;
4542 		default:
4543 			block = "unknown";
4544 			break;
4545 		}
4546 	} else {
4547 		switch (mc_id) {
4548 		case 32:
4549 		case 16:
4550 		case 96:
4551 		case 80:
4552 		case 160:
4553 		case 144:
4554 		case 224:
4555 		case 208:
4556 			block = "CB";
4557 			break;
4558 		case 33:
4559 		case 17:
4560 		case 97:
4561 		case 81:
4562 		case 161:
4563 		case 145:
4564 		case 225:
4565 		case 209:
4566 			block = "CB_FMASK";
4567 			break;
4568 		case 34:
4569 		case 18:
4570 		case 98:
4571 		case 82:
4572 		case 162:
4573 		case 146:
4574 		case 226:
4575 		case 210:
4576 			block = "CB_CMASK";
4577 			break;
4578 		case 35:
4579 		case 19:
4580 		case 99:
4581 		case 83:
4582 		case 163:
4583 		case 147:
4584 		case 227:
4585 		case 211:
4586 			block = "CB_IMMED";
4587 			break;
4588 		case 36:
4589 		case 20:
4590 		case 100:
4591 		case 84:
4592 		case 164:
4593 		case 148:
4594 		case 228:
4595 		case 212:
4596 			block = "DB";
4597 			break;
4598 		case 37:
4599 		case 21:
4600 		case 101:
4601 		case 85:
4602 		case 165:
4603 		case 149:
4604 		case 229:
4605 		case 213:
4606 			block = "DB_HTILE";
4607 			break;
4608 		case 39:
4609 		case 23:
4610 		case 103:
4611 		case 87:
4612 		case 167:
4613 		case 151:
4614 		case 231:
4615 		case 215:
4616 			block = "DB_STEN";
4617 			break;
4618 		case 72:
4619 		case 68:
4620 		case 8:
4621 		case 4:
4622 		case 136:
4623 		case 132:
4624 		case 200:
4625 		case 196:
4626 			block = "TC";
4627 			break;
4628 		case 112:
4629 		case 48:
4630 			block = "CP";
4631 			break;
4632 		case 49:
4633 		case 177:
4634 		case 50:
4635 		case 178:
4636 			block = "SH";
4637 			break;
4638 		case 53:
4639 			block = "VGT";
4640 			break;
4641 		case 117:
4642 			block = "IH";
4643 			break;
4644 		case 51:
4645 		case 115:
4646 			block = "RLC";
4647 			break;
4648 		case 119:
4649 		case 183:
4650 			block = "DMA0";
4651 			break;
4652 		case 61:
4653 			block = "DMA1";
4654 			break;
4655 		case 248:
4656 		case 120:
4657 			block = "HDP";
4658 			break;
4659 		default:
4660 			block = "unknown";
4661 			break;
4662 		}
4663 	}
4664 
4665 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4666 	       protections, vmid, addr,
4667 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4668 	       block, mc_id);
4669 }
4670 
/**
 * si_vm_flush - emit a VM page-table update and TLB flush on a ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit on
 * @vm: VM whose context should be flushed (no-op when NULL)
 *
 * Writes the VM's page directory base address, flushes the HDP cache
 * and requests a TLB invalidate for the VM's context, all via
 * WRITE_DATA packets (SI).
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4713 
4714 /*
4715  *  Power and clock gating
4716  */
4717 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4718 {
4719 	int i;
4720 
4721 	for (i = 0; i < rdev->usec_timeout; i++) {
4722 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4723 			break;
4724 		udelay(1);
4725 	}
4726 
4727 	for (i = 0; i < rdev->usec_timeout; i++) {
4728 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4729 			break;
4730 		udelay(1);
4731 	}
4732 }
4733 
/*
 * si_enable_gui_idle_interrupt - toggle the CP context busy/empty interrupts
 * @rdev:   radeon_device pointer
 * @enable: true to enable the GUI idle interrupts, false to disable
 *
 * When disabling, also waits until RLC_STAT reports exactly
 * GFX_CLOCK_STATUS | GFX_POWER_STATUS (with RLC_BUSY_STATUS and
 * GFX_LS_STATUS clear) before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register; presumably forces the GFX block awake
		 * before polling RLC_STAT below — TODO confirm */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4759 
/*
 * si_set_uvd_dcm - configure UVD dynamic clock mode (DCM)
 * @rdev:    radeon_device pointer
 * @sw_mode: true for software-controlled divider, false for hardware mode
 *
 * Always enables DCM with CG_DT(1) and CLK_OD(4).  In software mode the
 * upper control field (bits 11..30) is cleared and the divider is driven
 * through UVD_CGC_CTRL2 (DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7)); in
 * hardware mode those bits are set and UVD_CGC_CTRL2 is zeroed.
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
			   bool sw_mode)
{
	u32 tmp, tmp2;

	tmp = RREG32(UVD_CGC_CTRL);
	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
	tmp |= DCM | CG_DT(1) | CLK_OD(4);

	if (sw_mode) {
		tmp &= ~0x7ffff800;
		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
	} else {
		tmp |= 0x7ffff800;
		tmp2 = 0;
	}

	WREG32(UVD_CGC_CTRL, tmp);
	/* UVD_CGC_CTRL2 lives in the UVD context register space */
	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
4780 
4781 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4782 {
4783 	bool hw_mode = true;
4784 
4785 	if (hw_mode) {
4786 		si_set_uvd_dcm(rdev, false);
4787 	} else {
4788 		u32 tmp = RREG32(UVD_CGC_CTRL);
4789 		tmp &= ~DCM;
4790 		WREG32(UVD_CGC_CTRL, tmp);
4791 	}
4792 }
4793 
4794 static u32 si_halt_rlc(struct radeon_device *rdev)
4795 {
4796 	u32 data, orig;
4797 
4798 	orig = data = RREG32(RLC_CNTL);
4799 
4800 	if (data & RLC_ENABLE) {
4801 		data &= ~RLC_ENABLE;
4802 		WREG32(RLC_CNTL, data);
4803 
4804 		si_wait_for_rlc_serdes(rdev);
4805 	}
4806 
4807 	return orig;
4808 }
4809 
4810 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4811 {
4812 	u32 tmp;
4813 
4814 	tmp = RREG32(RLC_CNTL);
4815 	if (tmp != rlc)
4816 		WREG32(RLC_CNTL, rlc);
4817 }
4818 
4819 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4820 {
4821 	u32 data, orig;
4822 
4823 	orig = data = RREG32(DMA_PG);
4824 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4825 		data |= PG_CNTL_ENABLE;
4826 	else
4827 		data &= ~PG_CNTL_ENABLE;
4828 	if (orig != data)
4829 		WREG32(DMA_PG, data);
4830 }
4831 
4832 static void si_init_dma_pg(struct radeon_device *rdev)
4833 {
4834 	u32 tmp;
4835 
4836 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4837 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4838 
4839 	for (tmp = 0; tmp < 5; tmp++)
4840 		WREG32(DMA_PGFSM_WRITE, 0);
4841 }
4842 
/*
 * si_enable_gfx_cgpg - toggle GFX coarse-grain power gating
 * @rdev:   radeon_device pointer
 * @enable: true to enable (requires RADEON_PG_SUPPORT_GFX_PG), false to disable
 *
 * Enabling programs the RLC power-up/down delay thresholds, sets
 * GFX_PG_ENABLE and turns on automatic power gating.  Disabling only
 * clears AUTO_PG_EN (GFX_PG_ENABLE is left as-is).
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power-up/down, thread-termination and mem-sleep delays */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read; presumably wakes the GFX block
		 * after auto power gating is turned off — TODO confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4867 
/*
 * si_init_gfx_cgpg - one-time GFX power-gating setup
 * @rdev: radeon_device pointer
 *
 * Points the RLC at the save/restore and clear-state buffers, selects
 * register-list based save/restore (GFX_PG_SRC) and programs the
 * auto-power-gating idle threshold (GRBM_REG_SGIT).
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before the RLC power-gates the GFX block */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4887 
4888 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4889 {
4890 	u32 mask = 0, tmp, tmp1;
4891 	int i;
4892 
4893 	si_select_se_sh(rdev, se, sh);
4894 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4895 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4896 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4897 
4898 	tmp &= 0xffff0000;
4899 
4900 	tmp |= tmp1;
4901 	tmp >>= 16;
4902 
4903 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4904 		mask <<= 1;
4905 		mask |= 1;
4906 	}
4907 
4908 	return (~tmp) & mask;
4909 }
4910 
4911 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4912 {
4913 	u32 i, j, k, active_cu_number = 0;
4914 	u32 mask, counter, cu_bitmap;
4915 	u32 tmp = 0;
4916 
4917 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4918 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4919 			mask = 1;
4920 			cu_bitmap = 0;
4921 			counter  = 0;
4922 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4923 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4924 					if (counter < 2)
4925 						cu_bitmap |= mask;
4926 					counter++;
4927 				}
4928 				mask <<= 1;
4929 			}
4930 
4931 			active_cu_number += counter;
4932 			tmp |= (cu_bitmap << (i * 16 + j * 8));
4933 		}
4934 	}
4935 
4936 	WREG32(RLC_PG_AO_CU_MASK, tmp);
4937 
4938 	tmp = RREG32(RLC_MAX_PG_CU);
4939 	tmp &= ~MAX_PU_CU_MASK;
4940 	tmp |= MAX_PU_CU(active_cu_number);
4941 	WREG32(RLC_MAX_PG_CU, tmp);
4942 }
4943 
/*
 * si_enable_cgcg - toggle GFX coarse-grain clock gating (CGCG/CGLS)
 * @rdev:   radeon_device pointer
 * @enable: true to enable (requires RADEON_CG_SUPPORT_GFX_CGCG)
 *
 * Enabling halts the RLC, broadcasts the clock-gating setup through the
 * SERDES write path, restores the RLC and then sets CGCG_EN | CGLS_EN.
 * The statement order follows the hardware programming sequence — do
 * not reorder.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while reprogramming the SERDES path */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC_CNTL value */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads; presumably flushes/settles the CB clock
		 * gating state before disabling — TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
4983 
/*
 * si_enable_mgcg - toggle GFX medium-grain clock gating (MGCG)
 * @rdev:   radeon_device pointer
 * @enable: true to enable (requires RADEON_CG_SUPPORT_GFX_MGCG)
 *
 * Also toggles CP memory light sleep (when RADEON_CG_SUPPORT_GFX_CP_LS
 * is set) and reprograms the CGTS/MGCG override registers, broadcasting
 * the change through the RLC SERDES path while the RLC is halted.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low 6 override bits to let MGCG take effect */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5039 
/*
 * si_enable_uvd_mgcg - toggle UVD medium-grain clock gating
 * @rdev:   radeon_device pointer
 * @enable: true to enable (requires RADEON_CG_SUPPORT_UVD_MGCG)
 *
 * Sets/clears the low 14 memory clock-gating bits in UVD_CGC_MEM_CTRL,
 * the DCM bit in UVD_CGC_CTRL, and the SMC-side CGTT local masks
 * (0 = gated, 0xffffffff = force clocks on).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5071 
/* Memory-controller / VM hub registers that carry the MC_CG_ENABLE and
 * MC_LS_ENABLE clock-gating control bits; walked by si_enable_mc_mgcg()
 * and si_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5084 
5085 static void si_enable_mc_ls(struct radeon_device *rdev,
5086 			    bool enable)
5087 {
5088 	int i;
5089 	u32 orig, data;
5090 
5091 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5092 		orig = data = RREG32(mc_cg_registers[i]);
5093 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5094 			data |= MC_LS_ENABLE;
5095 		else
5096 			data &= ~MC_LS_ENABLE;
5097 		if (data != orig)
5098 			WREG32(mc_cg_registers[i], data);
5099 	}
5100 }
5101 
5102 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5103 			       bool enable)
5104 {
5105 	int i;
5106 	u32 orig, data;
5107 
5108 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5109 		orig = data = RREG32(mc_cg_registers[i]);
5110 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5111 			data |= MC_CG_ENABLE;
5112 		else
5113 			data &= ~MC_CG_ENABLE;
5114 		if (data != orig)
5115 			WREG32(mc_cg_registers[i], data);
5116 	}
5117 }
5118 
5119 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5120 			       bool enable)
5121 {
5122 	u32 orig, data, offset;
5123 	int i;
5124 
5125 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5126 		for (i = 0; i < 2; i++) {
5127 			if (i == 0)
5128 				offset = DMA0_REGISTER_OFFSET;
5129 			else
5130 				offset = DMA1_REGISTER_OFFSET;
5131 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5132 			data &= ~MEM_POWER_OVERRIDE;
5133 			if (data != orig)
5134 				WREG32(DMA_POWER_CNTL + offset, data);
5135 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5136 		}
5137 	} else {
5138 		for (i = 0; i < 2; i++) {
5139 			if (i == 0)
5140 				offset = DMA0_REGISTER_OFFSET;
5141 			else
5142 				offset = DMA1_REGISTER_OFFSET;
5143 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5144 			data |= MEM_POWER_OVERRIDE;
5145 			if (data != orig)
5146 				WREG32(DMA_POWER_CNTL + offset, data);
5147 
5148 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5149 			data = 0xff000000;
5150 			if (data != orig)
5151 				WREG32(DMA_CLK_CTRL + offset, data);
5152 		}
5153 	}
5154 }
5155 
5156 static void si_enable_bif_mgls(struct radeon_device *rdev,
5157 			       bool enable)
5158 {
5159 	u32 orig, data;
5160 
5161 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5162 
5163 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5164 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5165 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5166 	else
5167 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5168 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5169 
5170 	if (orig != data)
5171 		WREG32_PCIE(PCIE_CNTL2, data);
5172 }
5173 
5174 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5175 			       bool enable)
5176 {
5177 	u32 orig, data;
5178 
5179 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5180 
5181 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5182 		data &= ~CLOCK_GATING_DIS;
5183 	else
5184 		data |= CLOCK_GATING_DIS;
5185 
5186 	if (orig != data)
5187 		WREG32(HDP_HOST_PATH_CNTL, data);
5188 }
5189 
5190 static void si_enable_hdp_ls(struct radeon_device *rdev,
5191 			     bool enable)
5192 {
5193 	u32 orig, data;
5194 
5195 	orig = data = RREG32(HDP_MEM_POWER_LS);
5196 
5197 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5198 		data |= HDP_LS_ENABLE;
5199 	else
5200 		data &= ~HDP_LS_ENABLE;
5201 
5202 	if (orig != data)
5203 		WREG32(HDP_MEM_POWER_LS, data);
5204 }
5205 
/*
 * si_update_cg - enable/disable clock gating for the selected IP blocks
 * @rdev:   radeon_device pointer
 * @block:  bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: true to enable gating, false to disable
 *
 * For GFX the MGCG/CGCG sequencing is order dependent: MGCG before
 * CGCG on enable, the reverse on disable.  UVD is only touched when
 * the ASIC actually has a UVD block.
 */
void si_update_cg(struct radeon_device *rdev,
		  u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5246 
/*
 * si_init_cg - enable clock gating on all supported blocks at init time
 * @rdev: radeon_device pointer
 *
 * UVD gating (plus its internal clock-gating setup) is done separately,
 * and only when the ASIC has a UVD block.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5259 
/*
 * si_fini_cg - disable clock gating on teardown
 * @rdev: radeon_device pointer
 *
 * UVD gating is disabled first (mirror of si_init_cg()'s order).
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5271 
5272 u32 si_get_csb_size(struct radeon_device *rdev)
5273 {
5274 	u32 count = 0;
5275 	const struct cs_section_def *sect = NULL;
5276 	const struct cs_extent_def *ext = NULL;
5277 
5278 	if (rdev->rlc.cs_data == NULL)
5279 		return 0;
5280 
5281 	/* begin clear state */
5282 	count += 2;
5283 	/* context control state */
5284 	count += 3;
5285 
5286 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5287 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5288 			if (sect->id == SECT_CONTEXT)
5289 				count += 2 + ext->reg_count;
5290 			else
5291 				return 0;
5292 		}
5293 	}
5294 	/* pa_sc_raster_config */
5295 	count += 3;
5296 	/* end clear state */
5297 	count += 2;
5298 	/* clear state */
5299 	count += 2;
5300 
5301 	return count;
5302 }
5303 
/*
 * si_get_csb_buffer - fill the RLC clear-state indirect buffer
 * @rdev:   radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the PM4 packet stream the RLC replays to restore clear state:
 * preamble begin, context control, all SECT_CONTEXT register extents
 * from rdev->rlc.cs_data, a per-family PA_SC_RASTER_CONFIG value,
 * preamble end and a CLEAR_STATE packet.  Must stay in sync with the
 * dword accounting in si_get_csb_size().  Bails out silently on NULL
 * cs_data/buffer or a non-SECT_CONTEXT section.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* SET_CONTEXT_REG offsets are relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family raster configuration (RB/SE layout dependent) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5363 
/*
 * si_init_pg - initialize power gating
 * @rdev: radeon_device pointer
 *
 * With any pg_flags set, performs DMA/GFX PG setup as supported and
 * enables gating (the enable helpers re-check the individual support
 * flags).  Without pg_flags, still programs the RLC save/restore and
 * clear-state buffer addresses, which the RLC needs regardless.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5381 
5382 static void si_fini_pg(struct radeon_device *rdev)
5383 {
5384 	if (rdev->pg_flags) {
5385 		si_enable_dma_pg(rdev, false);
5386 		si_enable_gfx_cgpg(rdev, false);
5387 	}
5388 }
5389 
5390 /*
5391  * RLC
5392  */
/*
 * si_rlc_reset - soft-reset the RLC block
 * @rdev: radeon_device pointer
 *
 * Pulses SOFT_RESET_RLC in GRBM_SOFT_RESET with 50us settle delays
 * after assert and deassert.
 */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5404 
/*
 * si_rlc_stop - halt the RLC
 * @rdev: radeon_device pointer
 *
 * Clears RLC_CNTL, disables the GUI idle interrupts and waits for the
 * SERDES links to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5413 
/*
 * si_rlc_start - start the RLC
 * @rdev: radeon_device pointer
 *
 * Enables the RLC and the GUI idle interrupts, then waits 50us for the
 * block to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5422 
5423 static bool si_lbpw_supported(struct radeon_device *rdev)
5424 {
5425 	u32 tmp;
5426 
5427 	/* Enable LBPW only for DDR3 */
5428 	tmp = RREG32(MC_SEQ_MISC0);
5429 	if ((tmp & 0xF0000000) == 0xB0000000)
5430 		return true;
5431 	return false;
5432 }
5433 
5434 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5435 {
5436 	u32 tmp;
5437 
5438 	tmp = RREG32(RLC_LB_CNTL);
5439 	if (enable)
5440 		tmp |= LOAD_BALANCE_ENABLE;
5441 	else
5442 		tmp &= ~LOAD_BALANCE_ENABLE;
5443 	WREG32(RLC_LB_CNTL, tmp);
5444 
5445 	if (!enable) {
5446 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5447 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5448 	}
5449 }
5450 
/*
 * si_rlc_resume - reset the RLC, load its microcode and start it
 * @rdev: radeon_device pointer
 *
 * Sequence: stop + soft-reset the RLC, run PG and CG init, clear the
 * register-list/load-balance state, upload the big-endian RLC ucode
 * word by word, configure LBPW based on vram type, then start the RLC.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware images are stored big-endian */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5490 
5491 static void si_enable_interrupts(struct radeon_device *rdev)
5492 {
5493 	u32 ih_cntl = RREG32(IH_CNTL);
5494 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5495 
5496 	ih_cntl |= ENABLE_INTR;
5497 	ih_rb_cntl |= IH_RB_ENABLE;
5498 	WREG32(IH_CNTL, ih_cntl);
5499 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5500 	rdev->ih.enabled = true;
5501 }
5502 
5503 static void si_disable_interrupts(struct radeon_device *rdev)
5504 {
5505 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5506 	u32 ih_cntl = RREG32(IH_CNTL);
5507 
5508 	ih_rb_cntl &= ~IH_RB_ENABLE;
5509 	ih_cntl &= ~ENABLE_INTR;
5510 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5511 	WREG32(IH_CNTL, ih_cntl);
5512 	/* set rptr, wptr to 0 */
5513 	WREG32(IH_RB_RPTR, 0);
5514 	WREG32(IH_RB_WPTR, 0);
5515 	rdev->ih.enabled = false;
5516 	rdev->ih.rptr = 0;
5517 }
5518 
/*
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon_device pointer
 *
 * Clears the CP ring, DMA, GRBM, per-CRTC vblank/pageflip and (on
 * ASICs with display) hotplug interrupt enables.  HPD registers keep
 * only their polarity bit.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* preserve only the context busy/empty enables set elsewhere */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank interrupt masks, guarded by the ASIC's crtc count */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep only the HPD polarity bit; clear the enables */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5576 
/*
 * si_irq_init - bring up the interrupt handler (IH)
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, resumes the RLC, programs the IH ring buffer
 * (size, writeback address, pointers), configures IH_CNTL (RPTR_REARM
 * only with MSI), forces all interrupt sources off and finally enables
 * the IH.  Returns 0 on success or a negative error code; on RLC
 * failure the IH ring is freed again.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5647 
/*
 * si_irq_set - program hardware interrupt enables from rdev->irq state
 * @rdev: radeon_device pointer
 *
 * Reads the requested software interrupt state (ring fences, vblank,
 * pageflip, hotplug, thermal) and writes the matching enable bits into
 * the CP ring, DMA, GRBM, per-CRTC display, HPD and thermal interrupt
 * control registers.  If the IH is disabled, everything is masked off
 * instead.
 *
 * Returns 0 on success, -EINVAL when no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current enables with everything we manage cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is requested either by the vblank interrupt path or by a
	 * pending page flip on the crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5819 
5820 static inline void si_irq_ack(struct radeon_device *rdev)
5821 {
5822 	u32 tmp;
5823 
5824 	if (ASIC_IS_NODCE(rdev))
5825 		return;
5826 
5827 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5828 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5829 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5830 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5831 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5832 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5833 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5834 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5835 	if (rdev->num_crtc >= 4) {
5836 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5837 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5838 	}
5839 	if (rdev->num_crtc >= 6) {
5840 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5841 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5842 	}
5843 
5844 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5845 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5846 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5847 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5848 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5849 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5850 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5851 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5852 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5853 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5854 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5855 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5856 
5857 	if (rdev->num_crtc >= 4) {
5858 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5859 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5860 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5861 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5862 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5863 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5864 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5865 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5866 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5867 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5868 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5869 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5870 	}
5871 
5872 	if (rdev->num_crtc >= 6) {
5873 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5874 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5875 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5876 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5877 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5878 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5879 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5880 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5881 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5882 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5883 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5884 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5885 	}
5886 
5887 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5888 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5889 		tmp |= DC_HPDx_INT_ACK;
5890 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5891 	}
5892 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5893 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5894 		tmp |= DC_HPDx_INT_ACK;
5895 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5896 	}
5897 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5898 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5899 		tmp |= DC_HPDx_INT_ACK;
5900 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5901 	}
5902 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5903 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5904 		tmp |= DC_HPDx_INT_ACK;
5905 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5906 	}
5907 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5908 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5909 		tmp |= DC_HPDx_INT_ACK;
5910 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5911 	}
5912 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5913 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5914 		tmp |= DC_HPDx_INT_ACK;
5915 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5916 	}
5917 }
5918 
/* Fully quiesce the interrupt controller: mask all sources, give any
 * in-flight interrupt time to land, ack whatever is still pending and
 * reset the per-source enable state.  Call order matters here.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5927 
/* Suspend-time interrupt shutdown: disable/ack all interrupts first,
 * then stop the RLC which feeds the interrupt controller.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5933 
/* Tear down the interrupt handling: quiesce the hardware, then free
 * the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5939 
5940 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5941 {
5942 	u32 wptr, tmp;
5943 
5944 	if (rdev->wb.enabled)
5945 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5946 	else
5947 		wptr = RREG32(IH_RB_WPTR);
5948 
5949 	if (wptr & RB_OVERFLOW) {
5950 		/* When a ring buffer overflow happen start parsing interrupt
5951 		 * from the last not overwritten vector (wptr + 16). Hopefully
5952 		 * this should allow us to catchup.
5953 		 */
5954 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5955 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5956 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5957 		tmp = RREG32(IH_RB_CNTL);
5958 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5959 		WREG32(IH_RB_CNTL, tmp);
5960 	}
5961 	return (wptr & rdev->ih.ptr_mask);
5962 }
5963 
5964 /*        SI IV Ring
5965  * Each IV ring entry is 128 bits:
5966  * [7:0]    - interrupt source id
5967  * [31:8]   - reserved
5968  * [59:32]  - interrupt source data
5969  * [63:60]  - reserved
5970  * [71:64]  - RINGID
5971  * [79:72]  - VMID
5972  * [127:80] - reserved
5973  */
/**
 * si_irq_process - drain and dispatch the IH (interrupt handler) ring
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, decoding each 128-bit IV entry
 * (see the format comment above) and dispatching on the source id:
 * display vblank/vline, hotplug, VM faults, CP/DMA fences and thermal
 * events.  Exclusive access is arbitrated with an atomic lock; after
 * releasing it the wptr is re-read and processing restarts if more
 * entries arrived in the meantime.
 *
 * Returns IRQ_HANDLED if work was done, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					/* clear the latched status bit so the event
					 * is only handled once per ack */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM page fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* heavyweight work (hotplug probing, thermal handling) is deferred
	 * to process context */
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6297 
6298 /*
6299  * startup/shutdown callbacks
6300  */
/**
 * si_startup - bring the SI GPU into an operational state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, loads microcode, enables the GART, initializes
 * RLC/writeback/fence infrastructure, sets up interrupts and starts
 * every ring (GFX, CP1/CP2, two DMA engines and optionally UVD),
 * then initializes the IB pool, VM manager and audio.
 * The sequence is order-sensitive.  Returns 0 on success, negative
 * error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load firmware images if any are still missing */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring before the rings run */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on failure just disable its ring, don't abort */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the five compute/gfx/dma rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only exists if uvd_v2_2_resume() succeeded above */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6492 
6493 int si_resume(struct radeon_device *rdev)
6494 {
6495 	int r;
6496 
6497 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6498 	 * posting will perform necessary task to bring back GPU into good
6499 	 * shape.
6500 	 */
6501 	/* post card */
6502 	atom_asic_init(rdev->mode_info.atom_context);
6503 
6504 	/* init golden registers */
6505 	si_init_golden_registers(rdev);
6506 
6507 	rdev->accel_working = true;
6508 	r = si_startup(rdev);
6509 	if (r) {
6510 		DRM_ERROR("si startup failed on resume\n");
6511 		rdev->accel_working = false;
6512 		return r;
6513 	}
6514 
6515 	return r;
6516 
6517 }
6518 
/**
 * si_suspend - quiesce the GPU for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, the CP and DMA engines, UVD (if
 * present), power/clock gating, interrupts, writeback and finally
 * the GART, in that order.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6536 
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than call asic-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
/**
 * si_init - one-time driver init for SI parts
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed,
 * sets up scratch/surface registers, clocks, the fence driver, the
 * memory controller and buffer manager, sizes all rings and the IH
 * ring, then runs si_startup().  If startup fails, acceleration is
 * disabled but init still succeeds unless the MC ucode is missing.
 * Returns 0 on success, negative error code on fatal failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* size the gfx, two compute and two DMA rings; BOs are
	 * allocated later by si_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* startup failure is non-fatal: fall back to unaccelerated
		 * operation after tearing down what was brought up */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
6657 
/**
 * si_fini - final driver teardown
 *
 * @rdev: radeon_device pointer
 *
 * Reverses si_init()/si_startup(): stops engines, gating, interrupts
 * and RLC, then frees writeback, VM manager, IB pool, IRQs, UVD,
 * GART, scratch VRAM, GEM, fences, the buffer manager, the atombios
 * state and finally the cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6683 
6684 /**
6685  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6686  *
6687  * @rdev: radeon_device pointer
6688  *
6689  * Fetches a GPU clock counter snapshot (SI).
6690  * Returns the 64 bit clock counter snapshot.
6691  */
6692 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6693 {
6694 	uint64_t clock;
6695 
6696 	mutex_lock(&rdev->gpu_clock_mutex);
6697 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6698 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6699 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6700 	mutex_unlock(&rdev->gpu_clock_mutex);
6701 	return clock;
6702 }
6703 
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (presumably in 10 kHz units as
 *        elsewhere in radeon — TODO confirm against callers)
 * @dclk: requested UVD decode clock (same units as @vclk)
 *
 * Puts the UPLL into bypass, computes and programs new dividers,
 * waits for lock, and switches VCLK/DCLK back to the PLL outputs.
 * If either requested clock is zero the PLL is left in bypass and
 * put to sleep.  The exact write/delay ordering below follows the
 * hardware programming sequence and must not be reordered.
 * Returns 0 on success, a negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers within the SI UPLL limits */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC before reprogramming the dividers */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range depending on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	/* second handshake confirms the PLL locked at the new settings */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6794 
/*
 * si_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks what speeds both the GPU and the upstream bridge support,
 * optionally re-runs gen3 equalization, programs the target link
 * speed in LNKCTL2 and triggers a software-initiated speed change.
 * Silently returns on IGP parts, non-PCIE buses, or when the user
 * disabled the feature with radeon.pcie_gen2=0 (the gen2 parameter
 * also gates gen3 here).
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* data rate encoding in LC_SPEED_CNTL: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends must expose a PCIe capability for the retrain dance */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save HAWD (hw autonomous width disable) state on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its detected maximum if it narrowed */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance (bit 4) and
				 * transmit-margin (bits 9-11) from the saved values
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit (change complete) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6951 
/*
 * si_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs the link controller, PIF PHY and clock-selection registers
 * for L0s/L1 power states, PLL power-down in L1 and (when the root
 * port advertises clock PM) CLKREQ-based reference clocking.  The
 * disable_* locals are compile-time policy knobs; all are currently
 * false, so every feature is attempted.  No-op when radeon.aspm=0 or
 * the device is not on a PCIE bus.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* set the N_FTS (fast training sequence) count override.
	 * Throughout this function the read-modify-write pairs only hit
	 * the register when the value actually changed (orig != data).
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow both PHY PLLs to power down in the OFF/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan need a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ only usable if the root port supports clock PM */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* reroute thermal-monitor and misc clocks off the
				 * reference clock so it can gate with CLKREQ
				 */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* back out L0s if the link is reversed and the N_FTS field
		 * reads all-ones (NOTE(review): appears to detect an
		 * unsupported configuration — confirm against hw docs)
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7155