/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"

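/*
 * Firmware images required for each SI ASIC: command-processor microcode
 * (PFP/ME/CE), memory-controller microcode (MC, plus a newer MC2 variant),
 * RLC microcode and SMC firmware.  MODULE_FIRMWARE() records the
 * dependencies so initramfs tooling can bundle the files.
 */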
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

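/*
 * RLC save/restore register list: pairs of ((instance select << 16) |
 * (register dword offset)) followed by a default value.  The 0x8000/0x8040/
 * 0x9c00 selectors appear to address per-shader-engine vs. broadcast
 * register instances; sumo_rlc_init() copies this list into the RLC
 * save/restore buffer.
 */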
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

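/*
 * "Golden" register tables: triples of {register offset, AND mask, OR value}
 * consumed by radeon_program_register_sequence(), which read-modify-writes
 * each register (or writes the value directly when the mask is 0xffffffff).
 */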
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

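/*
 * Clock gating (MGCG/CGCG) init sequences, in the same
 * {offset, mask, value} triple format as the golden register tables.
 */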
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

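/*
 * Power gating init sequence for Verde, in the same {offset, mask, value}
 * triple format; the 0x3500/0x3504 writes look like index/data pairs that
 * load the power-gating sequencer tables.
 */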
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

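/* Apply the per-ASIC golden register sequences selected by chip family. */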
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
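/* The CTF_TEMP field is 9 bits wide; a reading with bit 9 set is treated
 * as out of range and clamped to the 255 degree C maximum below.
 */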
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

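/*
 * MC sequencer IO debug settings: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before the MC ucode itself
 * is streamed in (see si_mc_load_microcode() below).
 */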
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/*
 * MC ucode loading: halt the MC sequencer, program the IO debug register
 * pairs above, stream in the big-endian ucode words, then restart the
 * engine and wait for memory training to complete on both channels.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	const u32 *io_mc_regs;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	ucode_size = rdev->mc_fw->size / 4;

	switch (rdev->family) {
	case CHIP_TAHITI:
		io_mc_regs = (const u32 *)&tahiti_io_mc_regs;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_PITCAIRN:
		io_mc_regs = (const u32 *)&pitcairn_io_mc_regs;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_VERDE:
	default:
		io_mc_regs = (const u32 *)&verde_io_mc_regs;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_OLAND:
		io_mc_regs = (const u32 *)&oland_io_mc_regs;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_HAINAN:
		io_mc_regs = (const u32 *)&hainan_io_mc_regs;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

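/*
 * Fetch all microcode images for the detected ASIC.  The newer MC2 image
 * is preferred, with a fallback to the original MC image; a missing SMC
 * image is tolerated (the error is cleared and the driver carries on
 * without it).
 */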
static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		rlc_chip_name = "TAHITI";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		rlc_chip_name = "PITCAIRN";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		rlc_chip_name = "VERDE";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		rlc_chip_name = "OLAND";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		rlc_chip_name = "HAINAN";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
	}
	if ((rdev->mc_fw->size != mc_req_size) &&
	    (rdev->mc_fw->size != mc2_req_size)) {
		printk(KERN_ERR
		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}
1682 	DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1683 
1684 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1685 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1686 	if (err) {
1687 		printk(KERN_ERR
1688 		       "si_smc: error loading firmware \"%s\"\n",
1689 		       fw_name);
1690 		release_firmware(rdev->smc_fw);
1691 		rdev->smc_fw = NULL;
1692 		err = 0; /* the SMC image is optional; without it DPM is unavailable */
1693 	} else if (rdev->smc_fw->size != smc_req_size) {
1694 		printk(KERN_ERR
1695 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1696 		       rdev->smc_fw->size, fw_name);
1697 		err = -EINVAL;
1698 	}
1699 
1700 out:
1701 	if (err) {
1702 		if (err != -EINVAL)
1703 			printk(KERN_ERR
1704 			       "si_cp: Failed to load firmware \"%s\"\n",
1705 			       fw_name);
1706 		release_firmware(rdev->pfp_fw);
1707 		rdev->pfp_fw = NULL;
1708 		release_firmware(rdev->me_fw);
1709 		rdev->me_fw = NULL;
1710 		release_firmware(rdev->ce_fw);
1711 		rdev->ce_fw = NULL;
1712 		release_firmware(rdev->rlc_fw);
1713 		rdev->rlc_fw = NULL;
1714 		release_firmware(rdev->mc_fw);
1715 		rdev->mc_fw = NULL;
1716 		release_firmware(rdev->smc_fw);
1717 		rdev->smc_fw = NULL;
1718 	}
1719 	return err;
1720 }
1721 
1722 /* watermark setup */
1723 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1724 				   struct radeon_crtc *radeon_crtc,
1725 				   struct drm_display_mode *mode,
1726 				   struct drm_display_mode *other_mode)
1727 {
1728 	u32 tmp, buffer_alloc, i;
1729 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1730 	/*
1731 	 * Line Buffer Setup
1732 	 * There are 3 line buffers, each one shared by 2 display controllers.
1733 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1734 	 * the display controllers.  The partitioning is done via one of four
1735 	 * preset allocations specified in bits 21:20; only two are used here:
1736 	 *  0 - half lb
1737 	 *  2 - whole lb, other crtc must be disabled
	 * (an illustrative decode of these presets follows this function)
1738 	 */
1739 	/* this can get tricky if we have two large displays on a paired group
1740 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1741 	 * non-linked crtcs for maximum line buffer allocation.
1742 	 */
1743 	if (radeon_crtc->base.enabled && mode) {
1744 		if (other_mode) {
1745 			tmp = 0; /* 1/2 */
1746 			buffer_alloc = 1;
1747 		} else {
1748 			tmp = 2; /* whole */
1749 			buffer_alloc = 2;
1750 		}
1751 	} else {
1752 		tmp = 0;
1753 		buffer_alloc = 0;
1754 	}
1755 
1756 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1757 	       DC_LB_MEMORY_CONFIG(tmp));
1758 
1759 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1760 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1761 	for (i = 0; i < rdev->usec_timeout; i++) {
1762 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1763 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1764 			break;
1765 		udelay(1);
1766 	}
1767 
1768 	if (radeon_crtc->base.enabled && mode) {
1769 		switch (tmp) {
1770 		case 0:
1771 		default:
1772 			return 4096 * 2;
1773 		case 2:
1774 			return 8192 * 2;
1775 		}
1776 	}
1777 
1778 	/* controller not enabled, so no lb used */
1779 	return 0;
1780 }
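
/*
 * Illustrative sketch (not wired into the driver): mapping the
 * DC_LB_MEMORY_CONFIG presets programmed above back to the line buffer
 * sizes this function returns for an enabled crtc.
 */
#if 0
static u32 dce6_lb_preset_to_size(u32 preset)
{
	switch (preset) {
	case 2:		/* whole line buffer, paired crtc disabled */
		return 8192 * 2;
	case 0:		/* half of the line buffer */
	default:
		return 4096 * 2;
	}
}
#endif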
1781 
1782 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1783 {
1784 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1785 
1786 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1787 	case 0:
1788 	default:
1789 		return 1;
1790 	case 1:
1791 		return 2;
1792 	case 2:
1793 		return 4;
1794 	case 3:
1795 		return 8;
1796 	case 4:
1797 		return 3;
1798 	case 5:
1799 		return 6;
1800 	case 6:
1801 		return 10;
1802 	case 7:
1803 		return 12;
1804 	case 8:
1805 		return 16;
1806 	}
1807 }
1808 
1809 struct dce6_wm_params {
1810 	u32 dram_channels; /* number of dram channels */
1811 	u32 yclk;          /* bandwidth per dram data pin in kHz */
1812 	u32 sclk;          /* engine clock in kHz */
1813 	u32 disp_clk;      /* display clock in kHz */
1814 	u32 src_width;     /* viewport width */
1815 	u32 active_time;   /* active display time in ns */
1816 	u32 blank_time;    /* blank time in ns */
1817 	bool interlaced;    /* mode is interlaced */
1818 	fixed20_12 vsc;    /* vertical scale ratio */
1819 	u32 num_heads;     /* number of active crtcs */
1820 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1821 	u32 lb_size;       /* line buffer allocated to pipe */
1822 	u32 vtaps;         /* vertical scaler taps */
1823 };
1824 
1825 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1826 {
1827 	/* Calculate raw DRAM Bandwidth */
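	/* e.g. with illustrative numbers yclk = 800000 kHz and 4 channels:
	 * (800000 / 1000) * (4 * 4) * 0.7 = 8960 in the MB/s scale used here
	 */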
1828 	fixed20_12 dram_efficiency; /* 0.7 */
1829 	fixed20_12 yclk, dram_channels, bandwidth;
1830 	fixed20_12 a;
1831 
1832 	a.full = dfixed_const(1000);
1833 	yclk.full = dfixed_const(wm->yclk);
1834 	yclk.full = dfixed_div(yclk, a);
1835 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1836 	a.full = dfixed_const(10);
1837 	dram_efficiency.full = dfixed_const(7);
1838 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1839 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1840 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1841 
1842 	return dfixed_trunc(bandwidth);
1843 }
1844 
1845 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1846 {
1847 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1848 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1849 	fixed20_12 yclk, dram_channels, bandwidth;
1850 	fixed20_12 a;
1851 
1852 	a.full = dfixed_const(1000);
1853 	yclk.full = dfixed_const(wm->yclk);
1854 	yclk.full = dfixed_div(yclk, a);
1855 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1856 	a.full = dfixed_const(10);
1857 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
1858 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1859 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1860 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1861 
1862 	return dfixed_trunc(bandwidth);
1863 }
1864 
1865 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1866 {
1867 	/* Calculate the display Data return Bandwidth */
1868 	fixed20_12 return_efficiency; /* 0.8 */
1869 	fixed20_12 sclk, bandwidth;
1870 	fixed20_12 a;
1871 
1872 	a.full = dfixed_const(1000);
1873 	sclk.full = dfixed_const(wm->sclk);
1874 	sclk.full = dfixed_div(sclk, a);
1875 	a.full = dfixed_const(10);
1876 	return_efficiency.full = dfixed_const(8);
1877 	return_efficiency.full = dfixed_div(return_efficiency, a);
1878 	a.full = dfixed_const(32);
1879 	bandwidth.full = dfixed_mul(a, sclk);
1880 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1881 
1882 	return dfixed_trunc(bandwidth);
1883 }
1884 
1885 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1886 {
1887 	return 32;
1888 }
1889 
1890 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1891 {
1892 	/* Calculate the DMIF Request Bandwidth */
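	/* i.e. min(16 bytes * disp_clk, 32 bytes * sclk) * 0.8, clocks in MHz */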
1893 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1894 	fixed20_12 disp_clk, sclk, bandwidth;
1895 	fixed20_12 a, b1, b2;
1896 	u32 min_bandwidth;
1897 
1898 	a.full = dfixed_const(1000);
1899 	disp_clk.full = dfixed_const(wm->disp_clk);
1900 	disp_clk.full = dfixed_div(disp_clk, a);
1901 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1902 	b1.full = dfixed_mul(a, disp_clk);
1903 
1904 	a.full = dfixed_const(1000);
1905 	sclk.full = dfixed_const(wm->sclk);
1906 	sclk.full = dfixed_div(sclk, a);
1907 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1908 	b2.full = dfixed_mul(a, sclk);
1909 
1910 	a.full = dfixed_const(10);
1911 	disp_clk_request_efficiency.full = dfixed_const(8);
1912 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1913 
1914 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1915 
1916 	a.full = dfixed_const(min_bandwidth);
1917 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1918 
1919 	return dfixed_trunc(bandwidth);
1920 }
1921 
1922 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1923 {
1924 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1925 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1926 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1927 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1928 
1929 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1930 }
1931 
1932 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1933 {
1934 	/* Calculate the display mode Average Bandwidth
1935 	 * DisplayMode should contain the source and destination dimensions,
1936 	 * timing, etc.
1937 	 */
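	/* i.e. src_width * bytes_per_pixel * vsc / line_time(us) */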
1938 	fixed20_12 bpp;
1939 	fixed20_12 line_time;
1940 	fixed20_12 src_width;
1941 	fixed20_12 bandwidth;
1942 	fixed20_12 a;
1943 
1944 	a.full = dfixed_const(1000);
1945 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1946 	line_time.full = dfixed_div(line_time, a);
1947 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1948 	src_width.full = dfixed_const(wm->src_width);
1949 	bandwidth.full = dfixed_mul(src_width, bpp);
1950 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1951 	bandwidth.full = dfixed_div(bandwidth, line_time);
1952 
1953 	return dfixed_trunc(bandwidth);
1954 }
1955 
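/*
 * Worked example with illustrative numbers: assuming
 * available_bandwidth = 8960, disp_clk = 150000 kHz and one head,
 *   worst_chunk_return_time = 512 * 8 * 1000 / 8960 = 457 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 8960 = 57 ns
 *   dc_latency = 40000000 / 150000 = 266 ns
 * giving latency = 2000 + (2 * 457 + 1 * 57) + 266 = 3237 ns before the
 * line fill time correction at the end of the function.
 */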
1956 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1957 {
1958 	/* First calculate the latency in ns */
1959 	u32 mc_latency = 2000; /* 2000 ns. */
1960 	u32 available_bandwidth = dce6_available_bandwidth(wm);
1961 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1962 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1963 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1964 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1965 		(wm->num_heads * cursor_line_pair_return_time);
1966 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1967 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1968 	u32 tmp, dmif_size = 12288;
1969 	fixed20_12 a, b, c;
1970 
1971 	if (wm->num_heads == 0)
1972 		return 0;
1973 
1974 	a.full = dfixed_const(2);
1975 	b.full = dfixed_const(1);
1976 	if ((wm->vsc.full > a.full) ||
1977 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1978 	    (wm->vtaps >= 5) ||
1979 	    ((wm->vsc.full >= a.full) && wm->interlaced))
1980 		max_src_lines_per_dst_line = 4;
1981 	else
1982 		max_src_lines_per_dst_line = 2;
1983 
1984 	a.full = dfixed_const(available_bandwidth);
1985 	b.full = dfixed_const(wm->num_heads);
1986 	a.full = dfixed_div(a, b);
1987 
1988 	b.full = dfixed_const(mc_latency + 512);
1989 	c.full = dfixed_const(wm->disp_clk);
1990 	b.full = dfixed_div(b, c);
1991 
1992 	c.full = dfixed_const(dmif_size);
1993 	b.full = dfixed_div(c, b);
1994 
1995 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1996 
1997 	b.full = dfixed_const(1000);
1998 	c.full = dfixed_const(wm->disp_clk);
1999 	b.full = dfixed_div(c, b);
2000 	c.full = dfixed_const(wm->bytes_per_pixel);
2001 	b.full = dfixed_mul(b, c);
2002 
2003 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2004 
2005 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2006 	b.full = dfixed_const(1000);
2007 	c.full = dfixed_const(lb_fill_bw);
2008 	b.full = dfixed_div(c, b);
2009 	a.full = dfixed_div(a, b);
2010 	line_fill_time = dfixed_trunc(a);
2011 
2012 	if (line_fill_time < wm->active_time)
2013 		return latency;
2014 	else
2015 		return latency + (line_fill_time - wm->active_time);
2017 }
2018 
2019 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2020 {
2021 	if (dce6_average_bandwidth(wm) <=
2022 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2023 		return true;
2024 	else
2025 		return false;
2026 }
2027 
2028 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2029 {
2030 	if (dce6_average_bandwidth(wm) <=
2031 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2032 		return true;
2033 	else
2034 		return false;
2035 }
2036 
2037 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2038 {
2039 	u32 lb_partitions = wm->lb_size / wm->src_width;
2040 	u32 line_time = wm->active_time + wm->blank_time;
2041 	u32 latency_tolerant_lines;
2042 	u32 latency_hiding;
2043 	fixed20_12 a;
2044 
2045 	a.full = dfixed_const(1);
2046 	if (wm->vsc.full > a.full)
2047 		latency_tolerant_lines = 1;
2048 	else {
2049 		if (lb_partitions <= (wm->vtaps + 1))
2050 			latency_tolerant_lines = 1;
2051 		else
2052 			latency_tolerant_lines = 2;
2053 	}
2054 
2055 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2056 
2057 	if (dce6_latency_watermark(wm) <= latency_hiding)
2058 		return true;
2059 	else
2060 		return false;
2061 }
2062 
2063 static void dce6_program_watermarks(struct radeon_device *rdev,
2064 					 struct radeon_crtc *radeon_crtc,
2065 					 u32 lb_size, u32 num_heads)
2066 {
2067 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2068 	struct dce6_wm_params wm_low, wm_high;
2069 	u32 dram_channels;
2070 	u32 pixel_period;
2071 	u32 line_time = 0;
2072 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2073 	u32 priority_a_mark = 0, priority_b_mark = 0;
2074 	u32 priority_a_cnt = PRIORITY_OFF;
2075 	u32 priority_b_cnt = PRIORITY_OFF;
2076 	u32 tmp, arb_control3;
2077 	fixed20_12 a, b, c;
2078 
2079 	if (radeon_crtc->base.enabled && num_heads && mode) {
2080 		pixel_period = 1000000 / (u32)mode->clock;
2081 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2082 		priority_a_cnt = 0;
2083 		priority_b_cnt = 0;
2084 
2085 		if (rdev->family == CHIP_ARUBA)
2086 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2087 		else
2088 			dram_channels = si_get_number_of_dram_channels(rdev);
2089 
2090 		/* watermark for high clocks */
2091 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2092 			wm_high.yclk =
2093 				radeon_dpm_get_mclk(rdev, false) * 10;
2094 			wm_high.sclk =
2095 				radeon_dpm_get_sclk(rdev, false) * 10;
2096 		} else {
2097 			wm_high.yclk = rdev->pm.current_mclk * 10;
2098 			wm_high.sclk = rdev->pm.current_sclk * 10;
2099 		}
2100 
2101 		wm_high.disp_clk = mode->clock;
2102 		wm_high.src_width = mode->crtc_hdisplay;
2103 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2104 		wm_high.blank_time = line_time - wm_high.active_time;
2105 		wm_high.interlaced = false;
2106 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2107 			wm_high.interlaced = true;
2108 		wm_high.vsc = radeon_crtc->vsc;
2109 		wm_high.vtaps = 1;
2110 		if (radeon_crtc->rmx_type != RMX_OFF)
2111 			wm_high.vtaps = 2;
2112 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2113 		wm_high.lb_size = lb_size;
2114 		wm_high.dram_channels = dram_channels;
2115 		wm_high.num_heads = num_heads;
2116 
2117 		/* watermark for low clocks */
2118 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2119 			wm_low.yclk =
2120 				radeon_dpm_get_mclk(rdev, true) * 10;
2121 			wm_low.sclk =
2122 				radeon_dpm_get_sclk(rdev, true) * 10;
2123 		} else {
2124 			wm_low.yclk = rdev->pm.current_mclk * 10;
2125 			wm_low.sclk = rdev->pm.current_sclk * 10;
2126 		}
2127 
2128 		wm_low.disp_clk = mode->clock;
2129 		wm_low.src_width = mode->crtc_hdisplay;
2130 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2131 		wm_low.blank_time = line_time - wm_low.active_time;
2132 		wm_low.interlaced = false;
2133 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2134 			wm_low.interlaced = true;
2135 		wm_low.vsc = radeon_crtc->vsc;
2136 		wm_low.vtaps = 1;
2137 		if (radeon_crtc->rmx_type != RMX_OFF)
2138 			wm_low.vtaps = 2;
2139 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2140 		wm_low.lb_size = lb_size;
2141 		wm_low.dram_channels = dram_channels;
2142 		wm_low.num_heads = num_heads;
2143 
2144 		/* set for high clocks */
2145 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2146 		/* set for low clocks */
2147 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2148 
2149 		/* possibly force display priority to high */
2150 		/* should really do this at mode validation time... */
2151 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2152 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2153 		    !dce6_check_latency_hiding(&wm_high) ||
2154 		    (rdev->disp_priority == 2)) {
2155 			DRM_DEBUG_KMS("force priority to high\n");
2156 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2157 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2158 		}
2159 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2160 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2161 		    !dce6_check_latency_hiding(&wm_low) ||
2162 		    (rdev->disp_priority == 2)) {
2163 			DRM_DEBUG_KMS("force priority to high\n");
2164 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2165 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2166 		}
2167 
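		/* priority mark: pixels fetched during the latency
		 * watermark window, expressed in units of 16 pixels
		 */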
2168 		a.full = dfixed_const(1000);
2169 		b.full = dfixed_const(mode->clock);
2170 		b.full = dfixed_div(b, a);
2171 		c.full = dfixed_const(latency_watermark_a);
2172 		c.full = dfixed_mul(c, b);
2173 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2174 		c.full = dfixed_div(c, a);
2175 		a.full = dfixed_const(16);
2176 		c.full = dfixed_div(c, a);
2177 		priority_a_mark = dfixed_trunc(c);
2178 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2179 
2180 		a.full = dfixed_const(1000);
2181 		b.full = dfixed_const(mode->clock);
2182 		b.full = dfixed_div(b, a);
2183 		c.full = dfixed_const(latency_watermark_b);
2184 		c.full = dfixed_mul(c, b);
2185 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2186 		c.full = dfixed_div(c, a);
2187 		a.full = dfixed_const(16);
2188 		c.full = dfixed_div(c, a);
2189 		priority_b_mark = dfixed_trunc(c);
2190 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2191 	}
2192 
2193 	/* select wm A */
2194 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2195 	tmp = arb_control3;
2196 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2197 	tmp |= LATENCY_WATERMARK_MASK(1);
2198 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2199 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2200 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2201 		LATENCY_HIGH_WATERMARK(line_time)));
2202 	/* select wm B */
2203 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2204 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2205 	tmp |= LATENCY_WATERMARK_MASK(2);
2206 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2207 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2208 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2209 		LATENCY_HIGH_WATERMARK(line_time)));
2210 	/* restore original selection */
2211 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2212 
2213 	/* write the priority marks */
2214 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2215 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2216 
2217 	/* save values for DPM */
2218 	radeon_crtc->line_time = line_time;
2219 	radeon_crtc->wm_high = latency_watermark_a;
2220 	radeon_crtc->wm_low = latency_watermark_b;
2221 }
2222 
2223 void dce6_bandwidth_update(struct radeon_device *rdev)
2224 {
2225 	struct drm_display_mode *mode0 = NULL;
2226 	struct drm_display_mode *mode1 = NULL;
2227 	u32 num_heads = 0, lb_size;
2228 	int i;
2229 
2230 	radeon_update_display_priority(rdev);
2231 
2232 	for (i = 0; i < rdev->num_crtc; i++) {
2233 		if (rdev->mode_info.crtcs[i]->base.enabled)
2234 			num_heads++;
2235 	}
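	/* crtcs are handled in pairs since each pair shares one line buffer */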
2236 	for (i = 0; i < rdev->num_crtc; i += 2) {
2237 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2238 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2239 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2240 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2241 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2242 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2243 	}
2244 }
2245 
2246 /*
2247  * Core functions
2248  */
2249 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2250 {
2251 	const u32 num_tile_mode_states = 32;
2252 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2253 
2254 	switch (rdev->config.si.mem_row_size_in_kb) {
2255 	case 1:
2256 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2257 		break;
2258 	case 2:
2259 	default:
2260 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2261 		break;
2262 	case 4:
2263 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2264 		break;
2265 	}
2266 
2267 	if ((rdev->family == CHIP_TAHITI) ||
2268 	    (rdev->family == CHIP_PITCAIRN)) {
2269 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2270 			switch (reg_offset) {
2271 			case 0:  /* non-AA compressed depth or any compressed stencil */
2272 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2274 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2275 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2276 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2277 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2279 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2280 				break;
2281 			case 1:  /* 2xAA/4xAA compressed depth only */
2282 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2284 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2285 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2286 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2287 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2290 				break;
2291 			case 2:  /* 8xAA compressed depth only */
2292 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2294 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2295 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2296 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2297 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2300 				break;
2301 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2302 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2304 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2305 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2306 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2307 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2310 				break;
2311 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2314 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2315 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2316 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2317 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2319 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2320 				break;
2321 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 						 TILE_SPLIT(split_equal_to_row_size) |
2326 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2327 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2329 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2330 				break;
2331 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2334 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2335 						 TILE_SPLIT(split_equal_to_row_size) |
2336 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2337 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2339 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2340 				break;
2341 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2342 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2345 						 TILE_SPLIT(split_equal_to_row_size) |
2346 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2347 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2350 				break;
2351 			case 8:  /* 1D and 1D Array Surfaces */
2352 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2353 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2355 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2356 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2357 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2359 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2360 				break;
2361 			case 9:  /* Displayable maps. */
2362 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2366 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2367 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2370 				break;
2371 			case 10:  /* Display 8bpp. */
2372 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2374 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2376 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2377 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2380 				break;
2381 			case 11:  /* Display 16bpp. */
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2384 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2385 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2386 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2387 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2390 				break;
2391 			case 12:  /* Display 32bpp. */
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2396 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2397 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2400 				break;
2401 			case 13:  /* Thin. */
2402 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2404 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2406 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2407 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2409 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2410 				break;
2411 			case 14:  /* Thin 8 bpp. */
2412 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2414 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2415 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2417 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2420 				break;
2421 			case 15:  /* Thin 16 bpp. */
2422 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2424 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2425 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2426 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2427 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2429 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2430 				break;
2431 			case 16:  /* Thin 32 bpp. */
2432 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2434 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2436 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2437 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2440 				break;
2441 			case 17:  /* Thin 64 bpp. */
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445 						 TILE_SPLIT(split_equal_to_row_size) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2447 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2450 				break;
2451 			case 21:  /* 8 bpp PRT. */
2452 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2454 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2456 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2457 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2460 				break;
2461 			case 22:  /* 16 bpp PRT */
2462 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2464 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2465 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2467 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2470 				break;
2471 			case 23:  /* 32 bpp PRT */
2472 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2473 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2474 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2476 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2477 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2480 				break;
2481 			case 24:  /* 64 bpp PRT */
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2483 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2485 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2486 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2487 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2490 				break;
2491 			case 25:  /* 128 bpp PRT */
2492 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2494 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2495 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2496 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2497 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2500 				break;
2501 			default:
2502 				gb_tile_moden = 0;
2503 				break;
2504 			}
2505 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2506 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2507 		}
2508 	} else if ((rdev->family == CHIP_VERDE) ||
2509 		   (rdev->family == CHIP_OLAND) ||
2510 		   (rdev->family == CHIP_HAINAN)) {
2511 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2512 			switch (reg_offset) {
2513 			case 0:  /* non-AA compressed depth or any compressed stencil */
2514 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2517 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2519 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2521 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2522 				break;
2523 			case 1:  /* 2xAA/4xAA compressed depth only */
2524 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2527 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2529 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2532 				break;
2533 			case 2:  /* 8xAA compressed depth only */
2534 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2537 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2538 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2539 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2542 				break;
2543 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2544 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2546 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2547 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2549 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2552 				break;
2553 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2554 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2556 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2557 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2559 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562 				break;
2563 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2564 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 						 TILE_SPLIT(split_equal_to_row_size) |
2568 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2569 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572 				break;
2573 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2574 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2577 						 TILE_SPLIT(split_equal_to_row_size) |
2578 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2579 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2582 				break;
2583 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2587 						 TILE_SPLIT(split_equal_to_row_size) |
2588 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2589 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2591 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2592 				break;
2593 			case 8:  /* 1D and 1D Array Surfaces */
2594 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2595 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2597 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2598 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2599 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2601 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602 				break;
2603 			case 9:  /* Displayable maps. */
2604 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2606 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2609 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612 				break;
2613 			case 10:  /* Display 8bpp. */
2614 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2617 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2619 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2622 				break;
2623 			case 11:  /* Display 16bpp. */
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2629 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632 				break;
2633 			case 12:  /* Display 32bpp. */
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2637 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2638 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2639 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2642 				break;
2643 			case 13:  /* Thin. */
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2645 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2647 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2648 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2649 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2651 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652 				break;
2653 			case 14:  /* Thin 8 bpp. */
2654 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2657 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2659 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662 				break;
2663 			case 15:  /* Thin 16 bpp. */
2664 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2669 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2671 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2672 				break;
2673 			case 16:  /* Thin 32 bpp. */
2674 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2678 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2679 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682 				break;
2683 			case 17:  /* Thin 64 bpp. */
2684 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687 						 TILE_SPLIT(split_equal_to_row_size) |
2688 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2689 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2691 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2692 				break;
2693 			case 21:  /* 8 bpp PRT. */
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2698 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2699 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2700 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2701 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2702 				break;
2703 			case 22:  /* 16 bpp PRT */
2704 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2708 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2709 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2712 				break;
2713 			case 23:  /* 32 bpp PRT */
2714 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2716 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2717 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2718 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2719 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2721 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2722 				break;
2723 			case 24:  /* 64 bpp PRT */
2724 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2726 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2727 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2728 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2729 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2731 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2732 				break;
2733 			case 25:  /* 128 bpp PRT */
2734 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2736 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2737 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2738 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2739 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2742 				break;
2743 			default:
2744 				gb_tile_moden = 0;
2745 				break;
2746 			}
2747 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2748 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2749 		}
2750 	} else {
2751 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
	}
2752 }
2753 
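/* Route register accesses to one shader engine (se_num) and shader
 * array (sh_num) via GRBM_GFX_INDEX; 0xffffffff for either argument
 * selects broadcast writes to all instances of that unit.
 */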
2754 static void si_select_se_sh(struct radeon_device *rdev,
2755 			    u32 se_num, u32 sh_num)
2756 {
2757 	u32 data = INSTANCE_BROADCAST_WRITES;
2758 
2759 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2760 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2761 	else if (se_num == 0xffffffff)
2762 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2763 	else if (sh_num == 0xffffffff)
2764 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2765 	else
2766 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2767 	WREG32(GRBM_GFX_INDEX, data);
2768 }
2769 
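/* Build a mask with the low bit_width bits set; for bit_width < 32 this
 * is equivalent to (1U << bit_width) - 1.
 */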
2770 static u32 si_create_bitmask(u32 bit_width)
2771 {
2772 	u32 i, mask = 0;
2773 
2774 	for (i = 0; i < bit_width; i++) {
2775 		mask <<= 1;
2776 		mask |= 1;
2777 	}
2778 	return mask;
2779 }
2780 
2781 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2782 {
2783 	u32 data, mask;
2784 
2785 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2786 	if (data & 1)
2787 		data &= INACTIVE_CUS_MASK;
2788 	else
2789 		data = 0;
2790 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2791 
2792 	data >>= INACTIVE_CUS_SHIFT;
2793 
2794 	mask = si_create_bitmask(cu_per_sh);
2795 
2796 	return ~data & mask;
2797 }
2798 
2799 static void si_setup_spi(struct radeon_device *rdev,
2800 			 u32 se_num, u32 sh_per_se,
2801 			 u32 cu_per_sh)
2802 {
2803 	int i, j, k;
2804 	u32 data, mask, active_cu;
2805 
2806 	for (i = 0; i < se_num; i++) {
2807 		for (j = 0; j < sh_per_se; j++) {
2808 			si_select_se_sh(rdev, i, j);
2809 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2810 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2811 
2813 			for (k = 0; k < 16; k++) {
2814 				mask = 1 << k;
2815 				if (active_cu & mask) {
2816 					data &= ~mask;
2817 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2818 					break;
2819 				}
2820 			}
2821 		}
2822 	}
2823 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2824 }
2825 
2826 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2827 			      u32 max_rb_num_per_se,
2828 			      u32 sh_per_se)
2829 {
2830 	u32 data, mask;
2831 
2832 	data = RREG32(CC_RB_BACKEND_DISABLE);
2833 	if (data & 1)
2834 		data &= BACKEND_DISABLE_MASK;
2835 	else
2836 		data = 0;
2837 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2838 
2839 	data >>= BACKEND_DISABLE_SHIFT;
2840 
2841 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2842 
2843 	return data & mask;
2844 }
2845 
2846 static void si_setup_rb(struct radeon_device *rdev,
2847 			u32 se_num, u32 sh_per_se,
2848 			u32 max_rb_num_per_se)
2849 {
2850 	int i, j;
2851 	u32 data, mask;
2852 	u32 disabled_rbs = 0;
2853 	u32 enabled_rbs = 0;
2854 
2855 	for (i = 0; i < se_num; i++) {
2856 		for (j = 0; j < sh_per_se; j++) {
2857 			si_select_se_sh(rdev, i, j);
2858 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
2859 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2860 		}
2861 	}
2862 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2863 
2864 	mask = 1;
2865 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2866 		if (!(disabled_rbs & mask))
2867 			enabled_rbs |= mask;
2868 		mask <<= 1;
2869 	}
2870 
2871 	rdev->config.si.backend_enable_mask = enabled_rbs;
2872 
2873 	for (i = 0; i < se_num; i++) {
2874 		si_select_se_sh(rdev, i, 0xffffffff);
2875 		data = 0;
2876 		for (j = 0; j < sh_per_se; j++) {
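			/* each SH contributes a 2-bit RB_MAP field */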
2877 			switch (enabled_rbs & 3) {
2878 			case 1:
2879 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2880 				break;
2881 			case 2:
2882 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2883 				break;
2884 			case 3:
2885 			default:
2886 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2887 				break;
2888 			}
2889 			enabled_rbs >>= 2;
2890 		}
2891 		WREG32(PA_SC_RASTER_CONFIG, data);
2892 	}
2893 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2894 }
2895 
2896 static void si_gpu_init(struct radeon_device *rdev)
2897 {
2898 	u32 gb_addr_config = 0;
2899 	u32 mc_shared_chmap, mc_arb_ramcfg;
2900 	u32 sx_debug_1;
2901 	u32 hdp_host_path_cntl;
2902 	u32 tmp;
2903 	int i, j;
2904 
2905 	switch (rdev->family) {
2906 	case CHIP_TAHITI:
2907 		rdev->config.si.max_shader_engines = 2;
2908 		rdev->config.si.max_tile_pipes = 12;
2909 		rdev->config.si.max_cu_per_sh = 8;
2910 		rdev->config.si.max_sh_per_se = 2;
2911 		rdev->config.si.max_backends_per_se = 4;
2912 		rdev->config.si.max_texture_channel_caches = 12;
2913 		rdev->config.si.max_gprs = 256;
2914 		rdev->config.si.max_gs_threads = 32;
2915 		rdev->config.si.max_hw_contexts = 8;
2916 
2917 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2918 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2919 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2920 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2921 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2922 		break;
2923 	case CHIP_PITCAIRN:
2924 		rdev->config.si.max_shader_engines = 2;
2925 		rdev->config.si.max_tile_pipes = 8;
2926 		rdev->config.si.max_cu_per_sh = 5;
2927 		rdev->config.si.max_sh_per_se = 2;
2928 		rdev->config.si.max_backends_per_se = 4;
2929 		rdev->config.si.max_texture_channel_caches = 8;
2930 		rdev->config.si.max_gprs = 256;
2931 		rdev->config.si.max_gs_threads = 32;
2932 		rdev->config.si.max_hw_contexts = 8;
2933 
2934 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2935 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2936 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2937 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2938 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2939 		break;
2940 	case CHIP_VERDE:
2941 	default:
2942 		rdev->config.si.max_shader_engines = 1;
2943 		rdev->config.si.max_tile_pipes = 4;
2944 		rdev->config.si.max_cu_per_sh = 5;
2945 		rdev->config.si.max_sh_per_se = 2;
2946 		rdev->config.si.max_backends_per_se = 4;
2947 		rdev->config.si.max_texture_channel_caches = 4;
2948 		rdev->config.si.max_gprs = 256;
2949 		rdev->config.si.max_gs_threads = 32;
2950 		rdev->config.si.max_hw_contexts = 8;
2951 
2952 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2953 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2954 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2955 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2956 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2957 		break;
2958 	case CHIP_OLAND:
2959 		rdev->config.si.max_shader_engines = 1;
2960 		rdev->config.si.max_tile_pipes = 4;
2961 		rdev->config.si.max_cu_per_sh = 6;
2962 		rdev->config.si.max_sh_per_se = 1;
2963 		rdev->config.si.max_backends_per_se = 2;
2964 		rdev->config.si.max_texture_channel_caches = 4;
2965 		rdev->config.si.max_gprs = 256;
2966 		rdev->config.si.max_gs_threads = 16;
2967 		rdev->config.si.max_hw_contexts = 8;
2968 
2969 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2970 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2971 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2972 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2973 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2974 		break;
2975 	case CHIP_HAINAN:
2976 		rdev->config.si.max_shader_engines = 1;
2977 		rdev->config.si.max_tile_pipes = 4;
2978 		rdev->config.si.max_cu_per_sh = 5;
2979 		rdev->config.si.max_sh_per_se = 1;
2980 		rdev->config.si.max_backends_per_se = 1;
2981 		rdev->config.si.max_texture_channel_caches = 2;
2982 		rdev->config.si.max_gprs = 256;
2983 		rdev->config.si.max_gs_threads = 16;
2984 		rdev->config.si.max_hw_contexts = 8;
2985 
2986 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2987 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2988 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2989 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2990 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2991 		break;
2992 	}
2993 
2994 	/* Initialize HDP */
2995 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2996 		WREG32((0x2c14 + j), 0x00000000);
2997 		WREG32((0x2c18 + j), 0x00000000);
2998 		WREG32((0x2c1c + j), 0x00000000);
2999 		WREG32((0x2c20 + j), 0x00000000);
3000 		WREG32((0x2c24 + j), 0x00000000);
3001 	}
3002 
3003 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3004 
3005 	evergreen_fix_pci_max_read_req_size(rdev);
3006 
3007 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3008 
3009 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3010 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3011 
3012 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3013 	rdev->config.si.mem_max_burst_length_bytes = 256;
3014 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3015 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3016 	if (rdev->config.si.mem_row_size_in_kb > 4)
3017 		rdev->config.si.mem_row_size_in_kb = 4;
3018 	/* XXX use MC settings? */
3019 	rdev->config.si.shader_engine_tile_size = 32;
3020 	rdev->config.si.num_gpus = 1;
3021 	rdev->config.si.multi_gpu_tile_size = 64;
3022 
3023 	/* fix up row size */
3024 	gb_addr_config &= ~ROW_SIZE_MASK;
3025 	switch (rdev->config.si.mem_row_size_in_kb) {
3026 	case 1:
3027 	default:
3028 		gb_addr_config |= ROW_SIZE(0);
3029 		break;
3030 	case 2:
3031 		gb_addr_config |= ROW_SIZE(1);
3032 		break;
3033 	case 4:
3034 		gb_addr_config |= ROW_SIZE(2);
3035 		break;
3036 	}
3037 
3038 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3039 	 * not have bank info, so create a custom tiling dword.
3040 	 * bits 3:0   num_pipes
3041 	 * bits 7:4   num_banks
3042 	 * bits 11:8  group_size
3043 	 * bits 15:12 row_size
	 * (an illustrative decode sketch follows this function)
3044 	 */
3045 	rdev->config.si.tile_config = 0;
3046 	switch (rdev->config.si.num_tile_pipes) {
3047 	case 1:
3048 		rdev->config.si.tile_config |= (0 << 0);
3049 		break;
3050 	case 2:
3051 		rdev->config.si.tile_config |= (1 << 0);
3052 		break;
3053 	case 4:
3054 		rdev->config.si.tile_config |= (2 << 0);
3055 		break;
3056 	case 8:
3057 	default:
3058 		/* XXX what about 12? */
3059 		rdev->config.si.tile_config |= (3 << 0);
3060 		break;
3061 	}
3062 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3063 	case 0: /* four banks */
3064 		rdev->config.si.tile_config |= 0 << 4;
3065 		break;
3066 	case 1: /* eight banks */
3067 		rdev->config.si.tile_config |= 1 << 4;
3068 		break;
3069 	case 2: /* sixteen banks */
3070 	default:
3071 		rdev->config.si.tile_config |= 2 << 4;
3072 		break;
3073 	}
3074 	rdev->config.si.tile_config |=
3075 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3076 	rdev->config.si.tile_config |=
3077 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3078 
3079 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3080 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3081 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3082 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3083 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3084 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3085 	if (rdev->has_uvd) {
3086 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3087 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3088 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3089 	}
3090 
3091 	si_tiling_mode_table_init(rdev);
3092 
3093 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3094 		    rdev->config.si.max_sh_per_se,
3095 		    rdev->config.si.max_backends_per_se);
3096 
3097 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3098 		     rdev->config.si.max_sh_per_se,
3099 		     rdev->config.si.max_cu_per_sh);
3100 
3102 	/* set HW defaults for 3D engine */
3103 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3104 				     ROQ_IB2_START(0x2b)));
3105 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3106 
3107 	sx_debug_1 = RREG32(SX_DEBUG_1);
3108 	WREG32(SX_DEBUG_1, sx_debug_1);
3109 
3110 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3111 
3112 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3113 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3114 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3115 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3116 
3117 	WREG32(VGT_NUM_INSTANCES, 1);
3118 
3119 	WREG32(CP_PERFMON_CNTL, 0);
3120 
3121 	WREG32(SQ_CONFIG, 0);
3122 
3123 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3124 					  FORCE_EOV_MAX_REZ_CNT(255)));
3125 
3126 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3127 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3128 
3129 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3130 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3131 
3132 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3133 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3134 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3135 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3136 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3137 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3138 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3139 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3140 
3141 	tmp = RREG32(HDP_MISC_CNTL);
3142 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3143 	WREG32(HDP_MISC_CNTL, tmp);
3144 
3145 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3146 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3147 
3148 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3149 
3150 	udelay(50);
3151 }
3152 
3153 /*
3154  * GPU scratch registers helper functions.
3155  */
3156 static void si_scratch_init(struct radeon_device *rdev)
3157 {
3158 	int i;
3159 
3160 	rdev->scratch.num_reg = 7;
3161 	rdev->scratch.reg_base = SCRATCH_REG0;
3162 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3163 		rdev->scratch.free[i] = true;
3164 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3165 	}
3166 }
3167 
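/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the read caches over the gart and emits an
 * EVENT_WRITE_EOP packet to write the fence sequence number
 * and raise an interrupt (SI).
 */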
3168 void si_fence_ring_emit(struct radeon_device *rdev,
3169 			struct radeon_fence *fence)
3170 {
3171 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3172 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3173 
3174 	/* flush read cache over gart */
3175 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3176 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3177 	radeon_ring_write(ring, 0);
3178 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3179 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3180 			  PACKET3_TC_ACTION_ENA |
3181 			  PACKET3_SH_KCACHE_ACTION_ENA |
3182 			  PACKET3_SH_ICACHE_ACTION_ENA);
3183 	radeon_ring_write(ring, 0xFFFFFFFF);
3184 	radeon_ring_write(ring, 0);
3185 	radeon_ring_write(ring, 10); /* poll interval */
3186 	/* EVENT_WRITE_EOP - flush caches, send int */
3187 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3188 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3189 	radeon_ring_write(ring, addr & 0xffffffff);
3190 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3191 	radeon_ring_write(ring, fence->seq);
3192 	radeon_ring_write(ring, 0);
3193 }
3194 
3195 /*
3196  * IB (Indirect Buffer) handling
3197  */
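/**
 * si_ring_ib_execute - emit an IB (Indirect Buffer) on the ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB on the
 * requested ring.  For DE IBs, also flushes the read caches over
 * the gart for the IB's vmid (SI).
 */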
3198 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3199 {
3200 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3201 	u32 header;
3202 
3203 	if (ib->is_const_ib) {
3204 		/* set switch buffer packet before const IB */
3205 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3206 		radeon_ring_write(ring, 0);
3207 
3208 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3209 	} else {
3210 		u32 next_rptr;
3211 		if (ring->rptr_save_reg) {
3212 			next_rptr = ring->wptr + 3 + 4 + 8;
3213 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3214 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3215 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3216 			radeon_ring_write(ring, next_rptr);
3217 		} else if (rdev->wb.enabled) {
3218 			next_rptr = ring->wptr + 5 + 4 + 8;
3219 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3220 			radeon_ring_write(ring, (1 << 8)); /* WRITE_DATA_DST_SEL(1): memory */
3221 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3222 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3223 			radeon_ring_write(ring, next_rptr);
3224 		}
3225 
3226 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3227 	}
3228 
3229 	radeon_ring_write(ring, header);
3230 	radeon_ring_write(ring,
3231 #ifdef __BIG_ENDIAN
3232 			  (2 << 0) |
3233 #endif
3234 			  (ib->gpu_addr & 0xFFFFFFFC));
3235 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3236 	radeon_ring_write(ring, ib->length_dw |
3237 			  (ib->vm ? (ib->vm->id << 24) : 0));
3238 
3239 	if (!ib->is_const_ib) {
3240 		/* flush read cache over gart for this vmid */
3241 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3242 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3243 		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3244 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3245 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3246 				  PACKET3_TC_ACTION_ENA |
3247 				  PACKET3_SH_KCACHE_ACTION_ENA |
3248 				  PACKET3_SH_ICACHE_ACTION_ENA);
3249 		radeon_ring_write(ring, 0xFFFFFFFF);
3250 		radeon_ring_write(ring, 0);
3251 		radeon_ring_write(ring, 10); /* poll interval */
3252 	}
3253 }
3254 
3255 /*
3256  * CP.
3257  */
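/**
 * si_cp_enable - enable/disable the CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the PFP, ME and CE (SI).  When halting, the
 * gfx and compute rings are also marked as not ready.
 */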
3258 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3259 {
3260 	if (enable)
3261 		WREG32(CP_ME_CNTL, 0);
3262 	else {
3263 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3264 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3265 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3266 		WREG32(SCRATCH_UMSK, 0);
3267 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3268 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3269 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3270 	}
3271 	udelay(50);
3272 }
3273 
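/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and loads the PFP, CE and ME ucode (SI).
 * Returns 0 for success, -EINVAL if the firmware is not loaded.
 */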
3274 static int si_cp_load_microcode(struct radeon_device *rdev)
3275 {
3276 	const __be32 *fw_data;
3277 	int i;
3278 
3279 	if (!rdev->me_fw || !rdev->pfp_fw)
3280 		return -EINVAL;
3281 
3282 	si_cp_enable(rdev, false);
3283 
3284 	/* PFP */
3285 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3286 	WREG32(CP_PFP_UCODE_ADDR, 0);
3287 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3288 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3289 	WREG32(CP_PFP_UCODE_ADDR, 0);
3290 
3291 	/* CE */
3292 	fw_data = (const __be32 *)rdev->ce_fw->data;
3293 	WREG32(CP_CE_UCODE_ADDR, 0);
3294 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3295 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3296 	WREG32(CP_CE_UCODE_ADDR, 0);
3297 
3298 	/* ME */
3299 	fw_data = (const __be32 *)rdev->me_fw->data;
3300 	WREG32(CP_ME_RAM_WADDR, 0);
3301 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3302 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3303 	WREG32(CP_ME_RAM_WADDR, 0);
3304 
3305 	WREG32(CP_PFP_UCODE_ADDR, 0);
3306 	WREG32(CP_CE_UCODE_ADDR, 0);
3307 	WREG32(CP_ME_RAM_WADDR, 0);
3308 	WREG32(CP_ME_RAM_RADDR, 0);
3309 	return 0;
3310 }
3311 
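/**
 * si_cp_start - initialize the CP
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the CP and the CE partitions, emits the clear state,
 * and clears the compute context state on the compute rings (SI).
 * Returns 0 for success, error on failure.
 */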
3312 static int si_cp_start(struct radeon_device *rdev)
3313 {
3314 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3315 	int r, i;
3316 
3317 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3318 	if (r) {
3319 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3320 		return r;
3321 	}
3322 	/* init the CP */
3323 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3324 	radeon_ring_write(ring, 0x1);
3325 	radeon_ring_write(ring, 0x0);
3326 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3327 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3328 	radeon_ring_write(ring, 0);
3329 	radeon_ring_write(ring, 0);
3330 
3331 	/* init the CE partitions */
3332 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3333 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3334 	radeon_ring_write(ring, 0xc000);
3335 	radeon_ring_write(ring, 0xe000);
3336 	radeon_ring_unlock_commit(rdev, ring);
3337 
3338 	si_cp_enable(rdev, true);
3339 
3340 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3341 	if (r) {
3342 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3343 		return r;
3344 	}
3345 
3346 	/* setup clear context state */
3347 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3348 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3349 
3350 	for (i = 0; i < si_default_size; i++)
3351 		radeon_ring_write(ring, si_default_state[i]);
3352 
3353 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3354 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3355 
3356 	/* set clear context state */
3357 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3358 	radeon_ring_write(ring, 0);
3359 
3360 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3361 	radeon_ring_write(ring, 0x00000316);
3362 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3363 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3364 
3365 	radeon_ring_unlock_commit(rdev, ring);
3366 
3367 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3368 		ring = &rdev->ring[i];
3369 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3370 
3371 		/* clear the compute context state */
3372 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3373 		radeon_ring_write(ring, 0);
3374 
3375 		radeon_ring_unlock_commit(rdev, ring);
3376 	}
3377 
3378 	return 0;
3379 }
3380 
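/**
 * si_cp_fini - tear down the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and frees the gfx and compute rings along with
 * their rptr save scratch registers (SI).
 */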
3381 static void si_cp_fini(struct radeon_device *rdev)
3382 {
3383 	struct radeon_ring *ring;
3384 	si_cp_enable(rdev, false);
3385 
3386 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3387 	radeon_ring_fini(rdev, ring);
3388 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3389 
3390 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3391 	radeon_ring_fini(rdev, ring);
3392 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3393 
3394 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3395 	radeon_ring_fini(rdev, ring);
3396 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3397 }
3398 
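/**
 * si_cp_resume - set up and start the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the ring buffer registers for the gfx ring and the two
 * compute rings, starts the CP and tests all three rings (SI).
 * Returns 0 for success, error on failure.
 */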
3399 static int si_cp_resume(struct radeon_device *rdev)
3400 {
3401 	struct radeon_ring *ring;
3402 	u32 tmp;
3403 	u32 rb_bufsz;
3404 	int r;
3405 
3406 	si_enable_gui_idle_interrupt(rdev, false);
3407 
3408 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3409 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3410 
3411 	/* Set the write pointer delay */
3412 	WREG32(CP_RB_WPTR_DELAY, 0);
3413 
3414 	WREG32(CP_DEBUG, 0);
3415 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3416 
3417 	/* ring 0 - compute and gfx */
3418 	/* Set ring buffer size */
3419 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3420 	rb_bufsz = order_base_2(ring->ring_size / 8);
3421 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3422 #ifdef __BIG_ENDIAN
3423 	tmp |= BUF_SWAP_32BIT;
3424 #endif
3425 	WREG32(CP_RB0_CNTL, tmp);
3426 
3427 	/* Initialize the ring buffer's read and write pointers */
3428 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3429 	ring->wptr = 0;
3430 	WREG32(CP_RB0_WPTR, ring->wptr);
3431 
3432 	/* set the wb address whether it's enabled or not */
3433 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3434 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3435 
3436 	if (rdev->wb.enabled)
3437 		WREG32(SCRATCH_UMSK, 0xff);
3438 	else {
3439 		tmp |= RB_NO_UPDATE;
3440 		WREG32(SCRATCH_UMSK, 0);
3441 	}
3442 
3443 	mdelay(1);
3444 	WREG32(CP_RB0_CNTL, tmp);
3445 
3446 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3447 
3448 	/* ring1 - compute only */
3449 	/* Set ring buffer size */
3450 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3451 	rb_bufsz = order_base_2(ring->ring_size / 8);
3452 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3453 #ifdef __BIG_ENDIAN
3454 	tmp |= BUF_SWAP_32BIT;
3455 #endif
3456 	WREG32(CP_RB1_CNTL, tmp);
3457 
3458 	/* Initialize the ring buffer's read and write pointers */
3459 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3460 	ring->wptr = 0;
3461 	WREG32(CP_RB1_WPTR, ring->wptr);
3462 
3463 	/* set the wb address whether it's enabled or not */
3464 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3465 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3466 
3467 	mdelay(1);
3468 	WREG32(CP_RB1_CNTL, tmp);
3469 
3470 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3471 
3472 	/* ring2 - compute only */
3473 	/* Set ring buffer size */
3474 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3475 	rb_bufsz = order_base_2(ring->ring_size / 8);
3476 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3477 #ifdef __BIG_ENDIAN
3478 	tmp |= BUF_SWAP_32BIT;
3479 #endif
3480 	WREG32(CP_RB2_CNTL, tmp);
3481 
3482 	/* Initialize the ring buffer's read and write pointers */
3483 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3484 	ring->wptr = 0;
3485 	WREG32(CP_RB2_WPTR, ring->wptr);
3486 
3487 	/* set the wb address whether it's enabled or not */
3488 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3489 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3490 
3491 	mdelay(1);
3492 	WREG32(CP_RB2_CNTL, tmp);
3493 
3494 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3495 
3496 	/* start the rings */
3497 	si_cp_start(rdev);
3498 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3499 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3500 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3501 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3502 	if (r) {
3503 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3504 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3505 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3506 		return r;
3507 	}
3508 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3509 	if (r) {
3510 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3511 	}
3512 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3513 	if (r) {
3514 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3515 	}
3516 
3517 	si_enable_gui_idle_interrupt(rdev, true);
3518 
3519 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3520 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3521 
3522 	return 0;
3523 }
3524 
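/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Checks which blocks are busy and returns the relevant reset
 * mask to be used by si_gpu_soft_reset() (SI).
 * Returns a mask of the blocks to be reset.
 */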
3525 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3526 {
3527 	u32 reset_mask = 0;
3528 	u32 tmp;
3529 
3530 	/* GRBM_STATUS */
3531 	tmp = RREG32(GRBM_STATUS);
3532 	if (tmp & (PA_BUSY | SC_BUSY |
3533 		   BCI_BUSY | SX_BUSY |
3534 		   TA_BUSY | VGT_BUSY |
3535 		   DB_BUSY | CB_BUSY |
3536 		   GDS_BUSY | SPI_BUSY |
3537 		   IA_BUSY | IA_BUSY_NO_DMA))
3538 		reset_mask |= RADEON_RESET_GFX;
3539 
3540 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3541 		   CP_BUSY | CP_COHERENCY_BUSY))
3542 		reset_mask |= RADEON_RESET_CP;
3543 
3544 	if (tmp & GRBM_EE_BUSY)
3545 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3546 
3547 	/* GRBM_STATUS2 */
3548 	tmp = RREG32(GRBM_STATUS2);
3549 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3550 		reset_mask |= RADEON_RESET_RLC;
3551 
3552 	/* DMA_STATUS_REG 0 */
3553 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3554 	if (!(tmp & DMA_IDLE))
3555 		reset_mask |= RADEON_RESET_DMA;
3556 
3557 	/* DMA_STATUS_REG 1 */
3558 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3559 	if (!(tmp & DMA_IDLE))
3560 		reset_mask |= RADEON_RESET_DMA1;
3561 
3562 	/* SRBM_STATUS2 */
3563 	tmp = RREG32(SRBM_STATUS2);
3564 	if (tmp & DMA_BUSY)
3565 		reset_mask |= RADEON_RESET_DMA;
3566 
3567 	if (tmp & DMA1_BUSY)
3568 		reset_mask |= RADEON_RESET_DMA1;
3569 
3570 	/* SRBM_STATUS */
3571 	tmp = RREG32(SRBM_STATUS);
3572 
3573 	if (tmp & IH_BUSY)
3574 		reset_mask |= RADEON_RESET_IH;
3575 
3576 	if (tmp & SEM_BUSY)
3577 		reset_mask |= RADEON_RESET_SEM;
3578 
3579 	if (tmp & GRBM_RQ_PENDING)
3580 		reset_mask |= RADEON_RESET_GRBM;
3581 
3582 	if (tmp & VMC_BUSY)
3583 		reset_mask |= RADEON_RESET_VMC;
3584 
3585 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3586 		   MCC_BUSY | MCD_BUSY))
3587 		reset_mask |= RADEON_RESET_MC;
3588 
3589 	if (evergreen_is_display_hung(rdev))
3590 		reset_mask |= RADEON_RESET_DISPLAY;
3591 
3592 	/* VM_L2_STATUS */
3593 	tmp = RREG32(VM_L2_STATUS);
3594 	if (tmp & L2_BUSY)
3595 		reset_mask |= RADEON_RESET_VMC;
3596 
3597 	/* Skip MC reset as it's most likely not hung, just busy */
3598 	if (reset_mask & RADEON_RESET_MC) {
3599 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3600 		reset_mask &= ~RADEON_RESET_MC;
3601 	}
3602 
3603 	return reset_mask;
3604 }
3605 
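/**
 * si_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft resets the blocks specified in @reset_mask (SI).
 */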
3606 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3607 {
3608 	struct evergreen_mc_save save;
3609 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3610 	u32 tmp;
3611 
3612 	if (reset_mask == 0)
3613 		return;
3614 
3615 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3616 
3617 	evergreen_print_gpu_status_regs(rdev);
3618 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3619 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3620 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3621 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3622 
3623 	/* disable PG/CG */
3624 	si_fini_pg(rdev);
3625 	si_fini_cg(rdev);
3626 
3627 	/* stop the rlc */
3628 	si_rlc_stop(rdev);
3629 
3630 	/* Disable CP parsing/prefetching */
3631 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3632 
3633 	if (reset_mask & RADEON_RESET_DMA) {
3634 		/* dma0 */
3635 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3636 		tmp &= ~DMA_RB_ENABLE;
3637 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3638 	}
3639 	if (reset_mask & RADEON_RESET_DMA1) {
3640 		/* dma1 */
3641 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3642 		tmp &= ~DMA_RB_ENABLE;
3643 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3644 	}
3645 
3646 	udelay(50);
3647 
3648 	evergreen_mc_stop(rdev, &save);
3649 	if (evergreen_mc_wait_for_idle(rdev)) {
3650 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3651 	}
3652 
3653 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3654 		grbm_soft_reset = SOFT_RESET_CB |
3655 			SOFT_RESET_DB |
3656 			SOFT_RESET_GDS |
3657 			SOFT_RESET_PA |
3658 			SOFT_RESET_SC |
3659 			SOFT_RESET_BCI |
3660 			SOFT_RESET_SPI |
3661 			SOFT_RESET_SX |
3662 			SOFT_RESET_TC |
3663 			SOFT_RESET_TA |
3664 			SOFT_RESET_VGT |
3665 			SOFT_RESET_IA;
3666 	}
3667 
3668 	if (reset_mask & RADEON_RESET_CP) {
3669 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3670 
3671 		srbm_soft_reset |= SOFT_RESET_GRBM;
3672 	}
3673 
3674 	if (reset_mask & RADEON_RESET_DMA)
3675 		srbm_soft_reset |= SOFT_RESET_DMA;
3676 
3677 	if (reset_mask & RADEON_RESET_DMA1)
3678 		srbm_soft_reset |= SOFT_RESET_DMA1;
3679 
3680 	if (reset_mask & RADEON_RESET_DISPLAY)
3681 		srbm_soft_reset |= SOFT_RESET_DC;
3682 
3683 	if (reset_mask & RADEON_RESET_RLC)
3684 		grbm_soft_reset |= SOFT_RESET_RLC;
3685 
3686 	if (reset_mask & RADEON_RESET_SEM)
3687 		srbm_soft_reset |= SOFT_RESET_SEM;
3688 
3689 	if (reset_mask & RADEON_RESET_IH)
3690 		srbm_soft_reset |= SOFT_RESET_IH;
3691 
3692 	if (reset_mask & RADEON_RESET_GRBM)
3693 		srbm_soft_reset |= SOFT_RESET_GRBM;
3694 
3695 	if (reset_mask & RADEON_RESET_VMC)
3696 		srbm_soft_reset |= SOFT_RESET_VMC;
3697 
3698 	if (reset_mask & RADEON_RESET_MC)
3699 		srbm_soft_reset |= SOFT_RESET_MC;
3700 
3701 	if (grbm_soft_reset) {
3702 		tmp = RREG32(GRBM_SOFT_RESET);
3703 		tmp |= grbm_soft_reset;
3704 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3705 		WREG32(GRBM_SOFT_RESET, tmp);
3706 		tmp = RREG32(GRBM_SOFT_RESET);
3707 
3708 		udelay(50);
3709 
3710 		tmp &= ~grbm_soft_reset;
3711 		WREG32(GRBM_SOFT_RESET, tmp);
3712 		tmp = RREG32(GRBM_SOFT_RESET);
3713 	}
3714 
3715 	if (srbm_soft_reset) {
3716 		tmp = RREG32(SRBM_SOFT_RESET);
3717 		tmp |= srbm_soft_reset;
3718 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3719 		WREG32(SRBM_SOFT_RESET, tmp);
3720 		tmp = RREG32(SRBM_SOFT_RESET);
3721 
3722 		udelay(50);
3723 
3724 		tmp &= ~srbm_soft_reset;
3725 		WREG32(SRBM_SOFT_RESET, tmp);
3726 		tmp = RREG32(SRBM_SOFT_RESET);
3727 	}
3728 
3729 	/* Wait a little for things to settle down */
3730 	udelay(50);
3731 
3732 	evergreen_mc_resume(rdev, &save);
3733 	udelay(50);
3734 
3735 	evergreen_print_gpu_status_regs(rdev);
3736 }
3737 
3738 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3739 {
3740 	u32 tmp, i;
3741 
3742 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3743 	tmp |= SPLL_BYPASS_EN;
3744 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3745 
3746 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3747 	tmp |= SPLL_CTLREQ_CHG;
3748 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3749 
3750 	for (i = 0; i < rdev->usec_timeout; i++) {
3751 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3752 			break;
3753 		udelay(1);
3754 	}
3755 
3756 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3757 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3758 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3759 
3760 	tmp = RREG32(MPLL_CNTL_MODE);
3761 	tmp &= ~MPLL_MCLK_SEL;
3762 	WREG32(MPLL_CNTL_MODE, tmp);
3763 }
3764 
3765 static void si_spll_powerdown(struct radeon_device *rdev)
3766 {
3767 	u32 tmp;
3768 
3769 	tmp = RREG32(SPLL_CNTL_MODE);
3770 	tmp |= SPLL_SW_DIR_CONTROL;
3771 	WREG32(SPLL_CNTL_MODE, tmp);
3772 
3773 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3774 	tmp |= SPLL_RESET;
3775 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3776 
3777 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3778 	tmp |= SPLL_SLEEP;
3779 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3780 
3781 	tmp = RREG32(SPLL_CNTL_MODE);
3782 	tmp &= ~SPLL_SW_DIR_CONTROL;
3783 	WREG32(SPLL_CNTL_MODE, tmp);
3784 }
3785 
3786 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3787 {
3788 	struct evergreen_mc_save save;
3789 	u32 tmp, i;
3790 
3791 	dev_info(rdev->dev, "GPU pci config reset\n");
3792 
3793 	/* disable dpm? */
3794 
3795 	/* disable cg/pg */
3796 	si_fini_pg(rdev);
3797 	si_fini_cg(rdev);
3798 
3799 	/* Disable CP parsing/prefetching */
3800 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3801 	/* dma0 */
3802 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3803 	tmp &= ~DMA_RB_ENABLE;
3804 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3805 	/* dma1 */
3806 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3807 	tmp &= ~DMA_RB_ENABLE;
3808 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3809 	/* XXX other engines? */
3810 
3811 	/* halt the rlc, disable cp internal ints */
3812 	si_rlc_stop(rdev);
3813 
3814 	udelay(50);
3815 
3816 	/* disable mem access */
3817 	evergreen_mc_stop(rdev, &save);
3818 	if (evergreen_mc_wait_for_idle(rdev)) {
3819 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3820 	}
3821 
3822 	/* set mclk/sclk to bypass */
3823 	si_set_clk_bypass_mode(rdev);
3824 	/* powerdown spll */
3825 	si_spll_powerdown(rdev);
3826 	/* disable BM */
3827 	pci_clear_master(rdev->pdev);
3828 	/* reset */
3829 	radeon_pci_config_reset(rdev);
3830 	/* wait for asic to come out of reset */
3831 	for (i = 0; i < rdev->usec_timeout; i++) {
3832 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
3833 			break;
3834 		udelay(1);
3835 	}
3836 }
3837 
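/**
 * si_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Looks up which blocks are hung and attempts a soft reset;
 * if that fails and a hard reset is allowed, falls back to a
 * PCI config reset (SI).
 * Returns 0 for success.
 */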
3838 int si_asic_reset(struct radeon_device *rdev)
3839 {
3840 	u32 reset_mask;
3841 
3842 	reset_mask = si_gpu_check_soft_reset(rdev);
3843 
3844 	if (reset_mask)
3845 		r600_set_bios_scratch_engine_hung(rdev, true);
3846 
3847 	/* try soft reset */
3848 	si_gpu_soft_reset(rdev, reset_mask);
3849 
3850 	reset_mask = si_gpu_check_soft_reset(rdev);
3851 
3852 	/* try pci config reset */
3853 	if (reset_mask && radeon_hard_reset)
3854 		si_gpu_pci_config_reset(rdev);
3855 
3856 	reset_mask = si_gpu_check_soft_reset(rdev);
3857 
3858 	if (!reset_mask)
3859 		r600_set_bios_scratch_engine_hung(rdev, false);
3860 
3861 	return 0;
3862 }
3863 
3864 /**
3865  * si_gfx_is_lockup - Check if the GFX engine is locked up
3866  *
3867  * @rdev: radeon_device pointer
3868  * @ring: radeon_ring structure holding ring information
3869  *
3870  * Check if the GFX engine is locked up.
3871  * Returns true if the engine appears to be locked up, false if not.
3872  */
3873 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3874 {
3875 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3876 
3877 	if (!(reset_mask & (RADEON_RESET_GFX |
3878 			    RADEON_RESET_COMPUTE |
3879 			    RADEON_RESET_CP))) {
3880 		radeon_ring_lockup_update(rdev, ring);
3881 		return false;
3882 	}
3883 	return radeon_ring_test_lockup(rdev, ring);
3884 }
3885 
3886 /* MC */
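/**
 * si_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Sets the location of VRAM in the GPU's address space and
 * programs the HDP registers while memory clients are stopped (SI).
 */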
3887 static void si_mc_program(struct radeon_device *rdev)
3888 {
3889 	struct evergreen_mc_save save;
3890 	u32 tmp;
3891 	int i, j;
3892 
3893 	/* Initialize HDP */
3894 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3895 		WREG32((0x2c14 + j), 0x00000000);
3896 		WREG32((0x2c18 + j), 0x00000000);
3897 		WREG32((0x2c1c + j), 0x00000000);
3898 		WREG32((0x2c20 + j), 0x00000000);
3899 		WREG32((0x2c24 + j), 0x00000000);
3900 	}
3901 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3902 
3903 	evergreen_mc_stop(rdev, &save);
3904 	if (radeon_mc_wait_for_idle(rdev)) {
3905 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3906 	}
3907 	if (!ASIC_IS_NODCE(rdev))
3908 		/* Lockout access through VGA aperture*/
3909 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3910 	/* Update configuration */
3911 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3912 	       rdev->mc.vram_start >> 12);
3913 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3914 	       rdev->mc.vram_end >> 12);
3915 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3916 	       rdev->vram_scratch.gpu_addr >> 12);
3917 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3918 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3919 	WREG32(MC_VM_FB_LOCATION, tmp);
3920 	/* XXX double check these! */
3921 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3922 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3923 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3924 	WREG32(MC_VM_AGP_BASE, 0);
3925 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3926 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3927 	if (radeon_mc_wait_for_idle(rdev)) {
3928 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3929 	}
3930 	evergreen_mc_resume(rdev, &save);
3931 	if (!ASIC_IS_NODCE(rdev)) {
3932 		/* we need to own VRAM, so turn off the VGA renderer here
3933 		 * to stop it overwriting our objects */
3934 		rv515_vga_render_disable(rdev);
3935 	}
3936 }
3937 
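/**
 * si_vram_gtt_location - try to find VRAM & GTT locations
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding memory information
 *
 * Places VRAM at the start of the GPU's address space, capping it
 * to leave room for at least 1024M of GTT, then places GTT (SI).
 */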
3938 void si_vram_gtt_location(struct radeon_device *rdev,
3939 			  struct radeon_mc *mc)
3940 {
3941 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3942 		/* leave room for at least 1024M GTT */
3943 		dev_warn(rdev->dev, "limiting VRAM\n");
3944 		mc->real_vram_size = 0xFFC0000000ULL;
3945 		mc->mc_vram_size = 0xFFC0000000ULL;
3946 	}
3947 	radeon_vram_location(rdev, &rdev->mc, 0);
3948 	rdev->mc.gtt_base_align = 0;
3949 	radeon_gtt_location(rdev, mc);
3950 }
3951 
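/**
 * si_mc_init - initialize the memory controller driver state
 *
 * @rdev: radeon_device pointer
 *
 * Looks up the amount of VRAM, the VRAM width, and decides how to
 * place VRAM and GTT within the GPU's address space (SI).
 * Returns 0 for success.
 */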
3952 static int si_mc_init(struct radeon_device *rdev)
3953 {
3954 	u32 tmp;
3955 	int chansize, numchan;
3956 
3957 	/* Get VRAM information */
3958 	rdev->mc.vram_is_ddr = true;
3959 	tmp = RREG32(MC_ARB_RAMCFG);
3960 	if (tmp & CHANSIZE_OVERRIDE) {
3961 		chansize = 16;
3962 	} else if (tmp & CHANSIZE_MASK) {
3963 		chansize = 64;
3964 	} else {
3965 		chansize = 32;
3966 	}
3967 	tmp = RREG32(MC_SHARED_CHMAP);
3968 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3969 	case 0:
3970 	default:
3971 		numchan = 1;
3972 		break;
3973 	case 1:
3974 		numchan = 2;
3975 		break;
3976 	case 2:
3977 		numchan = 4;
3978 		break;
3979 	case 3:
3980 		numchan = 8;
3981 		break;
3982 	case 4:
3983 		numchan = 3;
3984 		break;
3985 	case 5:
3986 		numchan = 6;
3987 		break;
3988 	case 6:
3989 		numchan = 10;
3990 		break;
3991 	case 7:
3992 		numchan = 12;
3993 		break;
3994 	case 8:
3995 		numchan = 16;
3996 		break;
3997 	}
3998 	rdev->mc.vram_width = numchan * chansize;
3999 	/* Could the aperture size report 0? */
4000 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4001 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4002 	/* size in MB on si */
4003 	tmp = RREG32(CONFIG_MEMSIZE);
4004 	/* some boards may have garbage in the upper 16 bits */
4005 	if (tmp & 0xffff0000) {
4006 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4007 		if (tmp & 0xffff)
4008 			tmp &= 0xffff;
4009 	}
4010 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4011 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4012 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4013 	si_vram_gtt_location(rdev, &rdev->mc);
4014 	radeon_update_bandwidth_info(rdev);
4015 
4016 	return 0;
4017 }
4018 
4019 /*
4020  * GART
4021  */
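/**
 * si_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and requests invalidation of the TLBs for
 * VM contexts 0-15 (SI).
 */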
4022 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4023 {
4024 	/* flush hdp cache */
4025 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4026 
4027 	/* bits 0-15 are the VM contexts0-15 */
4028 	WREG32(VM_INVALIDATE_REQUEST, 1);
4029 }
4030 
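/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the TLB and L2 cache
 * controls, sets up VM context 0 for system memory access and
 * contexts 1-15 for per-process address spaces, then flushes the
 * TLBs (SI).
 * Returns 0 for success, error on failure.
 */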
4031 static int si_pcie_gart_enable(struct radeon_device *rdev)
4032 {
4033 	int r, i;
4034 
4035 	if (rdev->gart.robj == NULL) {
4036 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4037 		return -EINVAL;
4038 	}
4039 	r = radeon_gart_table_vram_pin(rdev);
4040 	if (r)
4041 		return r;
4042 	radeon_gart_restore(rdev);
4043 	/* Setup TLB control */
4044 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4045 	       (0xA << 7) |
4046 	       ENABLE_L1_TLB |
4047 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4048 	       ENABLE_ADVANCED_DRIVER_MODEL |
4049 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4050 	/* Setup L2 cache */
4051 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4052 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4053 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4054 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4055 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4056 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4057 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4058 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4059 	/* setup context0 */
4060 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4061 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4062 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4063 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4064 			(u32)(rdev->dummy_page.addr >> 12));
4065 	WREG32(VM_CONTEXT0_CNTL2, 0);
4066 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4067 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4068 
4069 	WREG32(0x15D4, 0);
4070 	WREG32(0x15D8, 0);
4071 	WREG32(0x15DC, 0);
4072 
4073 	/* empty context1-15 */
4074 	/* set vm size, must be a multiple of 4 */
4075 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4076 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4077 	/* Assign the pt base to something valid for now; the pts used for
4078 	 * the VMs are determined by the application and set up and assigned
4079 	 * on the fly in the vm part of radeon_gart.c
4080 	 */
4081 	for (i = 1; i < 16; i++) {
4082 		if (i < 8)
4083 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4084 			       rdev->gart.table_addr >> 12);
4085 		else
4086 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4087 			       rdev->gart.table_addr >> 12);
4088 	}
4089 
4090 	/* enable context1-15 */
4091 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4092 	       (u32)(rdev->dummy_page.addr >> 12));
4093 	WREG32(VM_CONTEXT1_CNTL2, 4);
4094 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4095 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4096 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4097 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4098 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4099 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4100 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4101 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4102 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4103 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4104 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4105 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4106 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4107 
4108 	si_pcie_gart_tlb_flush(rdev);
4109 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4110 		 (unsigned)(rdev->mc.gtt_size >> 20),
4111 		 (unsigned long long)rdev->gart.table_addr);
4112 	rdev->gart.ready = true;
4113 	return 0;
4114 }
4115 
4116 static void si_pcie_gart_disable(struct radeon_device *rdev)
4117 {
4118 	/* Disable all tables */
4119 	WREG32(VM_CONTEXT0_CNTL, 0);
4120 	WREG32(VM_CONTEXT1_CNTL, 0);
4121 	/* Setup TLB control */
4122 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4123 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4124 	/* Setup L2 cache */
4125 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4126 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4127 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4128 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4129 	WREG32(VM_L2_CNTL2, 0);
4130 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4131 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4132 	radeon_gart_table_vram_unpin(rdev);
4133 }
4134 
4135 static void si_pcie_gart_fini(struct radeon_device *rdev)
4136 {
4137 	si_pcie_gart_disable(rdev);
4138 	radeon_gart_table_vram_free(rdev);
4139 	radeon_gart_fini(rdev);
4140 }
4141 
4142 /* vm parser */
4143 static bool si_vm_reg_valid(u32 reg)
4144 {
4145 	/* context regs are fine */
4146 	if (reg >= 0x28000)
4147 		return true;
4148 
4149 	/* check config regs */
4150 	switch (reg) {
4151 	case GRBM_GFX_INDEX:
4152 	case CP_STRMOUT_CNTL:
4153 	case VGT_VTX_VECT_EJECT_REG:
4154 	case VGT_CACHE_INVALIDATION:
4155 	case VGT_ESGS_RING_SIZE:
4156 	case VGT_GSVS_RING_SIZE:
4157 	case VGT_GS_VERTEX_REUSE:
4158 	case VGT_PRIMITIVE_TYPE:
4159 	case VGT_INDEX_TYPE:
4160 	case VGT_NUM_INDICES:
4161 	case VGT_NUM_INSTANCES:
4162 	case VGT_TF_RING_SIZE:
4163 	case VGT_HS_OFFCHIP_PARAM:
4164 	case VGT_TF_MEMORY_BASE:
4165 	case PA_CL_ENHANCE:
4166 	case PA_SU_LINE_STIPPLE_VALUE:
4167 	case PA_SC_LINE_STIPPLE_STATE:
4168 	case PA_SC_ENHANCE:
4169 	case SQC_CACHES:
4170 	case SPI_STATIC_THREAD_MGMT_1:
4171 	case SPI_STATIC_THREAD_MGMT_2:
4172 	case SPI_STATIC_THREAD_MGMT_3:
4173 	case SPI_PS_MAX_WAVE_ID:
4174 	case SPI_CONFIG_CNTL:
4175 	case SPI_CONFIG_CNTL_1:
4176 	case TA_CNTL_AUX:
4177 		return true;
4178 	default:
4179 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4180 		return false;
4181 	}
4182 }
4183 
4184 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4185 				  u32 *ib, struct radeon_cs_packet *pkt)
4186 {
4187 	switch (pkt->opcode) {
4188 	case PACKET3_NOP:
4189 	case PACKET3_SET_BASE:
4190 	case PACKET3_SET_CE_DE_COUNTERS:
4191 	case PACKET3_LOAD_CONST_RAM:
4192 	case PACKET3_WRITE_CONST_RAM:
4193 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4194 	case PACKET3_DUMP_CONST_RAM:
4195 	case PACKET3_INCREMENT_CE_COUNTER:
4196 	case PACKET3_WAIT_ON_DE_COUNTER:
4197 	case PACKET3_CE_WRITE:
4198 		break;
4199 	default:
4200 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4201 		return -EINVAL;
4202 	}
4203 	return 0;
4204 }
4205 
4206 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4207 {
4208 	u32 start_reg, reg, i;
4209 	u32 command = ib[idx + 4];
4210 	u32 info = ib[idx + 1];
4211 	u32 idx_value = ib[idx];
4212 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4213 		/* src address space is register */
4214 		if (((info & 0x60000000) >> 29) == 0) {
4215 			start_reg = idx_value << 2;
4216 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4217 				reg = start_reg;
4218 				if (!si_vm_reg_valid(reg)) {
4219 					DRM_ERROR("CP DMA Bad SRC register\n");
4220 					return -EINVAL;
4221 				}
4222 			} else {
4223 				for (i = 0; i < (command & 0x1fffff); i++) {
4224 					reg = start_reg + (4 * i);
4225 					if (!si_vm_reg_valid(reg)) {
4226 						DRM_ERROR("CP DMA Bad SRC register\n");
4227 						return -EINVAL;
4228 					}
4229 				}
4230 			}
4231 		}
4232 	}
4233 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4234 		/* dst address space is register */
4235 		if (((info & 0x00300000) >> 20) == 0) {
4236 			start_reg = ib[idx + 2];
4237 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4238 				reg = start_reg;
4239 				if (!si_vm_reg_valid(reg)) {
4240 					DRM_ERROR("CP DMA Bad DST register\n");
4241 					return -EINVAL;
4242 				}
4243 			} else {
4244 				for (i = 0; i < (command & 0x1fffff); i++) {
4245 					reg = start_reg + (4 * i);
4246 					if (!si_vm_reg_valid(reg)) {
4247 						DRM_ERROR("CP DMA Bad DST register\n");
4248 						return -EINVAL;
4249 					}
4250 				}
4251 			}
4252 		}
4253 	}
4254 	return 0;
4255 }
4256 
4257 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4258 				   u32 *ib, struct radeon_cs_packet *pkt)
4259 {
4260 	int r;
4261 	u32 idx = pkt->idx + 1;
4262 	u32 idx_value = ib[idx];
4263 	u32 start_reg, end_reg, reg, i;
4264 
4265 	switch (pkt->opcode) {
4266 	case PACKET3_NOP:
4267 	case PACKET3_SET_BASE:
4268 	case PACKET3_CLEAR_STATE:
4269 	case PACKET3_INDEX_BUFFER_SIZE:
4270 	case PACKET3_DISPATCH_DIRECT:
4271 	case PACKET3_DISPATCH_INDIRECT:
4272 	case PACKET3_ALLOC_GDS:
4273 	case PACKET3_WRITE_GDS_RAM:
4274 	case PACKET3_ATOMIC_GDS:
4275 	case PACKET3_ATOMIC:
4276 	case PACKET3_OCCLUSION_QUERY:
4277 	case PACKET3_SET_PREDICATION:
4278 	case PACKET3_COND_EXEC:
4279 	case PACKET3_PRED_EXEC:
4280 	case PACKET3_DRAW_INDIRECT:
4281 	case PACKET3_DRAW_INDEX_INDIRECT:
4282 	case PACKET3_INDEX_BASE:
4283 	case PACKET3_DRAW_INDEX_2:
4284 	case PACKET3_CONTEXT_CONTROL:
4285 	case PACKET3_INDEX_TYPE:
4286 	case PACKET3_DRAW_INDIRECT_MULTI:
4287 	case PACKET3_DRAW_INDEX_AUTO:
4288 	case PACKET3_DRAW_INDEX_IMMD:
4289 	case PACKET3_NUM_INSTANCES:
4290 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4291 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4292 	case PACKET3_DRAW_INDEX_OFFSET_2:
4293 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4294 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4295 	case PACKET3_MPEG_INDEX:
4296 	case PACKET3_WAIT_REG_MEM:
4297 	case PACKET3_MEM_WRITE:
4298 	case PACKET3_PFP_SYNC_ME:
4299 	case PACKET3_SURFACE_SYNC:
4300 	case PACKET3_EVENT_WRITE:
4301 	case PACKET3_EVENT_WRITE_EOP:
4302 	case PACKET3_EVENT_WRITE_EOS:
4303 	case PACKET3_SET_CONTEXT_REG:
4304 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4305 	case PACKET3_SET_SH_REG:
4306 	case PACKET3_SET_SH_REG_OFFSET:
4307 	case PACKET3_INCREMENT_DE_COUNTER:
4308 	case PACKET3_WAIT_ON_CE_COUNTER:
4309 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4310 	case PACKET3_ME_WRITE:
4311 		break;
4312 	case PACKET3_COPY_DATA:
4313 		if ((idx_value & 0xf00) == 0) {
4314 			reg = ib[idx + 3] * 4;
4315 			if (!si_vm_reg_valid(reg))
4316 				return -EINVAL;
4317 		}
4318 		break;
4319 	case PACKET3_WRITE_DATA:
4320 		if ((idx_value & 0xf00) == 0) {
4321 			start_reg = ib[idx + 1] * 4;
4322 			if (idx_value & 0x10000) {
4323 				if (!si_vm_reg_valid(start_reg))
4324 					return -EINVAL;
4325 			} else {
4326 				for (i = 0; i < (pkt->count - 2); i++) {
4327 					reg = start_reg + (4 * i);
4328 					if (!si_vm_reg_valid(reg))
4329 						return -EINVAL;
4330 				}
4331 			}
4332 		}
4333 		break;
4334 	case PACKET3_COND_WRITE:
4335 		if (idx_value & 0x100) {
4336 			reg = ib[idx + 5] * 4;
4337 			if (!si_vm_reg_valid(reg))
4338 				return -EINVAL;
4339 		}
4340 		break;
4341 	case PACKET3_COPY_DW:
4342 		if (idx_value & 0x2) {
4343 			reg = ib[idx + 3] * 4;
4344 			if (!si_vm_reg_valid(reg))
4345 				return -EINVAL;
4346 		}
4347 		break;
4348 	case PACKET3_SET_CONFIG_REG:
4349 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4350 		end_reg = 4 * pkt->count + start_reg - 4;
4351 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4352 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4353 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4354 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4355 			return -EINVAL;
4356 		}
4357 		for (i = 0; i < pkt->count; i++) {
4358 			reg = start_reg + (4 * i);
4359 			if (!si_vm_reg_valid(reg))
4360 				return -EINVAL;
4361 		}
4362 		break;
4363 	case PACKET3_CP_DMA:
4364 		r = si_vm_packet3_cp_dma_check(ib, idx);
4365 		if (r)
4366 			return r;
4367 		break;
4368 	default:
4369 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4370 		return -EINVAL;
4371 	}
4372 	return 0;
4373 }
4374 
4375 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4376 				       u32 *ib, struct radeon_cs_packet *pkt)
4377 {
4378 	int r;
4379 	u32 idx = pkt->idx + 1;
4380 	u32 idx_value = ib[idx];
4381 	u32 start_reg, reg, i;
4382 
4383 	switch (pkt->opcode) {
4384 	case PACKET3_NOP:
4385 	case PACKET3_SET_BASE:
4386 	case PACKET3_CLEAR_STATE:
4387 	case PACKET3_DISPATCH_DIRECT:
4388 	case PACKET3_DISPATCH_INDIRECT:
4389 	case PACKET3_ALLOC_GDS:
4390 	case PACKET3_WRITE_GDS_RAM:
4391 	case PACKET3_ATOMIC_GDS:
4392 	case PACKET3_ATOMIC:
4393 	case PACKET3_OCCLUSION_QUERY:
4394 	case PACKET3_SET_PREDICATION:
4395 	case PACKET3_COND_EXEC:
4396 	case PACKET3_PRED_EXEC:
4397 	case PACKET3_CONTEXT_CONTROL:
4398 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4399 	case PACKET3_WAIT_REG_MEM:
4400 	case PACKET3_MEM_WRITE:
4401 	case PACKET3_PFP_SYNC_ME:
4402 	case PACKET3_SURFACE_SYNC:
4403 	case PACKET3_EVENT_WRITE:
4404 	case PACKET3_EVENT_WRITE_EOP:
4405 	case PACKET3_EVENT_WRITE_EOS:
4406 	case PACKET3_SET_CONTEXT_REG:
4407 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4408 	case PACKET3_SET_SH_REG:
4409 	case PACKET3_SET_SH_REG_OFFSET:
4410 	case PACKET3_INCREMENT_DE_COUNTER:
4411 	case PACKET3_WAIT_ON_CE_COUNTER:
4412 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4413 	case PACKET3_ME_WRITE:
4414 		break;
4415 	case PACKET3_COPY_DATA:
4416 		if ((idx_value & 0xf00) == 0) {
4417 			reg = ib[idx + 3] * 4;
4418 			if (!si_vm_reg_valid(reg))
4419 				return -EINVAL;
4420 		}
4421 		break;
4422 	case PACKET3_WRITE_DATA:
4423 		if ((idx_value & 0xf00) == 0) {
4424 			start_reg = ib[idx + 1] * 4;
4425 			if (idx_value & 0x10000) {
4426 				if (!si_vm_reg_valid(start_reg))
4427 					return -EINVAL;
4428 			} else {
4429 				for (i = 0; i < (pkt->count - 2); i++) {
4430 					reg = start_reg + (4 * i);
4431 					if (!si_vm_reg_valid(reg))
4432 						return -EINVAL;
4433 				}
4434 			}
4435 		}
4436 		break;
4437 	case PACKET3_COND_WRITE:
4438 		if (idx_value & 0x100) {
4439 			reg = ib[idx + 5] * 4;
4440 			if (!si_vm_reg_valid(reg))
4441 				return -EINVAL;
4442 		}
4443 		break;
4444 	case PACKET3_COPY_DW:
4445 		if (idx_value & 0x2) {
4446 			reg = ib[idx + 3] * 4;
4447 			if (!si_vm_reg_valid(reg))
4448 				return -EINVAL;
4449 		}
4450 		break;
4451 	case PACKET3_CP_DMA:
4452 		r = si_vm_packet3_cp_dma_check(ib, idx);
4453 		if (r)
4454 			return r;
4455 		break;
4456 	default:
4457 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4458 		return -EINVAL;
4459 	}
4460 	return 0;
4461 }
4462 
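/**
 * si_ib_parse - vm ib check
 *
 * @rdev: radeon_device pointer
 * @ib: radeon_ib pointer
 *
 * Parses the IB and checks each packet against the allowed lists
 * for the CE, gfx and compute rings (SI).
 * Returns 0 for success, -EINVAL on an invalid packet.
 */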
4463 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4464 {
4465 	int ret = 0;
4466 	u32 idx = 0;
4467 	struct radeon_cs_packet pkt;
4468 
4469 	do {
4470 		pkt.idx = idx;
4471 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4472 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4473 		pkt.one_reg_wr = 0;
4474 		switch (pkt.type) {
4475 		case RADEON_PACKET_TYPE0:
4476 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4477 			ret = -EINVAL;
4478 			break;
4479 		case RADEON_PACKET_TYPE2:
4480 			idx += 1;
4481 			break;
4482 		case RADEON_PACKET_TYPE3:
4483 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4484 			if (ib->is_const_ib)
4485 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4486 			else {
4487 				switch (ib->ring) {
4488 				case RADEON_RING_TYPE_GFX_INDEX:
4489 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4490 					break;
4491 				case CAYMAN_RING_TYPE_CP1_INDEX:
4492 				case CAYMAN_RING_TYPE_CP2_INDEX:
4493 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4494 					break;
4495 				default:
4496 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4497 					ret = -EINVAL;
4498 					break;
4499 				}
4500 			}
4501 			idx += pkt.count + 2;
4502 			break;
4503 		default:
4504 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4505 			ret = -EINVAL;
4506 			break;
4507 		}
4508 		if (ret)
4509 			break;
4510 	} while (idx < ib->length_dw);
4511 
4512 	return ret;
4513 }
4514 
4515 /*
4516  * vm
4517  */
4518 int si_vm_init(struct radeon_device *rdev)
4519 {
4520 	/* number of VMs */
4521 	rdev->vm_manager.nvm = 16;
4522 	/* base offset of vram pages */
4523 	rdev->vm_manager.vram_base_offset = 0;
4524 
4525 	return 0;
4526 }
4527 
4528 void si_vm_fini(struct radeon_device *rdev)
4529 {
4530 }
4531 
4532 /**
4533  * si_vm_decode_fault - print human readable fault info
4534  *
4535  * @rdev: radeon_device pointer
4536  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4537  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4538  *
4539  * Print human readable fault information (SI).
4540  */
4541 static void si_vm_decode_fault(struct radeon_device *rdev,
4542 			       u32 status, u32 addr)
4543 {
4544 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4545 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4546 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4547 	char *block;
4548 
4549 	if (rdev->family == CHIP_TAHITI) {
4550 		switch (mc_id) {
4551 		case 160:
4552 		case 144:
4553 		case 96:
4554 		case 80:
4555 		case 224:
4556 		case 208:
4557 		case 32:
4558 		case 16:
4559 			block = "CB";
4560 			break;
4561 		case 161:
4562 		case 145:
4563 		case 97:
4564 		case 81:
4565 		case 225:
4566 		case 209:
4567 		case 33:
4568 		case 17:
4569 			block = "CB_FMASK";
4570 			break;
4571 		case 162:
4572 		case 146:
4573 		case 98:
4574 		case 82:
4575 		case 226:
4576 		case 210:
4577 		case 34:
4578 		case 18:
4579 			block = "CB_CMASK";
4580 			break;
4581 		case 163:
4582 		case 147:
4583 		case 99:
4584 		case 83:
4585 		case 227:
4586 		case 211:
4587 		case 35:
4588 		case 19:
4589 			block = "CB_IMMED";
4590 			break;
4591 		case 164:
4592 		case 148:
4593 		case 100:
4594 		case 84:
4595 		case 228:
4596 		case 212:
4597 		case 36:
4598 		case 20:
4599 			block = "DB";
4600 			break;
4601 		case 165:
4602 		case 149:
4603 		case 101:
4604 		case 85:
4605 		case 229:
4606 		case 213:
4607 		case 37:
4608 		case 21:
4609 			block = "DB_HTILE";
4610 			break;
4611 		case 167:
4612 		case 151:
4613 		case 103:
4614 		case 87:
4615 		case 231:
4616 		case 215:
4617 		case 39:
4618 		case 23:
4619 			block = "DB_STEN";
4620 			break;
4621 		case 72:
4622 		case 68:
4623 		case 64:
4624 		case 8:
4625 		case 4:
4626 		case 0:
4627 		case 136:
4628 		case 132:
4629 		case 128:
4630 		case 200:
4631 		case 196:
4632 		case 192:
4633 			block = "TC";
4634 			break;
4635 		case 112:
4636 		case 48:
4637 			block = "CP";
4638 			break;
4639 		case 49:
4640 		case 177:
4641 		case 50:
4642 		case 178:
4643 			block = "SH";
4644 			break;
4645 		case 53:
4646 		case 190:
4647 			block = "VGT";
4648 			break;
4649 		case 117:
4650 			block = "IH";
4651 			break;
4652 		case 51:
4653 		case 115:
4654 			block = "RLC";
4655 			break;
4656 		case 119:
4657 		case 183:
4658 			block = "DMA0";
4659 			break;
4660 		case 61:
4661 			block = "DMA1";
4662 			break;
4663 		case 248:
4664 		case 120:
4665 			block = "HDP";
4666 			break;
4667 		default:
4668 			block = "unknown";
4669 			break;
4670 		}
4671 	} else {
4672 		switch (mc_id) {
4673 		case 32:
4674 		case 16:
4675 		case 96:
4676 		case 80:
4677 		case 160:
4678 		case 144:
4679 		case 224:
4680 		case 208:
4681 			block = "CB";
4682 			break;
4683 		case 33:
4684 		case 17:
4685 		case 97:
4686 		case 81:
4687 		case 161:
4688 		case 145:
4689 		case 225:
4690 		case 209:
4691 			block = "CB_FMASK";
4692 			break;
4693 		case 34:
4694 		case 18:
4695 		case 98:
4696 		case 82:
4697 		case 162:
4698 		case 146:
4699 		case 226:
4700 		case 210:
4701 			block = "CB_CMASK";
4702 			break;
4703 		case 35:
4704 		case 19:
4705 		case 99:
4706 		case 83:
4707 		case 163:
4708 		case 147:
4709 		case 227:
4710 		case 211:
4711 			block = "CB_IMMED";
4712 			break;
4713 		case 36:
4714 		case 20:
4715 		case 100:
4716 		case 84:
4717 		case 164:
4718 		case 148:
4719 		case 228:
4720 		case 212:
4721 			block = "DB";
4722 			break;
4723 		case 37:
4724 		case 21:
4725 		case 101:
4726 		case 85:
4727 		case 165:
4728 		case 149:
4729 		case 229:
4730 		case 213:
4731 			block = "DB_HTILE";
4732 			break;
4733 		case 39:
4734 		case 23:
4735 		case 103:
4736 		case 87:
4737 		case 167:
4738 		case 151:
4739 		case 231:
4740 		case 215:
4741 			block = "DB_STEN";
4742 			break;
4743 		case 72:
4744 		case 68:
4745 		case 8:
4746 		case 4:
4747 		case 136:
4748 		case 132:
4749 		case 200:
4750 		case 196:
4751 			block = "TC";
4752 			break;
4753 		case 112:
4754 		case 48:
4755 			block = "CP";
4756 			break;
4757 		case 49:
4758 		case 177:
4759 		case 50:
4760 		case 178:
4761 			block = "SH";
4762 			break;
4763 		case 53:
4764 			block = "VGT";
4765 			break;
4766 		case 117:
4767 			block = "IH";
4768 			break;
4769 		case 51:
4770 		case 115:
4771 			block = "RLC";
4772 			break;
4773 		case 119:
4774 		case 183:
4775 			block = "DMA0";
4776 			break;
4777 		case 61:
4778 			block = "DMA1";
4779 			break;
4780 		case 248:
4781 		case 120:
4782 			block = "HDP";
4783 			break;
4784 		default:
4785 			block = "unknown";
4786 			break;
4787 		}
4788 	}
4789 
4790 	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4791 	       protections, vmid, addr,
4792 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4793 	       block, mc_id);
4794 }
4795 
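/**
 * si_vm_flush - flush the TLB for a VM
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index
 * @vm: vm to flush
 *
 * Writes the new page table base address for @vm, flushes the HDP
 * cache, requests a TLB invalidation for the VM's context and
 * syncs the PFP to the ME (SI).
 */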
4796 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4797 {
4798 	struct radeon_ring *ring = &rdev->ring[ridx];
4799 
4800 	if (vm == NULL)
4801 		return;
4802 
4803 	/* write new base address */
4804 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4805 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4806 				 WRITE_DATA_DST_SEL(0)));
4807 
4808 	if (vm->id < 8) {
4809 		radeon_ring_write(ring,
4810 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4811 	} else {
4812 		radeon_ring_write(ring,
4813 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4814 	}
4815 	radeon_ring_write(ring, 0);
4816 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4817 
4818 	/* flush hdp cache */
4819 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4820 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4821 				 WRITE_DATA_DST_SEL(0)));
4822 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4823 	radeon_ring_write(ring, 0);
4824 	radeon_ring_write(ring, 0x1);
4825 
4826 	/* bits 0-15 are the VM contexts0-15 */
4827 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4828 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4829 				 WRITE_DATA_DST_SEL(0)));
4830 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4831 	radeon_ring_write(ring, 0);
4832 	radeon_ring_write(ring, 1 << vm->id);
4833 
4834 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
4835 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4836 	radeon_ring_write(ring, 0x0);
4837 }
4838 
4839 /*
4840  *  Power and clock gating
4841  */
4842 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4843 {
4844 	int i;
4845 
4846 	for (i = 0; i < rdev->usec_timeout; i++) {
4847 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4848 			break;
4849 		udelay(1);
4850 	}
4851 
4852 	for (i = 0; i < rdev->usec_timeout; i++) {
4853 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4854 			break;
4855 		udelay(1);
4856 	}
4857 }
4858 
4859 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4860 					 bool enable)
4861 {
4862 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4863 	u32 mask;
4864 	int i;
4865 
4866 	if (enable)
4867 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4868 	else
4869 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4870 	WREG32(CP_INT_CNTL_RING0, tmp);
4871 
4872 	if (!enable) {
4873 		/* read a gfx register */
4874 		tmp = RREG32(DB_DEPTH_INFO);
4875 
4876 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4877 		for (i = 0; i < rdev->usec_timeout; i++) {
4878 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4879 				break;
4880 			udelay(1);
4881 		}
4882 	}
4883 }
4884 
4885 static void si_set_uvd_dcm(struct radeon_device *rdev,
4886 			   bool sw_mode)
4887 {
4888 	u32 tmp, tmp2;
4889 
4890 	tmp = RREG32(UVD_CGC_CTRL);
4891 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4892 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4893 
4894 	if (sw_mode) {
4895 		tmp &= ~0x7ffff800;
4896 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4897 	} else {
4898 		tmp |= 0x7ffff800;
4899 		tmp2 = 0;
4900 	}
4901 
4902 	WREG32(UVD_CGC_CTRL, tmp);
4903 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4904 }
4905 
4906 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4907 {
4908 	bool hw_mode = true;
4909 
4910 	if (hw_mode) {
4911 		si_set_uvd_dcm(rdev, false);
4912 	} else {
4913 		u32 tmp = RREG32(UVD_CGC_CTRL);
4914 		tmp &= ~DCM;
4915 		WREG32(UVD_CGC_CTRL, tmp);
4916 	}
4917 }
4918 
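/**
 * si_halt_rlc - halt the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Disables the RLC if it is running and waits for the serdes to go
 * idle (SI).
 * Returns the original RLC_CNTL value so that it can be restored
 * with si_update_rlc().
 */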
4919 static u32 si_halt_rlc(struct radeon_device *rdev)
4920 {
4921 	u32 data, orig;
4922 
4923 	orig = data = RREG32(RLC_CNTL);
4924 
4925 	if (data & RLC_ENABLE) {
4926 		data &= ~RLC_ENABLE;
4927 		WREG32(RLC_CNTL, data);
4928 
4929 		si_wait_for_rlc_serdes(rdev);
4930 	}
4931 
4932 	return orig;
4933 }
4934 
4935 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4936 {
4937 	u32 tmp;
4938 
4939 	tmp = RREG32(RLC_CNTL);
4940 	if (tmp != rlc)
4941 		WREG32(RLC_CNTL, rlc);
4942 }
4943 
4944 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4945 {
4946 	u32 data, orig;
4947 
4948 	orig = data = RREG32(DMA_PG);
4949 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4950 		data |= PG_CNTL_ENABLE;
4951 	else
4952 		data &= ~PG_CNTL_ENABLE;
4953 	if (orig != data)
4954 		WREG32(DMA_PG, data);
4955 }
4956 
4957 static void si_init_dma_pg(struct radeon_device *rdev)
4958 {
4959 	u32 tmp;
4960 
4961 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4962 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4963 
4964 	for (tmp = 0; tmp < 5; tmp++)
4965 		WREG32(DMA_PGFSM_WRITE, 0);
4966 }
4967 
4968 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4969 			       bool enable)
4970 {
4971 	u32 tmp;
4972 
4973 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
4974 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4975 		WREG32(RLC_TTOP_D, tmp);
4976 
4977 		tmp = RREG32(RLC_PG_CNTL);
4978 		tmp |= GFX_PG_ENABLE;
4979 		WREG32(RLC_PG_CNTL, tmp);
4980 
4981 		tmp = RREG32(RLC_AUTO_PG_CTRL);
4982 		tmp |= AUTO_PG_EN;
4983 		WREG32(RLC_AUTO_PG_CTRL, tmp);
4984 	} else {
4985 		tmp = RREG32(RLC_AUTO_PG_CTRL);
4986 		tmp &= ~AUTO_PG_EN;
4987 		WREG32(RLC_AUTO_PG_CTRL, tmp);
4988 
4989 		tmp = RREG32(DB_RENDER_CONTROL);
4990 	}
4991 }
4992 
4993 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4994 {
4995 	u32 tmp;
4996 
4997 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4998 
4999 	tmp = RREG32(RLC_PG_CNTL);
5000 	tmp |= GFX_PG_SRC;
5001 	WREG32(RLC_PG_CNTL, tmp);
5002 
5003 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5004 
5005 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5006 
5007 	tmp &= ~GRBM_REG_SGIT_MASK;
5008 	tmp |= GRBM_REG_SGIT(0x700);
5009 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5010 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5011 }
5012 
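/**
 * si_get_cu_active_bitmap - get a bitmap of the active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Reads the shader array config registers and returns a bitmap with
 * one bit set for each active CU on the given SE/SH (SI).
 */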
5013 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5014 {
5015 	u32 mask = 0, tmp, tmp1;
5016 	int i;
5017 
5018 	si_select_se_sh(rdev, se, sh);
5019 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5020 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5021 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5022 
5023 	tmp &= 0xffff0000;
5024 
5025 	tmp |= tmp1;
5026 	tmp >>= 16;
5027 
5028 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5029 		mask <<= 1;
5030 		mask |= 1;
5031 	}
5032 
5033 	return (~tmp) & mask;
5034 }
5035 
5036 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5037 {
5038 	u32 i, j, k, active_cu_number = 0;
5039 	u32 mask, counter, cu_bitmap;
5040 	u32 tmp = 0;
5041 
5042 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5043 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5044 			mask = 1;
5045 			cu_bitmap = 0;
5046 			counter  = 0;
5047 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5048 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5049 					if (counter < 2)
5050 						cu_bitmap |= mask;
5051 					counter++;
5052 				}
5053 				mask <<= 1;
5054 			}
5055 
5056 			active_cu_number += counter;
5057 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5058 		}
5059 	}
5060 
5061 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5062 
5063 	tmp = RREG32(RLC_MAX_PG_CU);
5064 	tmp &= ~MAX_PU_CU_MASK;
5065 	tmp |= MAX_PU_CU(active_cu_number);
5066 	WREG32(RLC_MAX_PG_CU, tmp);
5067 }
5068 
5069 static void si_enable_cgcg(struct radeon_device *rdev,
5070 			   bool enable)
5071 {
5072 	u32 data, orig, tmp;
5073 
5074 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5075 
5076 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5077 		si_enable_gui_idle_interrupt(rdev, true);
5078 
5079 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5080 
5081 		tmp = si_halt_rlc(rdev);
5082 
5083 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5084 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5085 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5086 
5087 		si_wait_for_rlc_serdes(rdev);
5088 
5089 		si_update_rlc(rdev, tmp);
5090 
5091 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5092 
5093 		data |= CGCG_EN | CGLS_EN;
5094 	} else {
5095 		si_enable_gui_idle_interrupt(rdev, false);
5096 
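		/* Four back-to-back dummy reads; the intent is undocumented,
		 * presumably to flush the interrupt disable and let the
		 * clock tree settle before CGCG/CGLS are cleared below.
		 */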
5097 		RREG32(CB_CGTT_SCLK_CTRL);
5098 		RREG32(CB_CGTT_SCLK_CTRL);
5099 		RREG32(CB_CGTT_SCLK_CTRL);
5100 		RREG32(CB_CGTT_SCLK_CTRL);
5101 
5102 		data &= ~(CGCG_EN | CGLS_EN);
5103 	}
5104 
5105 	if (orig != data)
5106 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5107 }
5108 
5109 static void si_enable_mgcg(struct radeon_device *rdev,
5110 			   bool enable)
5111 {
5112 	u32 data, orig, tmp = 0;
5113 
5114 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5115 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5116 		data = 0x96940200;
5117 		if (orig != data)
5118 			WREG32(CGTS_SM_CTRL_REG, data);
5119 
5120 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5121 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5122 			data |= CP_MEM_LS_EN;
5123 			if (orig != data)
5124 				WREG32(CP_MEM_SLP_CNTL, data);
5125 		}
5126 
5127 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5128 		data &= 0xffffffc0;
5129 		if (orig != data)
5130 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5131 
5132 		tmp = si_halt_rlc(rdev);
5133 
5134 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5135 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5136 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5137 
5138 		si_update_rlc(rdev, tmp);
5139 	} else {
5140 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5141 		data |= 0x00000003;
5142 		if (orig != data)
5143 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5144 
5145 		data = RREG32(CP_MEM_SLP_CNTL);
5146 		if (data & CP_MEM_LS_EN) {
5147 			data &= ~CP_MEM_LS_EN;
5148 			WREG32(CP_MEM_SLP_CNTL, data);
5149 		}
5150 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5151 		data |= LS_OVERRIDE | OVERRIDE;
5152 		if (orig != data)
5153 			WREG32(CGTS_SM_CTRL_REG, data);
5154 
5155 		tmp = si_halt_rlc(rdev);
5156 
5157 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5158 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5159 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5160 
5161 		si_update_rlc(rdev, tmp);
5162 	}
5163 }
5164 
5165 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5166 			       bool enable)
5167 {
5168 	u32 orig, data, tmp;
5169 
5170 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5171 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5172 		tmp |= 0x3fff;
5173 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5174 
5175 		orig = data = RREG32(UVD_CGC_CTRL);
5176 		data |= DCM;
5177 		if (orig != data)
5178 			WREG32(UVD_CGC_CTRL, data);
5179 
5180 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5181 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5182 	} else {
5183 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5184 		tmp &= ~0x3fff;
5185 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5186 
5187 		orig = data = RREG32(UVD_CGC_CTRL);
5188 		data &= ~DCM;
5189 		if (orig != data)
5190 			WREG32(UVD_CGC_CTRL, data);
5191 
5192 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5193 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5194 	}
5195 }
5196 
5197 static const u32 mc_cg_registers[] =
5198 {
5199 	MC_HUB_MISC_HUB_CG,
5200 	MC_HUB_MISC_SIP_CG,
5201 	MC_HUB_MISC_VM_CG,
5202 	MC_XPB_CLK_GAT,
5203 	ATC_MISC_CG,
5204 	MC_CITF_MISC_WR_CG,
5205 	MC_CITF_MISC_RD_CG,
5206 	MC_CITF_MISC_VM_CG,
5207 	VM_L2_CG,
5208 };
5209 
5210 static void si_enable_mc_ls(struct radeon_device *rdev,
5211 			    bool enable)
5212 {
5213 	int i;
5214 	u32 orig, data;
5215 
5216 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5217 		orig = data = RREG32(mc_cg_registers[i]);
5218 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5219 			data |= MC_LS_ENABLE;
5220 		else
5221 			data &= ~MC_LS_ENABLE;
5222 		if (data != orig)
5223 			WREG32(mc_cg_registers[i], data);
5224 	}
5225 }
5226 
5227 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5228 			       bool enable)
5229 {
5230 	int i;
5231 	u32 orig, data;
5232 
5233 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5234 		orig = data = RREG32(mc_cg_registers[i]);
5235 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5236 			data |= MC_CG_ENABLE;
5237 		else
5238 			data &= ~MC_CG_ENABLE;
5239 		if (data != orig)
5240 			WREG32(mc_cg_registers[i], data);
5241 	}
5242 }
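
/*
 * si_enable_mc_ls() and si_enable_mc_mgcg() above share one
 * read-modify-write pattern over mc_cg_registers[].  A hypothetical
 * generic helper (sketch only, not part of the driver) could read:
 *
 *	static void si_set_mc_reg_bits(struct radeon_device *rdev,
 *				       u32 bits, bool set)
 *	{
 *		int i;
 *		u32 orig, data;
 *
 *		for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
 *			orig = data = RREG32(mc_cg_registers[i]);
 *			if (set)
 *				data |= bits;
 *			else
 *				data &= ~bits;
 *			if (data != orig)
 *				WREG32(mc_cg_registers[i], data);
 *		}
 *	}
 *
 * The data != orig check mirrors the functions above and avoids
 * redundant MMIO writes.
 */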
5243 
5244 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5245 			       bool enable)
5246 {
5247 	u32 orig, data, offset;
5248 	int i;
5249 
5250 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5251 		for (i = 0; i < 2; i++) {
5252 			if (i == 0)
5253 				offset = DMA0_REGISTER_OFFSET;
5254 			else
5255 				offset = DMA1_REGISTER_OFFSET;
5256 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5257 			data &= ~MEM_POWER_OVERRIDE;
5258 			if (data != orig)
5259 				WREG32(DMA_POWER_CNTL + offset, data);
5260 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5261 		}
5262 	} else {
5263 		for (i = 0; i < 2; i++) {
5264 			if (i == 0)
5265 				offset = DMA0_REGISTER_OFFSET;
5266 			else
5267 				offset = DMA1_REGISTER_OFFSET;
5268 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5269 			data |= MEM_POWER_OVERRIDE;
5270 			if (data != orig)
5271 				WREG32(DMA_POWER_CNTL + offset, data);
5272 
5273 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5274 			data = 0xff000000;
5275 			if (data != orig)
5276 				WREG32(DMA_CLK_CTRL + offset, data);
5277 		}
5278 	}
5279 }
5280 
5281 static void si_enable_bif_mgls(struct radeon_device *rdev,
5282 			       bool enable)
5283 {
5284 	u32 orig, data;
5285 
5286 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5287 
5288 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5289 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5290 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5291 	else
5292 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5293 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5294 
5295 	if (orig != data)
5296 		WREG32_PCIE(PCIE_CNTL2, data);
5297 }
5298 
5299 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5300 			       bool enable)
5301 {
5302 	u32 orig, data;
5303 
5304 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5305 
5306 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5307 		data &= ~CLOCK_GATING_DIS;
5308 	else
5309 		data |= CLOCK_GATING_DIS;
5310 
5311 	if (orig != data)
5312 		WREG32(HDP_HOST_PATH_CNTL, data);
5313 }
5314 
5315 static void si_enable_hdp_ls(struct radeon_device *rdev,
5316 			     bool enable)
5317 {
5318 	u32 orig, data;
5319 
5320 	orig = data = RREG32(HDP_MEM_POWER_LS);
5321 
5322 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5323 		data |= HDP_LS_ENABLE;
5324 	else
5325 		data &= ~HDP_LS_ENABLE;
5326 
5327 	if (orig != data)
5328 		WREG32(HDP_MEM_POWER_LS, data);
5329 }
5330 
5331 static void si_update_cg(struct radeon_device *rdev,
5332 			 u32 block, bool enable)
5333 {
5334 	if (block & RADEON_CG_BLOCK_GFX) {
5335 		si_enable_gui_idle_interrupt(rdev, false);
5336 		/* order matters! */
5337 		if (enable) {
5338 			si_enable_mgcg(rdev, true);
5339 			si_enable_cgcg(rdev, true);
5340 		} else {
5341 			si_enable_cgcg(rdev, false);
5342 			si_enable_mgcg(rdev, false);
5343 		}
5344 		si_enable_gui_idle_interrupt(rdev, true);
5345 	}
5346 
5347 	if (block & RADEON_CG_BLOCK_MC) {
5348 		si_enable_mc_mgcg(rdev, enable);
5349 		si_enable_mc_ls(rdev, enable);
5350 	}
5351 
5352 	if (block & RADEON_CG_BLOCK_SDMA) {
5353 		si_enable_dma_mgcg(rdev, enable);
5354 	}
5355 
5356 	if (block & RADEON_CG_BLOCK_BIF) {
5357 		si_enable_bif_mgls(rdev, enable);
5358 	}
5359 
5360 	if (block & RADEON_CG_BLOCK_UVD) {
5361 		if (rdev->has_uvd) {
5362 			si_enable_uvd_mgcg(rdev, enable);
5363 		}
5364 	}
5365 
5366 	if (block & RADEON_CG_BLOCK_HDP) {
5367 		si_enable_hdp_mgcg(rdev, enable);
5368 		si_enable_hdp_ls(rdev, enable);
5369 	}
5370 }
5371 
5372 static void si_init_cg(struct radeon_device *rdev)
5373 {
5374 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5375 			    RADEON_CG_BLOCK_MC |
5376 			    RADEON_CG_BLOCK_SDMA |
5377 			    RADEON_CG_BLOCK_BIF |
5378 			    RADEON_CG_BLOCK_HDP), true);
5379 	if (rdev->has_uvd) {
5380 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5381 		si_init_uvd_internal_cg(rdev);
5382 	}
5383 }
5384 
5385 static void si_fini_cg(struct radeon_device *rdev)
5386 {
5387 	if (rdev->has_uvd) {
5388 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5389 	}
5390 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5391 			    RADEON_CG_BLOCK_MC |
5392 			    RADEON_CG_BLOCK_SDMA |
5393 			    RADEON_CG_BLOCK_BIF |
5394 			    RADEON_CG_BLOCK_HDP), false);
5395 }
5396 
5397 u32 si_get_csb_size(struct radeon_device *rdev)
5398 {
5399 	u32 count = 0;
5400 	const struct cs_section_def *sect = NULL;
5401 	const struct cs_extent_def *ext = NULL;
5402 
5403 	if (rdev->rlc.cs_data == NULL)
5404 		return 0;
5405 
5406 	/* begin clear state */
5407 	count += 2;
5408 	/* context control state */
5409 	count += 3;
5410 
5411 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5412 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5413 			if (sect->id == SECT_CONTEXT)
5414 				count += 2 + ext->reg_count;
5415 			else
5416 				return 0;
5417 		}
5418 	}
5419 	/* pa_sc_raster_config */
5420 	count += 3;
5421 	/* end clear state */
5422 	count += 2;
5423 	/* clear state */
5424 	count += 2;
5425 
5426 	return count;
5427 }
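
/*
 * Size example for si_get_csb_size() above (extent counts are
 * hypothetical): the total is 2 (begin clear state) + 3 (context
 * control) + the sum over SECT_CONTEXT extents of (2 + reg_count) +
 * 3 (pa_sc_raster_config) + 2 (end clear state) + 2 (clear state)
 * dwords.  A single extent of 4 registers would give
 * 2 + 3 + (2 + 4) + 3 + 2 + 2 = 18 dwords.
 */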
5428 
5429 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5430 {
5431 	u32 count = 0, i;
5432 	const struct cs_section_def *sect = NULL;
5433 	const struct cs_extent_def *ext = NULL;
5434 
5435 	if (rdev->rlc.cs_data == NULL)
5436 		return;
5437 	if (buffer == NULL)
5438 		return;
5439 
5440 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5441 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5442 
5443 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5444 	buffer[count++] = cpu_to_le32(0x80000000);
5445 	buffer[count++] = cpu_to_le32(0x80000000);
5446 
5447 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5448 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5449 			if (sect->id == SECT_CONTEXT) {
5450 				buffer[count++] =
5451 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5452 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5453 				for (i = 0; i < ext->reg_count; i++)
5454 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5455 			} else {
5456 				return;
5457 			}
5458 		}
5459 	}
5460 
5461 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5462 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5463 	switch (rdev->family) {
5464 	case CHIP_TAHITI:
5465 	case CHIP_PITCAIRN:
5466 		buffer[count++] = cpu_to_le32(0x2a00126a);
5467 		break;
5468 	case CHIP_VERDE:
5469 		buffer[count++] = cpu_to_le32(0x0000124a);
5470 		break;
5471 	case CHIP_OLAND:
5472 		buffer[count++] = cpu_to_le32(0x00000082);
5473 		break;
5474 	case CHIP_HAINAN:
5475 		buffer[count++] = cpu_to_le32(0x00000000);
5476 		break;
5477 	default:
5478 		buffer[count++] = cpu_to_le32(0x00000000);
5479 		break;
5480 	}
5481 
5482 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5483 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5484 
5485 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5486 	buffer[count++] = cpu_to_le32(0);
5487 }
5488 
5489 static void si_init_pg(struct radeon_device *rdev)
5490 {
5491 	if (rdev->pg_flags) {
5492 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5493 			si_init_dma_pg(rdev);
5494 		}
5495 		si_init_ao_cu_mask(rdev);
5496 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5497 			si_init_gfx_cgpg(rdev);
5498 		} else {
5499 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5500 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5501 		}
5502 		si_enable_dma_pg(rdev, true);
5503 		si_enable_gfx_cgpg(rdev, true);
5504 	} else {
5505 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5506 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5507 	}
5508 }
5509 
5510 static void si_fini_pg(struct radeon_device *rdev)
5511 {
5512 	if (rdev->pg_flags) {
5513 		si_enable_dma_pg(rdev, false);
5514 		si_enable_gfx_cgpg(rdev, false);
5515 	}
5516 }
5517 
5518 /*
5519  * RLC
5520  */
5521 void si_rlc_reset(struct radeon_device *rdev)
5522 {
5523 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5524 
5525 	tmp |= SOFT_RESET_RLC;
5526 	WREG32(GRBM_SOFT_RESET, tmp);
5527 	udelay(50);
5528 	tmp &= ~SOFT_RESET_RLC;
5529 	WREG32(GRBM_SOFT_RESET, tmp);
5530 	udelay(50);
5531 }
5532 
5533 static void si_rlc_stop(struct radeon_device *rdev)
5534 {
5535 	WREG32(RLC_CNTL, 0);
5536 
5537 	si_enable_gui_idle_interrupt(rdev, false);
5538 
5539 	si_wait_for_rlc_serdes(rdev);
5540 }
5541 
5542 static void si_rlc_start(struct radeon_device *rdev)
5543 {
5544 	WREG32(RLC_CNTL, RLC_ENABLE);
5545 
5546 	si_enable_gui_idle_interrupt(rdev, true);
5547 
5548 	udelay(50);
5549 }
5550 
5551 static bool si_lbpw_supported(struct radeon_device *rdev)
5552 {
5553 	u32 tmp;
5554 
5555 	/* Enable LBPW only for DDR3 (MC_SEQ_MISC0[31:28] memory type == 0xB) */
5556 	tmp = RREG32(MC_SEQ_MISC0);
5557 	if ((tmp & 0xF0000000) == 0xB0000000)
5558 		return true;
5559 	return false;
5560 }
5561 
5562 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5563 {
5564 	u32 tmp;
5565 
5566 	tmp = RREG32(RLC_LB_CNTL);
5567 	if (enable)
5568 		tmp |= LOAD_BALANCE_ENABLE;
5569 	else
5570 		tmp &= ~LOAD_BALANCE_ENABLE;
5571 	WREG32(RLC_LB_CNTL, tmp);
5572 
5573 	if (!enable) {
5574 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5575 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5576 	}
5577 }
5578 
5579 static int si_rlc_resume(struct radeon_device *rdev)
5580 {
5581 	u32 i;
5582 	const __be32 *fw_data;
5583 
5584 	if (!rdev->rlc_fw)
5585 		return -EINVAL;
5586 
5587 	si_rlc_stop(rdev);
5588 
5589 	si_rlc_reset(rdev);
5590 
5591 	si_init_pg(rdev);
5592 
5593 	si_init_cg(rdev);
5594 
5595 	WREG32(RLC_RL_BASE, 0);
5596 	WREG32(RLC_RL_SIZE, 0);
5597 	WREG32(RLC_LB_CNTL, 0);
5598 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5599 	WREG32(RLC_LB_CNTR_INIT, 0);
5600 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5601 
5602 	WREG32(RLC_MC_CNTL, 0);
5603 	WREG32(RLC_UCODE_CNTL, 0);
5604 
5605 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5606 	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5607 		WREG32(RLC_UCODE_ADDR, i);
5608 		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5609 	}
5610 	WREG32(RLC_UCODE_ADDR, 0);
5611 
5612 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5613 
5614 	si_rlc_start(rdev);
5615 
5616 	return 0;
5617 }
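
/*
 * Note on the upload loop in si_rlc_resume() above: the RLC microcode
 * image is stored big-endian, so each dword is swapped with
 * be32_to_cpup() and written through the RLC_UCODE_ADDR /
 * RLC_UCODE_DATA register pair; the address is then reset to 0,
 * presumably so the RLC fetches from the start when restarted.
 */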
5618 
5619 static void si_enable_interrupts(struct radeon_device *rdev)
5620 {
5621 	u32 ih_cntl = RREG32(IH_CNTL);
5622 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5623 
5624 	ih_cntl |= ENABLE_INTR;
5625 	ih_rb_cntl |= IH_RB_ENABLE;
5626 	WREG32(IH_CNTL, ih_cntl);
5627 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5628 	rdev->ih.enabled = true;
5629 }
5630 
5631 static void si_disable_interrupts(struct radeon_device *rdev)
5632 {
5633 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5634 	u32 ih_cntl = RREG32(IH_CNTL);
5635 
5636 	ih_rb_cntl &= ~IH_RB_ENABLE;
5637 	ih_cntl &= ~ENABLE_INTR;
5638 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5639 	WREG32(IH_CNTL, ih_cntl);
5640 	/* set rptr, wptr to 0 */
5641 	WREG32(IH_RB_RPTR, 0);
5642 	WREG32(IH_RB_WPTR, 0);
5643 	rdev->ih.enabled = false;
5644 	rdev->ih.rptr = 0;
5645 }
5646 
5647 static void si_disable_interrupt_state(struct radeon_device *rdev)
5648 {
5649 	u32 tmp;
5650 
5651 	tmp = RREG32(CP_INT_CNTL_RING0) &
5652 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5653 	WREG32(CP_INT_CNTL_RING0, tmp);
5654 	WREG32(CP_INT_CNTL_RING1, 0);
5655 	WREG32(CP_INT_CNTL_RING2, 0);
5656 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5657 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5658 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5659 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5660 	WREG32(GRBM_INT_CNTL, 0);
5661 	if (rdev->num_crtc >= 2) {
5662 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5663 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5664 	}
5665 	if (rdev->num_crtc >= 4) {
5666 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5667 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5668 	}
5669 	if (rdev->num_crtc >= 6) {
5670 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5671 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5672 	}
5673 
5674 	if (rdev->num_crtc >= 2) {
5675 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5676 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5677 	}
5678 	if (rdev->num_crtc >= 4) {
5679 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5680 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5681 	}
5682 	if (rdev->num_crtc >= 6) {
5683 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5684 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5685 	}
5686 
5687 	if (!ASIC_IS_NODCE(rdev)) {
5688 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5689 
5690 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5691 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5692 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5693 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5694 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5695 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5696 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5697 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5698 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5699 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5700 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5701 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5702 	}
5703 }
5704 
5705 static int si_irq_init(struct radeon_device *rdev)
5706 {
5707 	int ret = 0;
5708 	int rb_bufsz;
5709 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5710 
5711 	/* allocate ring */
5712 	ret = r600_ih_ring_alloc(rdev);
5713 	if (ret)
5714 		return ret;
5715 
5716 	/* disable irqs */
5717 	si_disable_interrupts(rdev);
5718 
5719 	/* init rlc */
5720 	ret = si_rlc_resume(rdev);
5721 	if (ret) {
5722 		r600_ih_ring_fini(rdev);
5723 		return ret;
5724 	}
5725 
5726 	/* setup interrupt control */
5727 	/* set dummy read address to ring address */
5728 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5729 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5730 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5731 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5732 	 */
5733 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5734 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5735 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5736 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5737 
5738 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5739 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5740 
5741 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5742 		      IH_WPTR_OVERFLOW_CLEAR |
5743 		      (rb_bufsz << 1));
5744 
5745 	if (rdev->wb.enabled)
5746 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5747 
5748 	/* set the writeback address whether it's enabled or not */
5749 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5750 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5751 
5752 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5753 
5754 	/* set rptr, wptr to 0 */
5755 	WREG32(IH_RB_RPTR, 0);
5756 	WREG32(IH_RB_WPTR, 0);
5757 
5758 	/* Default settings for IH_CNTL (disabled at first) */
5759 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5760 	/* RPTR_REARM only works if msi's are enabled */
5761 	if (rdev->msi_enabled)
5762 		ih_cntl |= RPTR_REARM;
5763 	WREG32(IH_CNTL, ih_cntl);
5764 
5765 	/* force the active interrupt state to all disabled */
5766 	si_disable_interrupt_state(rdev);
5767 
5768 	pci_set_master(rdev->pdev);
5769 
5770 	/* enable irqs */
5771 	si_enable_interrupts(rdev);
5772 
5773 	return ret;
5774 }
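
/*
 * Ring-size arithmetic for si_irq_init() above: si_init() below sets
 * up a 64 KiB IH ring, so rb_bufsz = order_base_2(65536 / 4) =
 * order_base_2(16384) = 14, and (rb_bufsz << 1) places that log2 size
 * code in the IH_RB_CNTL ring-size field.
 */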
5775 
5776 int si_irq_set(struct radeon_device *rdev)
5777 {
5778 	u32 cp_int_cntl;
5779 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5780 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5781 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5782 	u32 grbm_int_cntl = 0;
5783 	u32 dma_cntl, dma_cntl1;
5784 	u32 thermal_int = 0;
5785 
5786 	if (!rdev->irq.installed) {
5787 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5788 		return -EINVAL;
5789 	}
5790 	/* don't enable anything if the ih is disabled */
5791 	if (!rdev->ih.enabled) {
5792 		si_disable_interrupts(rdev);
5793 		/* force the active interrupt state to all disabled */
5794 		si_disable_interrupt_state(rdev);
5795 		return 0;
5796 	}
5797 
5798 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5799 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5800 
5801 	if (!ASIC_IS_NODCE(rdev)) {
5802 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5803 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5804 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5805 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5806 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5807 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5808 	}
5809 
5810 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5811 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5812 
5813 	thermal_int = RREG32(CG_THERMAL_INT) &
5814 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5815 
5816 	/* enable CP interrupts on all rings */
5817 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5818 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5819 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5820 	}
5821 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5822 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5823 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5824 	}
5825 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5826 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5827 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5828 	}
5829 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5830 		DRM_DEBUG("si_irq_set: sw int dma\n");
5831 		dma_cntl |= TRAP_ENABLE;
5832 	}
5833 
5834 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5835 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5836 		dma_cntl1 |= TRAP_ENABLE;
5837 	}
5838 	if (rdev->irq.crtc_vblank_int[0] ||
5839 	    atomic_read(&rdev->irq.pflip[0])) {
5840 		DRM_DEBUG("si_irq_set: vblank 0\n");
5841 		crtc1 |= VBLANK_INT_MASK;
5842 	}
5843 	if (rdev->irq.crtc_vblank_int[1] ||
5844 	    atomic_read(&rdev->irq.pflip[1])) {
5845 		DRM_DEBUG("si_irq_set: vblank 1\n");
5846 		crtc2 |= VBLANK_INT_MASK;
5847 	}
5848 	if (rdev->irq.crtc_vblank_int[2] ||
5849 	    atomic_read(&rdev->irq.pflip[2])) {
5850 		DRM_DEBUG("si_irq_set: vblank 2\n");
5851 		crtc3 |= VBLANK_INT_MASK;
5852 	}
5853 	if (rdev->irq.crtc_vblank_int[3] ||
5854 	    atomic_read(&rdev->irq.pflip[3])) {
5855 		DRM_DEBUG("si_irq_set: vblank 3\n");
5856 		crtc4 |= VBLANK_INT_MASK;
5857 	}
5858 	if (rdev->irq.crtc_vblank_int[4] ||
5859 	    atomic_read(&rdev->irq.pflip[4])) {
5860 		DRM_DEBUG("si_irq_set: vblank 4\n");
5861 		crtc5 |= VBLANK_INT_MASK;
5862 	}
5863 	if (rdev->irq.crtc_vblank_int[5] ||
5864 	    atomic_read(&rdev->irq.pflip[5])) {
5865 		DRM_DEBUG("si_irq_set: vblank 5\n");
5866 		crtc6 |= VBLANK_INT_MASK;
5867 	}
5868 	if (rdev->irq.hpd[0]) {
5869 		DRM_DEBUG("si_irq_set: hpd 1\n");
5870 		hpd1 |= DC_HPDx_INT_EN;
5871 	}
5872 	if (rdev->irq.hpd[1]) {
5873 		DRM_DEBUG("si_irq_set: hpd 2\n");
5874 		hpd2 |= DC_HPDx_INT_EN;
5875 	}
5876 	if (rdev->irq.hpd[2]) {
5877 		DRM_DEBUG("si_irq_set: hpd 3\n");
5878 		hpd3 |= DC_HPDx_INT_EN;
5879 	}
5880 	if (rdev->irq.hpd[3]) {
5881 		DRM_DEBUG("si_irq_set: hpd 4\n");
5882 		hpd4 |= DC_HPDx_INT_EN;
5883 	}
5884 	if (rdev->irq.hpd[4]) {
5885 		DRM_DEBUG("si_irq_set: hpd 5\n");
5886 		hpd5 |= DC_HPDx_INT_EN;
5887 	}
5888 	if (rdev->irq.hpd[5]) {
5889 		DRM_DEBUG("si_irq_set: hpd 6\n");
5890 		hpd6 |= DC_HPDx_INT_EN;
5891 	}
5892 
5893 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5894 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5895 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5896 
5897 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5898 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5899 
5900 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5901 
5902 	if (rdev->irq.dpm_thermal) {
5903 		DRM_DEBUG("dpm thermal\n");
5904 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5905 	}
5906 
5907 	if (rdev->num_crtc >= 2) {
5908 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5909 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5910 	}
5911 	if (rdev->num_crtc >= 4) {
5912 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5913 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5914 	}
5915 	if (rdev->num_crtc >= 6) {
5916 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5917 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5918 	}
5919 
5920 	if (rdev->num_crtc >= 2) {
5921 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
5922 		       GRPH_PFLIP_INT_MASK);
5923 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
5924 		       GRPH_PFLIP_INT_MASK);
5925 	}
5926 	if (rdev->num_crtc >= 4) {
5927 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
5928 		       GRPH_PFLIP_INT_MASK);
5929 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
5930 		       GRPH_PFLIP_INT_MASK);
5931 	}
5932 	if (rdev->num_crtc >= 6) {
5933 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
5934 		       GRPH_PFLIP_INT_MASK);
5935 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
5936 		       GRPH_PFLIP_INT_MASK);
5937 	}
5938 
5939 	if (!ASIC_IS_NODCE(rdev)) {
5940 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5941 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5942 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5943 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5944 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5945 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5946 	}
5947 
5948 	WREG32(CG_THERMAL_INT, thermal_int);
5949 
5950 	return 0;
5951 }
5952 
5953 static inline void si_irq_ack(struct radeon_device *rdev)
5954 {
5955 	u32 tmp;
5956 
5957 	if (ASIC_IS_NODCE(rdev))
5958 		return;
5959 
5960 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5961 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5962 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5963 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5964 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5965 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5966 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5967 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5968 	if (rdev->num_crtc >= 4) {
5969 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5970 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5971 	}
5972 	if (rdev->num_crtc >= 6) {
5973 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5974 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5975 	}
5976 
5977 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5978 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5979 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5980 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5981 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5982 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5983 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5984 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5985 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5986 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5987 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5988 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5989 
5990 	if (rdev->num_crtc >= 4) {
5991 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5992 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5993 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5994 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5995 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5996 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5997 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5998 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5999 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6000 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6001 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6002 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6003 	}
6004 
6005 	if (rdev->num_crtc >= 6) {
6006 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6007 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6008 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6009 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6010 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6011 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6012 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6013 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6014 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6015 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6016 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6017 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6018 	}
6019 
6020 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6021 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6022 		tmp |= DC_HPDx_INT_ACK;
6023 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6024 	}
6025 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6026 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6027 		tmp |= DC_HPDx_INT_ACK;
6028 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6029 	}
6030 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6031 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6032 		tmp |= DC_HPDx_INT_ACK;
6033 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6034 	}
6035 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6036 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6037 		tmp |= DC_HPDx_INT_ACK;
6038 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6039 	}
6040 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6041 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6042 		tmp |= DC_HPDx_INT_ACK;
6043 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6044 	}
6045 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6046 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6047 		tmp |= DC_HPDx_INT_ACK;
6048 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6049 	}
6050 }
6051 
6052 static void si_irq_disable(struct radeon_device *rdev)
6053 {
6054 	si_disable_interrupts(rdev);
6055 	/* Wait and acknowledge irq */
6056 	mdelay(1);
6057 	si_irq_ack(rdev);
6058 	si_disable_interrupt_state(rdev);
6059 }
6060 
6061 static void si_irq_suspend(struct radeon_device *rdev)
6062 {
6063 	si_irq_disable(rdev);
6064 	si_rlc_stop(rdev);
6065 }
6066 
6067 static void si_irq_fini(struct radeon_device *rdev)
6068 {
6069 	si_irq_suspend(rdev);
6070 	r600_ih_ring_fini(rdev);
6071 }
6072 
6073 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6074 {
6075 	u32 wptr, tmp;
6076 
6077 	if (rdev->wb.enabled)
6078 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6079 	else
6080 		wptr = RREG32(IH_RB_WPTR);
6081 
6082 	if (wptr & RB_OVERFLOW) {
6083 		/* When a ring buffer overflow happens, start parsing interrupts
6084 		 * from the last not-overwritten vector (wptr + 16). Hopefully
6085 		 * this should allow us to catch up.
6086 		 */
6087 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6088 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6089 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6090 		tmp = RREG32(IH_RB_CNTL);
6091 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6092 		WREG32(IH_RB_CNTL, tmp);
6093 	}
6094 	return (wptr & rdev->ih.ptr_mask);
6095 }
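
/*
 * Worked example for the overflow path above (numbers hypothetical):
 * with a 64 KiB IH ring, ptr_mask = 0xffff.  If an overflow is
 * flagged at wptr = 0x0010, parsing resumes at (0x0010 + 16) &
 * 0xffff = 0x0020, one 16-byte vector past the last entry that may
 * have been overwritten.
 */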
6096 
6097 /*        SI IV Ring
6098  * Each IV ring entry is 128 bits:
6099  * [7:0]    - interrupt source id
6100  * [31:8]   - reserved
6101  * [59:32]  - interrupt source data
6102  * [63:60]  - reserved
6103  * [71:64]  - RINGID
6104  * [79:72]  - VMID
6105  * [127:80] - reserved
6106  */
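
/*
 * Decoding one IV ring entry per the layout above (sketch only; the
 * driver open-codes the first three fields in si_irq_process() below,
 * and the VMID extraction here is inferred from the layout):
 *
 *	u32 src_id   = le32_to_cpu(ring[i + 0]) & 0xff;        bits [7:0]
 *	u32 src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff;   bits [59:32]
 *	u32 ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;        bits [71:64]
 *	u32 vm_id    = (le32_to_cpu(ring[i + 2]) >> 8) & 0xff; bits [79:72]
 */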
6107 int si_irq_process(struct radeon_device *rdev)
6108 {
6109 	u32 wptr;
6110 	u32 rptr;
6111 	u32 src_id, src_data, ring_id;
6112 	u32 ring_index;
6113 	bool queue_hotplug = false;
6114 	bool queue_thermal = false;
6115 	u32 status, addr;
6116 
6117 	if (!rdev->ih.enabled || rdev->shutdown)
6118 		return IRQ_NONE;
6119 
6120 	wptr = si_get_ih_wptr(rdev);
6121 
6122 restart_ih:
6123 	/* is somebody else already processing irqs? */
6124 	if (atomic_xchg(&rdev->ih.lock, 1))
6125 		return IRQ_NONE;
6126 
6127 	rptr = rdev->ih.rptr;
6128 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6129 
6130 	/* Order reading of wptr vs. reading of IH ring data */
6131 	rmb();
6132 
6133 	/* display interrupts */
6134 	si_irq_ack(rdev);
6135 
6136 	while (rptr != wptr) {
6137 		/* wptr/rptr are in bytes! */
6138 		ring_index = rptr / 4;
6139 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6140 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6141 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6142 
6143 		switch (src_id) {
6144 		case 1: /* D1 vblank/vline */
6145 			switch (src_data) {
6146 			case 0: /* D1 vblank */
6147 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6148 					if (rdev->irq.crtc_vblank_int[0]) {
6149 						drm_handle_vblank(rdev->ddev, 0);
6150 						rdev->pm.vblank_sync = true;
6151 						wake_up(&rdev->irq.vblank_queue);
6152 					}
6153 					if (atomic_read(&rdev->irq.pflip[0]))
6154 						radeon_crtc_handle_flip(rdev, 0);
6155 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6156 					DRM_DEBUG("IH: D1 vblank\n");
6157 				}
6158 				break;
6159 			case 1: /* D1 vline */
6160 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6161 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6162 					DRM_DEBUG("IH: D1 vline\n");
6163 				}
6164 				break;
6165 			default:
6166 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6167 				break;
6168 			}
6169 			break;
6170 		case 2: /* D2 vblank/vline */
6171 			switch (src_data) {
6172 			case 0: /* D2 vblank */
6173 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6174 					if (rdev->irq.crtc_vblank_int[1]) {
6175 						drm_handle_vblank(rdev->ddev, 1);
6176 						rdev->pm.vblank_sync = true;
6177 						wake_up(&rdev->irq.vblank_queue);
6178 					}
6179 					if (atomic_read(&rdev->irq.pflip[1]))
6180 						radeon_crtc_handle_flip(rdev, 1);
6181 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6182 					DRM_DEBUG("IH: D2 vblank\n");
6183 				}
6184 				break;
6185 			case 1: /* D2 vline */
6186 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6187 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6188 					DRM_DEBUG("IH: D2 vline\n");
6189 				}
6190 				break;
6191 			default:
6192 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6193 				break;
6194 			}
6195 			break;
6196 		case 3: /* D3 vblank/vline */
6197 			switch (src_data) {
6198 			case 0: /* D3 vblank */
6199 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6200 					if (rdev->irq.crtc_vblank_int[2]) {
6201 						drm_handle_vblank(rdev->ddev, 2);
6202 						rdev->pm.vblank_sync = true;
6203 						wake_up(&rdev->irq.vblank_queue);
6204 					}
6205 					if (atomic_read(&rdev->irq.pflip[2]))
6206 						radeon_crtc_handle_flip(rdev, 2);
6207 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6208 					DRM_DEBUG("IH: D3 vblank\n");
6209 				}
6210 				break;
6211 			case 1: /* D3 vline */
6212 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6213 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6214 					DRM_DEBUG("IH: D3 vline\n");
6215 				}
6216 				break;
6217 			default:
6218 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6219 				break;
6220 			}
6221 			break;
6222 		case 4: /* D4 vblank/vline */
6223 			switch (src_data) {
6224 			case 0: /* D4 vblank */
6225 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6226 					if (rdev->irq.crtc_vblank_int[3]) {
6227 						drm_handle_vblank(rdev->ddev, 3);
6228 						rdev->pm.vblank_sync = true;
6229 						wake_up(&rdev->irq.vblank_queue);
6230 					}
6231 					if (atomic_read(&rdev->irq.pflip[3]))
6232 						radeon_crtc_handle_flip(rdev, 3);
6233 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6234 					DRM_DEBUG("IH: D4 vblank\n");
6235 				}
6236 				break;
6237 			case 1: /* D4 vline */
6238 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6239 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6240 					DRM_DEBUG("IH: D4 vline\n");
6241 				}
6242 				break;
6243 			default:
6244 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6245 				break;
6246 			}
6247 			break;
6248 		case 5: /* D5 vblank/vline */
6249 			switch (src_data) {
6250 			case 0: /* D5 vblank */
6251 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6252 					if (rdev->irq.crtc_vblank_int[4]) {
6253 						drm_handle_vblank(rdev->ddev, 4);
6254 						rdev->pm.vblank_sync = true;
6255 						wake_up(&rdev->irq.vblank_queue);
6256 					}
6257 					if (atomic_read(&rdev->irq.pflip[4]))
6258 						radeon_crtc_handle_flip(rdev, 4);
6259 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6260 					DRM_DEBUG("IH: D5 vblank\n");
6261 				}
6262 				break;
6263 			case 1: /* D5 vline */
6264 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6265 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6266 					DRM_DEBUG("IH: D5 vline\n");
6267 				}
6268 				break;
6269 			default:
6270 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6271 				break;
6272 			}
6273 			break;
6274 		case 6: /* D6 vblank/vline */
6275 			switch (src_data) {
6276 			case 0: /* D6 vblank */
6277 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6278 					if (rdev->irq.crtc_vblank_int[5]) {
6279 						drm_handle_vblank(rdev->ddev, 5);
6280 						rdev->pm.vblank_sync = true;
6281 						wake_up(&rdev->irq.vblank_queue);
6282 					}
6283 					if (atomic_read(&rdev->irq.pflip[5]))
6284 						radeon_crtc_handle_flip(rdev, 5);
6285 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6286 					DRM_DEBUG("IH: D6 vblank\n");
6287 				}
6288 				break;
6289 			case 1: /* D6 vline */
6290 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6291 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6292 					DRM_DEBUG("IH: D6 vline\n");
6293 				}
6294 				break;
6295 			default:
6296 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6297 				break;
6298 			}
6299 			break;
6300 		case 8: /* D1 page flip */
6301 		case 10: /* D2 page flip */
6302 		case 12: /* D3 page flip */
6303 		case 14: /* D4 page flip */
6304 		case 16: /* D5 page flip */
6305 		case 18: /* D6 page flip */
6306 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6307 			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6308 			break;
6309 		case 42: /* HPD hotplug */
6310 			switch (src_data) {
6311 			case 0:
6312 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6313 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6314 					queue_hotplug = true;
6315 					DRM_DEBUG("IH: HPD1\n");
6316 				}
6317 				break;
6318 			case 1:
6319 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6320 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6321 					queue_hotplug = true;
6322 					DRM_DEBUG("IH: HPD2\n");
6323 				}
6324 				break;
6325 			case 2:
6326 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6327 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6328 					queue_hotplug = true;
6329 					DRM_DEBUG("IH: HPD3\n");
6330 				}
6331 				break;
6332 			case 3:
6333 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6334 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6335 					queue_hotplug = true;
6336 					DRM_DEBUG("IH: HPD4\n");
6337 				}
6338 				break;
6339 			case 4:
6340 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6341 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6342 					queue_hotplug = true;
6343 					DRM_DEBUG("IH: HPD5\n");
6344 				}
6345 				break;
6346 			case 5:
6347 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6348 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6349 					queue_hotplug = true;
6350 					DRM_DEBUG("IH: HPD6\n");
6351 				}
6352 				break;
6353 			default:
6354 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6355 				break;
6356 			}
6357 			break;
6358 		case 124: /* UVD */
6359 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6360 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6361 			break;
6362 		case 146:
6363 		case 147:
6364 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6365 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6366 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6367 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6368 				addr);
6369 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6370 				status);
6371 			si_vm_decode_fault(rdev, status, addr);
6372 			/* reset addr and status */
6373 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6374 			break;
6375 		case 176: /* RINGID0 CP_INT */
6376 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6377 			break;
6378 		case 177: /* RINGID1 CP_INT */
6379 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6380 			break;
6381 		case 178: /* RINGID2 CP_INT */
6382 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6383 			break;
6384 		case 181: /* CP EOP event */
6385 			DRM_DEBUG("IH: CP EOP\n");
6386 			switch (ring_id) {
6387 			case 0:
6388 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6389 				break;
6390 			case 1:
6391 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6392 				break;
6393 			case 2:
6394 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6395 				break;
6396 			}
6397 			break;
6398 		case 224: /* DMA trap event */
6399 			DRM_DEBUG("IH: DMA trap\n");
6400 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6401 			break;
6402 		case 230: /* thermal low to high */
6403 			DRM_DEBUG("IH: thermal low to high\n");
6404 			rdev->pm.dpm.thermal.high_to_low = false;
6405 			queue_thermal = true;
6406 			break;
6407 		case 231: /* thermal high to low */
6408 			DRM_DEBUG("IH: thermal high to low\n");
6409 			rdev->pm.dpm.thermal.high_to_low = true;
6410 			queue_thermal = true;
6411 			break;
6412 		case 233: /* GUI IDLE */
6413 			DRM_DEBUG("IH: GUI idle\n");
6414 			break;
6415 		case 244: /* DMA trap event */
6416 			DRM_DEBUG("IH: DMA1 trap\n");
6417 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6418 			break;
6419 		default:
6420 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6421 			break;
6422 		}
6423 
6424 		/* wptr/rptr are in bytes! */
6425 		rptr += 16;
6426 		rptr &= rdev->ih.ptr_mask;
6427 	}
6428 	if (queue_hotplug)
6429 		schedule_work(&rdev->hotplug_work);
6430 	if (queue_thermal && rdev->pm.dpm_enabled)
6431 		schedule_work(&rdev->pm.dpm.thermal.work);
6432 	rdev->ih.rptr = rptr;
6433 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6434 	atomic_set(&rdev->ih.lock, 0);
6435 
6436 	/* make sure wptr hasn't changed while processing */
6437 	wptr = si_get_ih_wptr(rdev);
6438 	if (wptr != rptr)
6439 		goto restart_ih;
6440 
6441 	return IRQ_HANDLED;
6442 }
6443 
6444 /*
6445  * startup/shutdown callbacks
6446  */
6447 static int si_startup(struct radeon_device *rdev)
6448 {
6449 	struct radeon_ring *ring;
6450 	int r;
6451 
6452 	/* enable pcie gen2/3 link */
6453 	si_pcie_gen3_enable(rdev);
6454 	/* enable aspm */
6455 	si_program_aspm(rdev);
6456 
6457 	/* scratch needs to be initialized before MC */
6458 	r = r600_vram_scratch_init(rdev);
6459 	if (r)
6460 		return r;
6461 
6462 	si_mc_program(rdev);
6463 
6464 	if (!rdev->pm.dpm_enabled) {
6465 		r = si_mc_load_microcode(rdev);
6466 		if (r) {
6467 			DRM_ERROR("Failed to load MC firmware!\n");
6468 			return r;
6469 		}
6470 	}
6471 
6472 	r = si_pcie_gart_enable(rdev);
6473 	if (r)
6474 		return r;
6475 	si_gpu_init(rdev);
6476 
6477 	/* allocate rlc buffers */
6478 	if (rdev->family == CHIP_VERDE) {
6479 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6480 		rdev->rlc.reg_list_size =
6481 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6482 	}
6483 	rdev->rlc.cs_data = si_cs_data;
6484 	r = sumo_rlc_init(rdev);
6485 	if (r) {
6486 		DRM_ERROR("Failed to init rlc BOs!\n");
6487 		return r;
6488 	}
6489 
6490 	/* allocate wb buffer */
6491 	r = radeon_wb_init(rdev);
6492 	if (r)
6493 		return r;
6494 
6495 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6496 	if (r) {
6497 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6498 		return r;
6499 	}
6500 
6501 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6502 	if (r) {
6503 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6504 		return r;
6505 	}
6506 
6507 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6508 	if (r) {
6509 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6510 		return r;
6511 	}
6512 
6513 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6514 	if (r) {
6515 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6516 		return r;
6517 	}
6518 
6519 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6520 	if (r) {
6521 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6522 		return r;
6523 	}
6524 
6525 	if (rdev->has_uvd) {
6526 		r = uvd_v2_2_resume(rdev);
6527 		if (!r) {
6528 			r = radeon_fence_driver_start_ring(rdev,
6529 							   R600_RING_TYPE_UVD_INDEX);
6530 			if (r)
6531 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6532 		}
6533 		if (r)
6534 			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6535 	}
6536 
6537 	/* Enable IRQ */
6538 	if (!rdev->irq.installed) {
6539 		r = radeon_irq_kms_init(rdev);
6540 		if (r)
6541 			return r;
6542 	}
6543 
6544 	r = si_irq_init(rdev);
6545 	if (r) {
6546 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6547 		radeon_irq_kms_fini(rdev);
6548 		return r;
6549 	}
6550 	si_irq_set(rdev);
6551 
6552 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6553 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6554 			     RADEON_CP_PACKET2);
6555 	if (r)
6556 		return r;
6557 
6558 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6559 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6560 			     RADEON_CP_PACKET2);
6561 	if (r)
6562 		return r;
6563 
6564 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6565 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6566 			     RADEON_CP_PACKET2);
6567 	if (r)
6568 		return r;
6569 
6570 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6571 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6572 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6573 	if (r)
6574 		return r;
6575 
6576 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6577 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6578 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6579 	if (r)
6580 		return r;
6581 
6582 	r = si_cp_load_microcode(rdev);
6583 	if (r)
6584 		return r;
6585 	r = si_cp_resume(rdev);
6586 	if (r)
6587 		return r;
6588 
6589 	r = cayman_dma_resume(rdev);
6590 	if (r)
6591 		return r;
6592 
6593 	if (rdev->has_uvd) {
6594 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6595 		if (ring->ring_size) {
6596 			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6597 					     RADEON_CP_PACKET2);
6598 			if (!r)
6599 				r = uvd_v1_0_init(rdev);
6600 			if (r)
6601 				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6602 		}
6603 	}
6604 
6605 	r = radeon_ib_pool_init(rdev);
6606 	if (r) {
6607 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6608 		return r;
6609 	}
6610 
6611 	r = radeon_vm_manager_init(rdev);
6612 	if (r) {
6613 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6614 		return r;
6615 	}
6616 
6617 	r = dce6_audio_init(rdev);
6618 	if (r)
6619 		return r;
6620 
6621 	return 0;
6622 }
6623 
6624 int si_resume(struct radeon_device *rdev)
6625 {
6626 	int r;
6627 
6628 	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500 hw,
6629 	 * posting will perform the necessary tasks to bring the GPU back into
6630 	 * good shape.
6631 	 */
6632 	/* post card */
6633 	atom_asic_init(rdev->mode_info.atom_context);
6634 
6635 	/* init golden registers */
6636 	si_init_golden_registers(rdev);
6637 
6638 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6639 		radeon_pm_resume(rdev);
6640 
6641 	rdev->accel_working = true;
6642 	r = si_startup(rdev);
6643 	if (r) {
6644 		DRM_ERROR("si startup failed on resume\n");
6645 		rdev->accel_working = false;
6646 		return r;
6647 	}
6648 
6649 	return r;
6650 
6651 }
6652 
6653 int si_suspend(struct radeon_device *rdev)
6654 {
6655 	radeon_pm_suspend(rdev);
6656 	dce6_audio_fini(rdev);
6657 	radeon_vm_manager_fini(rdev);
6658 	si_cp_enable(rdev, false);
6659 	cayman_dma_stop(rdev);
6660 	if (rdev->has_uvd) {
6661 		uvd_v1_0_fini(rdev);
6662 		radeon_uvd_suspend(rdev);
6663 	}
6664 	si_fini_pg(rdev);
6665 	si_fini_cg(rdev);
6666 	si_irq_suspend(rdev);
6667 	radeon_wb_disable(rdev);
6668 	si_pcie_gart_disable(rdev);
6669 	return 0;
6670 }
6671 
6672 /* The plan is to move initialization into this function and to use
6673  * helper functions so that radeon_device_init does pretty much
6674  * nothing more than call asic-specific functions. This should
6675  * also allow us to remove a bunch of callback functions, such as
6676  * vram_info.
6677  */
6678 int si_init(struct radeon_device *rdev)
6679 {
6680 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6681 	int r;
6682 
6683 	/* Read BIOS */
6684 	if (!radeon_get_bios(rdev)) {
6685 		if (ASIC_IS_AVIVO(rdev))
6686 			return -EINVAL;
6687 	}
6688 	/* Must be an ATOMBIOS */
6689 	if (!rdev->is_atom_bios) {
6690 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6691 		return -EINVAL;
6692 	}
6693 	r = radeon_atombios_init(rdev);
6694 	if (r)
6695 		return r;
6696 
6697 	/* Post card if necessary */
6698 	if (!radeon_card_posted(rdev)) {
6699 		if (!rdev->bios) {
6700 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6701 			return -EINVAL;
6702 		}
6703 		DRM_INFO("GPU not posted. posting now...\n");
6704 		atom_asic_init(rdev->mode_info.atom_context);
6705 	}
6706 	/* init golden registers */
6707 	si_init_golden_registers(rdev);
6708 	/* Initialize scratch registers */
6709 	si_scratch_init(rdev);
6710 	/* Initialize surface registers */
6711 	radeon_surface_init(rdev);
6712 	/* Initialize clocks */
6713 	radeon_get_clock_info(rdev->ddev);
6714 
6715 	/* Fence driver */
6716 	r = radeon_fence_driver_init(rdev);
6717 	if (r)
6718 		return r;
6719 
6720 	/* initialize memory controller */
6721 	r = si_mc_init(rdev);
6722 	if (r)
6723 		return r;
6724 	/* Memory manager */
6725 	r = radeon_bo_init(rdev);
6726 	if (r)
6727 		return r;
6728 
6729 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6730 	    !rdev->rlc_fw || !rdev->mc_fw) {
6731 		r = si_init_microcode(rdev);
6732 		if (r) {
6733 			DRM_ERROR("Failed to load firmware!\n");
6734 			return r;
6735 		}
6736 	}
6737 
6738 	/* Initialize power management */
6739 	radeon_pm_init(rdev);
6740 
6741 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6742 	ring->ring_obj = NULL;
6743 	r600_ring_init(rdev, ring, 1024 * 1024);
6744 
6745 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6746 	ring->ring_obj = NULL;
6747 	r600_ring_init(rdev, ring, 1024 * 1024);
6748 
6749 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6750 	ring->ring_obj = NULL;
6751 	r600_ring_init(rdev, ring, 1024 * 1024);
6752 
6753 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6754 	ring->ring_obj = NULL;
6755 	r600_ring_init(rdev, ring, 64 * 1024);
6756 
6757 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6758 	ring->ring_obj = NULL;
6759 	r600_ring_init(rdev, ring, 64 * 1024);
6760 
6761 	if (rdev->has_uvd) {
6762 		r = radeon_uvd_init(rdev);
6763 		if (!r) {
6764 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6765 			ring->ring_obj = NULL;
6766 			r600_ring_init(rdev, ring, 4096);
6767 		}
6768 	}
6769 
6770 	rdev->ih.ring_obj = NULL;
6771 	r600_ih_ring_init(rdev, 64 * 1024);
6772 
6773 	r = r600_pcie_gart_init(rdev);
6774 	if (r)
6775 		return r;
6776 
6777 	rdev->accel_working = true;
6778 	r = si_startup(rdev);
6779 	if (r) {
6780 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6781 		si_cp_fini(rdev);
6782 		cayman_dma_fini(rdev);
6783 		si_irq_fini(rdev);
6784 		sumo_rlc_fini(rdev);
6785 		radeon_wb_fini(rdev);
6786 		radeon_ib_pool_fini(rdev);
6787 		radeon_vm_manager_fini(rdev);
6788 		radeon_irq_kms_fini(rdev);
6789 		si_pcie_gart_fini(rdev);
6790 		rdev->accel_working = false;
6791 	}
6792 
6793 	/* Don't start up if the MC ucode is missing.
6794 	 * The default clocks and voltages before the MC ucode
6795 	 * is loaded are not sufficient for advanced operations.
6796 	 */
6797 	if (!rdev->mc_fw) {
6798 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6799 		return -EINVAL;
6800 	}
6801 
6802 	return 0;
6803 }
6804 
6805 void si_fini(struct radeon_device *rdev)
6806 {
6807 	radeon_pm_fini(rdev);
6808 	si_cp_fini(rdev);
6809 	cayman_dma_fini(rdev);
6810 	si_fini_pg(rdev);
6811 	si_fini_cg(rdev);
6812 	si_irq_fini(rdev);
6813 	sumo_rlc_fini(rdev);
6814 	radeon_wb_fini(rdev);
6815 	radeon_vm_manager_fini(rdev);
6816 	radeon_ib_pool_fini(rdev);
6817 	radeon_irq_kms_fini(rdev);
6818 	if (rdev->has_uvd) {
6819 		uvd_v1_0_fini(rdev);
6820 		radeon_uvd_fini(rdev);
6821 	}
6822 	si_pcie_gart_fini(rdev);
6823 	r600_vram_scratch_fini(rdev);
6824 	radeon_gem_fini(rdev);
6825 	radeon_fence_driver_fini(rdev);
6826 	radeon_bo_fini(rdev);
6827 	radeon_atombios_fini(rdev);
6828 	kfree(rdev->bios);
6829 	rdev->bios = NULL;
6830 }
6831 
6832 /**
6833  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6834  *
6835  * @rdev: radeon_device pointer
6836  *
6837  * Fetches a GPU clock counter snapshot (SI).
6838  * Returns the 64-bit clock counter snapshot.
6839  */
6840 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6841 {
6842 	uint64_t clock;
6843 
6844 	mutex_lock(&rdev->gpu_clock_mutex);
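	/* Writing the capture bit latches the free-running 64-bit counter so
	 * the LSB/MSB halves read back coherently; the mutex serializes the
	 * latch/read sequence between callers.  A caller can time GPU work
	 * with two snapshots, e.g. (sketch only; a 27 MHz reference clock is
	 * assumed purely for illustration):
	 *
	 *	u64 t0 = si_get_gpu_clock_counter(rdev);
	 *	... submit and wait for work ...
	 *	u64 us = (si_get_gpu_clock_counter(rdev) - t0) / 27;
	 */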
6845 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6846 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6847 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6848 	mutex_unlock(&rdev->gpu_clock_mutex);
6849 	return clock;
6850 }
6851 
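/* Reprogram the UVD PLL (UPLL) for the requested video (vclk) and decode
 * (dclk) clocks.  The sequence below follows the usual PLL bring-up
 * pattern: park both clocks on the bypass source, compute dividers,
 * program them while the PLL is held in reset, let it settle and lock,
 * then switch the clock muxes back to the PLL outputs.
 */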
6852 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6853 {
6854 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6855 	int r;
6856 
6857 	/* bypass vclk and dclk with bclk */
6858 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6859 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6860 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6861 
6862 	/* put PLL in bypass mode */
6863 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6864 
6865 	if (!vclk || !dclk) {
6866 		/* keep bypass mode and put the PLL to sleep */
6867 		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6868 		return 0;
6869 	}
6870 
6871 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6872 					  16384, 0x03FFFFFF, 0, 128, 5,
6873 					  &fb_div, &vclk_div, &dclk_div);
6874 	if (r)
6875 		return r;
6876 
6877 	/* set RESET_ANTI_MUX to 0 */
6878 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6879 
6880 	/* set VCO_MODE to 1 */
6881 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6882 
6883 	/* toggle UPLL_SLEEP to 1 then back to 0 */
6884 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6885 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6886 
6887 	/* deassert UPLL_RESET */
6888 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6889 
6890 	mdelay(1);
6891 
6892 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6893 	if (r)
6894 		return r;
6895 
6896 	/* assert UPLL_RESET again */
6897 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6898 
6899 	/* disable spread spectrum */
6900 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6901 
6902 	/* set feedback divider */
6903 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6904 
6905 	/* set ref divider to 0 */
6906 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6907 
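	/* fb_div is scaled by the 16384 (2^14) factor passed to
	 * radeon_uvd_calc_upll_dividers() above, so this 307200 cutoff
	 * corresponds to a feedback ratio of 307200 / 16384 = 18.75;
	 * the ISPARE9 bit appears to select a VCO setting for the higher
	 * ratios.
	 */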
6908 	if (fb_div < 307200)
6909 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6910 	else
6911 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6912 
6913 	/* set PDIV_A and PDIV_B */
6914 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6915 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6916 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6917 
6918 	/* give the PLL some time to settle */
6919 	mdelay(15);
6920 
6921 	/* deassert UPLL_RESET */
6922 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6923 
6924 	mdelay(15);
6925 
6926 	/* switch from bypass mode to normal mode */
6927 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6928 
6929 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6930 	if (r)
6931 		return r;
6932 
6933 	/* switch VCLK and DCLK selection */
6934 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6935 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6936 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6937 
6938 	mdelay(100);
6939 
6940 	return 0;
6941 }
6942 
6943 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6944 {
6945 	struct pci_dev *root = rdev->pdev->bus->self;
6946 	int bridge_pos, gpu_pos;
6947 	u32 speed_cntl, mask, current_data_rate;
6948 	int ret, i;
6949 	u16 tmp16;
6950 
6951 	if (radeon_pcie_gen2 == 0)
6952 		return;
6953 
6954 	if (rdev->flags & RADEON_IS_IGP)
6955 		return;
6956 
6957 	if (!(rdev->flags & RADEON_IS_PCIE))
6958 		return;
6959 
6960 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6961 	if (ret != 0)
6962 		return;
6963 
6964 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6965 		return;
6966 
6967 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6968 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6969 		LC_CURRENT_DATA_RATE_SHIFT;
6970 	if (mask & DRM_PCIE_SPEED_80) {
6971 		if (current_data_rate == 2) {
6972 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6973 			return;
6974 		}
6975 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6976 	} else if (mask & DRM_PCIE_SPEED_50) {
6977 		if (current_data_rate == 1) {
6978 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6979 			return;
6980 		}
6981 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6982 	}
6983 
6984 	bridge_pos = pci_pcie_cap(root);
6985 	if (!bridge_pos)
6986 		return;
6987 
6988 	gpu_pos = pci_pcie_cap(rdev->pdev);
6989 	if (!gpu_pos)
6990 		return;
6991 
6992 	if (mask & DRM_PCIE_SPEED_80) {
6993 		/* re-try equalization if gen3 is not already enabled */
6994 		if (current_data_rate != 2) {
6995 			u16 bridge_cfg, gpu_cfg;
6996 			u16 bridge_cfg2, gpu_cfg2;
6997 			u32 max_lw, current_lw, tmp;
6998 
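
			/* Latch both ends' Link Control values and force
			 * hardware-autonomous width handling off (HAWD) so
			 * the link width can't renegotiate in the middle of
			 * the equalization retries below; the saved values
			 * are restored after each retry.
			 */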
6999 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7000 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7001 
7002 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7003 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7004 
7005 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7006 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7007 
7008 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7009 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7010 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7011 
7012 			if (current_lw < max_lw) {
7013 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7014 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7015 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7016 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7017 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7018 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7019 				}
7020 			}
7021 
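
			/* Up to ten equalization passes: quiesce the link,
			 * request redo-EQ, then restore the saved LNKCTL and
			 * LNKCTL2 fields on both ends of the link.  Bail out
			 * early once the GPU reports transactions pending.
			 */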
7022 			for (i = 0; i < 10; i++) {
7023 				/* check status */
7024 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7025 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7026 					break;
7027 
7028 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7029 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7030 
7031 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7032 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7033 
7034 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7035 				tmp |= LC_SET_QUIESCE;
7036 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7037 
7038 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7039 				tmp |= LC_REDO_EQ;
7040 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7041 
7042 				mdelay(100);
7043 
7044 				/* linkctl: restore the saved HAWD setting on both ends */
7045 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7046 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7047 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7048 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7049 
7050 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7051 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7052 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7053 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7054 
7055 				/* linkctl2 */
7056 				/* linkctl2: restore saved compliance fields (bits 4, 9-11) */
7057 				tmp16 &= ~((1 << 4) | (7 << 9));
7058 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7059 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7060 
7061 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7062 				tmp16 &= ~((1 << 4) | (7 << 9));
7063 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7064 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7065 
7066 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7067 				tmp &= ~LC_SET_QUIESCE;
7068 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7069 			}
7070 		}
7071 	}
7072 
7073 	/* set the link speed */
7074 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7075 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7076 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7077 
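	/* Program the Target Link Speed field (LNKCTL2[3:0]):
	 * 1 = 2.5GT/s (gen1), 2 = 5.0GT/s (gen2), 3 = 8.0GT/s (gen3).
	 */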
7078 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7079 	tmp16 &= ~0xf;
7080 	if (mask & DRM_PCIE_SPEED_80)
7081 		tmp16 |= 3; /* gen3 */
7082 	else if (mask & DRM_PCIE_SPEED_50)
7083 		tmp16 |= 2; /* gen2 */
7084 	else
7085 		tmp16 |= 1; /* gen1 */
7086 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7087 
7088 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7089 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7090 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7091 
7092 	for (i = 0; i < rdev->usec_timeout; i++) {
7093 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7094 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7095 			break;
7096 		udelay(1);
7097 	}
7098 }
7099 
7100 static void si_program_aspm(struct radeon_device *rdev)
7101 {
7102 	u32 data, orig;
7103 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7104 	bool disable_clkreq = false;
7105 
7106 	if (radeon_aspm == 0)
7107 		return;
7108 
7109 	if (!(rdev->flags & RADEON_IS_PCIE))
7110 		return;
7111 
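	/* Everything below is read-modify-write: snapshot the register in
	 * 'orig', adjust 'data', and write back only when the value actually
	 * changed, sparing needless traffic on the indirect PCIE-port
	 * register interface.
	 */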
7112 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7113 	data &= ~LC_XMIT_N_FTS_MASK;
7114 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7115 	if (orig != data)
7116 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7117 
7118 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7119 	data |= LC_GO_TO_RECOVERY;
7120 	if (orig != data)
7121 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7122 
7123 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7124 	data |= P_IGNORE_EDB_ERR;
7125 	if (orig != data)
7126 		WREG32_PCIE(PCIE_P_CNTL, data);
7127 
7128 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7129 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7130 	data |= LC_PMI_TO_L1_DIS;
7131 	if (!disable_l0s)
7132 		data |= LC_L0S_INACTIVITY(7);
7133 
7134 	if (!disable_l1) {
7135 		data |= LC_L1_INACTIVITY(7);
7136 		data &= ~LC_PMI_TO_L1_DIS;
7137 		if (orig != data)
7138 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7139 
7140 		if (!disable_plloff_in_l1) {
7141 			bool clk_req_support;
7142 
7143 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7144 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7145 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7146 			if (orig != data)
7147 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7148 
7149 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7150 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7151 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7152 			if (orig != data)
7153 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7154 
7155 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7156 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7157 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7158 			if (orig != data)
7159 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7160 
7161 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7162 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7163 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7164 			if (orig != data)
7165 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7166 
7167 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7168 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7169 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7170 				if (orig != data)
7171 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7172 
7173 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7174 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7175 				if (orig != data)
7176 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7177 
7178 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7179 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7180 				if (orig != data)
7181 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7182 
7183 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7184 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7185 				if (orig != data)
7186 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7187 
7188 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7189 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7190 				if (orig != data)
7191 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7192 
7193 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7194 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7195 				if (orig != data)
7196 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7197 
7198 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7199 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7200 				if (orig != data)
7201 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7202 
7203 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7204 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7205 				if (orig != data)
7206 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7207 			}
7208 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7209 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7210 			data |= LC_DYN_LANES_PWR_STATE(3);
7211 			if (orig != data)
7212 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7213 
7214 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7215 			data &= ~LS2_EXIT_TIME_MASK;
7216 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7217 				data |= LS2_EXIT_TIME(5);
7218 			if (orig != data)
7219 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7220 
7221 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7222 			data &= ~LS2_EXIT_TIME_MASK;
7223 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7224 				data |= LS2_EXIT_TIME(5);
7225 			if (orig != data)
7226 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7227 
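			/* Deeper power-down in L1/L23 is only enabled when the
			 * upstream bridge advertises Clock Power Management
			 * (PCI_EXP_LNKCAP_CLKPM), i.e. CLKREQ# is usable.
			 */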
7228 			if (!disable_clkreq) {
7229 				struct pci_dev *root = rdev->pdev->bus->self;
7230 				u32 lnkcap;
7231 
7232 				clk_req_support = false;
7233 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7234 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7235 					clk_req_support = true;
7236 			} else {
7237 				clk_req_support = false;
7238 			}
7239 
7240 			if (clk_req_support) {
7241 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7242 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7243 				if (orig != data)
7244 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7245 
7246 				orig = data = RREG32(THM_CLK_CNTL);
7247 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7248 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7249 				if (orig != data)
7250 					WREG32(THM_CLK_CNTL, data);
7251 
7252 				orig = data = RREG32(MISC_CLK_CNTL);
7253 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7254 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7255 				if (orig != data)
7256 					WREG32(MISC_CLK_CNTL, data);
7257 
7258 				orig = data = RREG32(CG_CLKPIN_CNTL);
7259 				data &= ~BCLK_AS_XCLK;
7260 				if (orig != data)
7261 					WREG32(CG_CLKPIN_CNTL, data);
7262 
7263 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7264 				data &= ~FORCE_BIF_REFCLK_EN;
7265 				if (orig != data)
7266 					WREG32(CG_CLKPIN_CNTL_2, data);
7267 
7268 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7269 				data &= ~MPLL_CLKOUT_SEL_MASK;
7270 				data |= MPLL_CLKOUT_SEL(4);
7271 				if (orig != data)
7272 					WREG32(MPLL_BYPASSCLK_SEL, data);
7273 
7274 				orig = data = RREG32(SPLL_CNTL_MODE);
7275 				data &= ~SPLL_REFCLK_SEL_MASK;
7276 				if (orig != data)
7277 					WREG32(SPLL_CNTL_MODE, data);
7278 			}
7279 		}
7280 	} else {
7281 		if (orig != data)
7282 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7283 	}
7284 
7285 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7286 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7287 	if (orig != data)
7288 		WREG32_PCIE(PCIE_CNTL2, data);
7289 
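	/* L0s was requested above; back it out again (zero the inactivity
	 * timer) when the advertised N_FTS value is saturated on a
	 * lane-reversed link, which looks like a workaround for unreliable
	 * L0s exit in that configuration.
	 */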
7290 	if (!disable_l0s) {
7291 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7292 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7293 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7294 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7295 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7296 				data &= ~LC_L0S_INACTIVITY_MASK;
7297 				if (orig != data)
7298 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7299 			}
7300 		}
7301 	}
7302 }
7303