xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 5104d265)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
74 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
77 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
78 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79 
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82 	(0x8000 << 16) | (0x98f4 >> 2),
83 	0x00000000,
84 	(0x8040 << 16) | (0x98f4 >> 2),
85 	0x00000000,
86 	(0x8000 << 16) | (0xe80 >> 2),
87 	0x00000000,
88 	(0x8040 << 16) | (0xe80 >> 2),
89 	0x00000000,
90 	(0x8000 << 16) | (0x89bc >> 2),
91 	0x00000000,
92 	(0x8040 << 16) | (0x89bc >> 2),
93 	0x00000000,
94 	(0x8000 << 16) | (0x8c1c >> 2),
95 	0x00000000,
96 	(0x8040 << 16) | (0x8c1c >> 2),
97 	0x00000000,
98 	(0x9c00 << 16) | (0x98f0 >> 2),
99 	0x00000000,
100 	(0x9c00 << 16) | (0xe7c >> 2),
101 	0x00000000,
102 	(0x8000 << 16) | (0x9148 >> 2),
103 	0x00000000,
104 	(0x8040 << 16) | (0x9148 >> 2),
105 	0x00000000,
106 	(0x9c00 << 16) | (0x9150 >> 2),
107 	0x00000000,
108 	(0x9c00 << 16) | (0x897c >> 2),
109 	0x00000000,
110 	(0x9c00 << 16) | (0x8d8c >> 2),
111 	0x00000000,
112 	(0x9c00 << 16) | (0xac54 >> 2),
113 	0X00000000,
114 	0x3,
115 	(0x9c00 << 16) | (0x98f8 >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x9910 >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0x9914 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9918 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x991c >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x9920 >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0x9924 >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9928 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x992c >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x9930 >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x9934 >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9938 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x993c >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x9940 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x9944 >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9948 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x994c >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9950 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x9954 >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9958 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x995c >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x9960 >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x9964 >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9968 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x996c >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9970 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x9974 >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9978 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x997c >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9980 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x9984 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9988 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x998c >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x8c00 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x8c14 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x8c04 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x8c08 >> 2),
188 	0x00000000,
189 	(0x8000 << 16) | (0x9b7c >> 2),
190 	0x00000000,
191 	(0x8040 << 16) | (0x9b7c >> 2),
192 	0x00000000,
193 	(0x8000 << 16) | (0xe84 >> 2),
194 	0x00000000,
195 	(0x8040 << 16) | (0xe84 >> 2),
196 	0x00000000,
197 	(0x8000 << 16) | (0x89c0 >> 2),
198 	0x00000000,
199 	(0x8040 << 16) | (0x89c0 >> 2),
200 	0x00000000,
201 	(0x8000 << 16) | (0x914c >> 2),
202 	0x00000000,
203 	(0x8040 << 16) | (0x914c >> 2),
204 	0x00000000,
205 	(0x8000 << 16) | (0x8c20 >> 2),
206 	0x00000000,
207 	(0x8040 << 16) | (0x8c20 >> 2),
208 	0x00000000,
209 	(0x8000 << 16) | (0x9354 >> 2),
210 	0x00000000,
211 	(0x8040 << 16) | (0x9354 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9060 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9364 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9100 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x913c >> 2),
220 	0x00000000,
221 	(0x8000 << 16) | (0x90e0 >> 2),
222 	0x00000000,
223 	(0x8000 << 16) | (0x90e4 >> 2),
224 	0x00000000,
225 	(0x8000 << 16) | (0x90e8 >> 2),
226 	0x00000000,
227 	(0x8040 << 16) | (0x90e0 >> 2),
228 	0x00000000,
229 	(0x8040 << 16) | (0x90e4 >> 2),
230 	0x00000000,
231 	(0x8040 << 16) | (0x90e8 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x8bcc >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x8b24 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x88c4 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8e50 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c0c >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8e58 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8e5c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x9508 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x950c >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9494 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0xac0c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0xac10 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0xac14 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0xae00 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0xac08 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x88d4 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x88c8 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x88cc >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x89b0 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x8b10 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x8a14 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9830 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9834 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x9838 >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x9a10 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x9870 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x9874 >> 2),
286 	0x00000000,
287 	(0x8001 << 16) | (0x9870 >> 2),
288 	0x00000000,
289 	(0x8001 << 16) | (0x9874 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x9870 >> 2),
292 	0x00000000,
293 	(0x8040 << 16) | (0x9874 >> 2),
294 	0x00000000,
295 	(0x8041 << 16) | (0x9870 >> 2),
296 	0x00000000,
297 	(0x8041 << 16) | (0x9874 >> 2),
298 	0x00000000,
299 	0x00000000
300 };
301 
302 static const u32 tahiti_golden_rlc_registers[] =
303 {
304 	0xc424, 0xffffffff, 0x00601005,
305 	0xc47c, 0xffffffff, 0x10104040,
306 	0xc488, 0xffffffff, 0x0100000a,
307 	0xc314, 0xffffffff, 0x00000800,
308 	0xc30c, 0xffffffff, 0x800000f4,
309 	0xf4a8, 0xffffffff, 0x00000000
310 };
311 
312 static const u32 tahiti_golden_registers[] =
313 {
314 	0x9a10, 0x00010000, 0x00018208,
315 	0x9830, 0xffffffff, 0x00000000,
316 	0x9834, 0xf00fffff, 0x00000400,
317 	0x9838, 0x0002021c, 0x00020200,
318 	0xc78, 0x00000080, 0x00000000,
319 	0xd030, 0x000300c0, 0x00800040,
320 	0xd830, 0x000300c0, 0x00800040,
321 	0x5bb0, 0x000000f0, 0x00000070,
322 	0x5bc0, 0x00200000, 0x50100000,
323 	0x7030, 0x31000311, 0x00000011,
324 	0x277c, 0x00000003, 0x000007ff,
325 	0x240c, 0x000007ff, 0x00000000,
326 	0x8a14, 0xf000001f, 0x00000007,
327 	0x8b24, 0xffffffff, 0x00ffffff,
328 	0x8b10, 0x0000ff0f, 0x00000000,
329 	0x28a4c, 0x07ffffff, 0x4e000000,
330 	0x28350, 0x3f3f3fff, 0x2a00126a,
331 	0x30, 0x000000ff, 0x0040,
332 	0x34, 0x00000040, 0x00004040,
333 	0x9100, 0x07ffffff, 0x03000000,
334 	0x8e88, 0x01ff1f3f, 0x00000000,
335 	0x8e84, 0x01ff1f3f, 0x00000000,
336 	0x9060, 0x0000007f, 0x00000020,
337 	0x9508, 0x00010000, 0x00010000,
338 	0xac14, 0x00000200, 0x000002fb,
339 	0xac10, 0xffffffff, 0x0000543b,
340 	0xac0c, 0xffffffff, 0xa9210876,
341 	0x88d0, 0xffffffff, 0x000fff40,
342 	0x88d4, 0x0000001f, 0x00000010,
343 	0x1410, 0x20000000, 0x20fffed8,
344 	0x15c0, 0x000c0fc0, 0x000c0400
345 };
346 
347 static const u32 tahiti_golden_registers2[] =
348 {
349 	0xc64, 0x00000001, 0x00000001
350 };
351 
352 static const u32 pitcairn_golden_rlc_registers[] =
353 {
354 	0xc424, 0xffffffff, 0x00601004,
355 	0xc47c, 0xffffffff, 0x10102020,
356 	0xc488, 0xffffffff, 0x01000020,
357 	0xc314, 0xffffffff, 0x00000800,
358 	0xc30c, 0xffffffff, 0x800000a4
359 };
360 
361 static const u32 pitcairn_golden_registers[] =
362 {
363 	0x9a10, 0x00010000, 0x00018208,
364 	0x9830, 0xffffffff, 0x00000000,
365 	0x9834, 0xf00fffff, 0x00000400,
366 	0x9838, 0x0002021c, 0x00020200,
367 	0xc78, 0x00000080, 0x00000000,
368 	0xd030, 0x000300c0, 0x00800040,
369 	0xd830, 0x000300c0, 0x00800040,
370 	0x5bb0, 0x000000f0, 0x00000070,
371 	0x5bc0, 0x00200000, 0x50100000,
372 	0x7030, 0x31000311, 0x00000011,
373 	0x2ae4, 0x00073ffe, 0x000022a2,
374 	0x240c, 0x000007ff, 0x00000000,
375 	0x8a14, 0xf000001f, 0x00000007,
376 	0x8b24, 0xffffffff, 0x00ffffff,
377 	0x8b10, 0x0000ff0f, 0x00000000,
378 	0x28a4c, 0x07ffffff, 0x4e000000,
379 	0x28350, 0x3f3f3fff, 0x2a00126a,
380 	0x30, 0x000000ff, 0x0040,
381 	0x34, 0x00000040, 0x00004040,
382 	0x9100, 0x07ffffff, 0x03000000,
383 	0x9060, 0x0000007f, 0x00000020,
384 	0x9508, 0x00010000, 0x00010000,
385 	0xac14, 0x000003ff, 0x000000f7,
386 	0xac10, 0xffffffff, 0x00000000,
387 	0xac0c, 0xffffffff, 0x32761054,
388 	0x88d4, 0x0000001f, 0x00000010,
389 	0x15c0, 0x000c0fc0, 0x000c0400
390 };
391 
392 static const u32 verde_golden_rlc_registers[] =
393 {
394 	0xc424, 0xffffffff, 0x033f1005,
395 	0xc47c, 0xffffffff, 0x10808020,
396 	0xc488, 0xffffffff, 0x00800008,
397 	0xc314, 0xffffffff, 0x00001000,
398 	0xc30c, 0xffffffff, 0x80010014
399 };
400 
401 static const u32 verde_golden_registers[] =
402 {
403 	0x9a10, 0x00010000, 0x00018208,
404 	0x9830, 0xffffffff, 0x00000000,
405 	0x9834, 0xf00fffff, 0x00000400,
406 	0x9838, 0x0002021c, 0x00020200,
407 	0xc78, 0x00000080, 0x00000000,
408 	0xd030, 0x000300c0, 0x00800040,
409 	0xd030, 0x000300c0, 0x00800040,
410 	0xd830, 0x000300c0, 0x00800040,
411 	0xd830, 0x000300c0, 0x00800040,
412 	0x5bb0, 0x000000f0, 0x00000070,
413 	0x5bc0, 0x00200000, 0x50100000,
414 	0x7030, 0x31000311, 0x00000011,
415 	0x2ae4, 0x00073ffe, 0x000022a2,
416 	0x2ae4, 0x00073ffe, 0x000022a2,
417 	0x2ae4, 0x00073ffe, 0x000022a2,
418 	0x240c, 0x000007ff, 0x00000000,
419 	0x240c, 0x000007ff, 0x00000000,
420 	0x240c, 0x000007ff, 0x00000000,
421 	0x8a14, 0xf000001f, 0x00000007,
422 	0x8a14, 0xf000001f, 0x00000007,
423 	0x8a14, 0xf000001f, 0x00000007,
424 	0x8b24, 0xffffffff, 0x00ffffff,
425 	0x8b10, 0x0000ff0f, 0x00000000,
426 	0x28a4c, 0x07ffffff, 0x4e000000,
427 	0x28350, 0x3f3f3fff, 0x0000124a,
428 	0x28350, 0x3f3f3fff, 0x0000124a,
429 	0x28350, 0x3f3f3fff, 0x0000124a,
430 	0x30, 0x000000ff, 0x0040,
431 	0x34, 0x00000040, 0x00004040,
432 	0x9100, 0x07ffffff, 0x03000000,
433 	0x9100, 0x07ffffff, 0x03000000,
434 	0x8e88, 0x01ff1f3f, 0x00000000,
435 	0x8e88, 0x01ff1f3f, 0x00000000,
436 	0x8e88, 0x01ff1f3f, 0x00000000,
437 	0x8e84, 0x01ff1f3f, 0x00000000,
438 	0x8e84, 0x01ff1f3f, 0x00000000,
439 	0x8e84, 0x01ff1f3f, 0x00000000,
440 	0x9060, 0x0000007f, 0x00000020,
441 	0x9508, 0x00010000, 0x00010000,
442 	0xac14, 0x000003ff, 0x00000003,
443 	0xac14, 0x000003ff, 0x00000003,
444 	0xac14, 0x000003ff, 0x00000003,
445 	0xac10, 0xffffffff, 0x00000000,
446 	0xac10, 0xffffffff, 0x00000000,
447 	0xac10, 0xffffffff, 0x00000000,
448 	0xac0c, 0xffffffff, 0x00001032,
449 	0xac0c, 0xffffffff, 0x00001032,
450 	0xac0c, 0xffffffff, 0x00001032,
451 	0x88d4, 0x0000001f, 0x00000010,
452 	0x88d4, 0x0000001f, 0x00000010,
453 	0x88d4, 0x0000001f, 0x00000010,
454 	0x15c0, 0x000c0fc0, 0x000c0400
455 };
456 
457 static const u32 oland_golden_rlc_registers[] =
458 {
459 	0xc424, 0xffffffff, 0x00601005,
460 	0xc47c, 0xffffffff, 0x10104040,
461 	0xc488, 0xffffffff, 0x0100000a,
462 	0xc314, 0xffffffff, 0x00000800,
463 	0xc30c, 0xffffffff, 0x800000f4
464 };
465 
466 static const u32 oland_golden_registers[] =
467 {
468 	0x9a10, 0x00010000, 0x00018208,
469 	0x9830, 0xffffffff, 0x00000000,
470 	0x9834, 0xf00fffff, 0x00000400,
471 	0x9838, 0x0002021c, 0x00020200,
472 	0xc78, 0x00000080, 0x00000000,
473 	0xd030, 0x000300c0, 0x00800040,
474 	0xd830, 0x000300c0, 0x00800040,
475 	0x5bb0, 0x000000f0, 0x00000070,
476 	0x5bc0, 0x00200000, 0x50100000,
477 	0x7030, 0x31000311, 0x00000011,
478 	0x2ae4, 0x00073ffe, 0x000022a2,
479 	0x240c, 0x000007ff, 0x00000000,
480 	0x8a14, 0xf000001f, 0x00000007,
481 	0x8b24, 0xffffffff, 0x00ffffff,
482 	0x8b10, 0x0000ff0f, 0x00000000,
483 	0x28a4c, 0x07ffffff, 0x4e000000,
484 	0x28350, 0x3f3f3fff, 0x00000082,
485 	0x30, 0x000000ff, 0x0040,
486 	0x34, 0x00000040, 0x00004040,
487 	0x9100, 0x07ffffff, 0x03000000,
488 	0x9060, 0x0000007f, 0x00000020,
489 	0x9508, 0x00010000, 0x00010000,
490 	0xac14, 0x000003ff, 0x000000f3,
491 	0xac10, 0xffffffff, 0x00000000,
492 	0xac0c, 0xffffffff, 0x00003210,
493 	0x88d4, 0x0000001f, 0x00000010,
494 	0x15c0, 0x000c0fc0, 0x000c0400
495 };
496 
497 static const u32 hainan_golden_registers[] =
498 {
499 	0x9a10, 0x00010000, 0x00018208,
500 	0x9830, 0xffffffff, 0x00000000,
501 	0x9834, 0xf00fffff, 0x00000400,
502 	0x9838, 0x0002021c, 0x00020200,
503 	0xd0c0, 0xff000fff, 0x00000100,
504 	0xd030, 0x000300c0, 0x00800040,
505 	0xd8c0, 0xff000fff, 0x00000100,
506 	0xd830, 0x000300c0, 0x00800040,
507 	0x2ae4, 0x00073ffe, 0x000022a2,
508 	0x240c, 0x000007ff, 0x00000000,
509 	0x8a14, 0xf000001f, 0x00000007,
510 	0x8b24, 0xffffffff, 0x00ffffff,
511 	0x8b10, 0x0000ff0f, 0x00000000,
512 	0x28a4c, 0x07ffffff, 0x4e000000,
513 	0x28350, 0x3f3f3fff, 0x00000000,
514 	0x30, 0x000000ff, 0x0040,
515 	0x34, 0x00000040, 0x00004040,
516 	0x9100, 0x03e00000, 0x03600000,
517 	0x9060, 0x0000007f, 0x00000020,
518 	0x9508, 0x00010000, 0x00010000,
519 	0xac14, 0x000003ff, 0x000000f1,
520 	0xac10, 0xffffffff, 0x00000000,
521 	0xac0c, 0xffffffff, 0x00003210,
522 	0x88d4, 0x0000001f, 0x00000010,
523 	0x15c0, 0x000c0fc0, 0x000c0400
524 };
525 
526 static const u32 hainan_golden_registers2[] =
527 {
528 	0x98f8, 0xffffffff, 0x02010001
529 };
530 
531 static const u32 tahiti_mgcg_cgcg_init[] =
532 {
533 	0xc400, 0xffffffff, 0xfffffffc,
534 	0x802c, 0xffffffff, 0xe0000000,
535 	0x9a60, 0xffffffff, 0x00000100,
536 	0x92a4, 0xffffffff, 0x00000100,
537 	0xc164, 0xffffffff, 0x00000100,
538 	0x9774, 0xffffffff, 0x00000100,
539 	0x8984, 0xffffffff, 0x06000100,
540 	0x8a18, 0xffffffff, 0x00000100,
541 	0x92a0, 0xffffffff, 0x00000100,
542 	0xc380, 0xffffffff, 0x00000100,
543 	0x8b28, 0xffffffff, 0x00000100,
544 	0x9144, 0xffffffff, 0x00000100,
545 	0x8d88, 0xffffffff, 0x00000100,
546 	0x8d8c, 0xffffffff, 0x00000100,
547 	0x9030, 0xffffffff, 0x00000100,
548 	0x9034, 0xffffffff, 0x00000100,
549 	0x9038, 0xffffffff, 0x00000100,
550 	0x903c, 0xffffffff, 0x00000100,
551 	0xad80, 0xffffffff, 0x00000100,
552 	0xac54, 0xffffffff, 0x00000100,
553 	0x897c, 0xffffffff, 0x06000100,
554 	0x9868, 0xffffffff, 0x00000100,
555 	0x9510, 0xffffffff, 0x00000100,
556 	0xaf04, 0xffffffff, 0x00000100,
557 	0xae04, 0xffffffff, 0x00000100,
558 	0x949c, 0xffffffff, 0x00000100,
559 	0x802c, 0xffffffff, 0xe0000000,
560 	0x9160, 0xffffffff, 0x00010000,
561 	0x9164, 0xffffffff, 0x00030002,
562 	0x9168, 0xffffffff, 0x00040007,
563 	0x916c, 0xffffffff, 0x00060005,
564 	0x9170, 0xffffffff, 0x00090008,
565 	0x9174, 0xffffffff, 0x00020001,
566 	0x9178, 0xffffffff, 0x00040003,
567 	0x917c, 0xffffffff, 0x00000007,
568 	0x9180, 0xffffffff, 0x00060005,
569 	0x9184, 0xffffffff, 0x00090008,
570 	0x9188, 0xffffffff, 0x00030002,
571 	0x918c, 0xffffffff, 0x00050004,
572 	0x9190, 0xffffffff, 0x00000008,
573 	0x9194, 0xffffffff, 0x00070006,
574 	0x9198, 0xffffffff, 0x000a0009,
575 	0x919c, 0xffffffff, 0x00040003,
576 	0x91a0, 0xffffffff, 0x00060005,
577 	0x91a4, 0xffffffff, 0x00000009,
578 	0x91a8, 0xffffffff, 0x00080007,
579 	0x91ac, 0xffffffff, 0x000b000a,
580 	0x91b0, 0xffffffff, 0x00050004,
581 	0x91b4, 0xffffffff, 0x00070006,
582 	0x91b8, 0xffffffff, 0x0008000b,
583 	0x91bc, 0xffffffff, 0x000a0009,
584 	0x91c0, 0xffffffff, 0x000d000c,
585 	0x91c4, 0xffffffff, 0x00060005,
586 	0x91c8, 0xffffffff, 0x00080007,
587 	0x91cc, 0xffffffff, 0x0000000b,
588 	0x91d0, 0xffffffff, 0x000a0009,
589 	0x91d4, 0xffffffff, 0x000d000c,
590 	0x91d8, 0xffffffff, 0x00070006,
591 	0x91dc, 0xffffffff, 0x00090008,
592 	0x91e0, 0xffffffff, 0x0000000c,
593 	0x91e4, 0xffffffff, 0x000b000a,
594 	0x91e8, 0xffffffff, 0x000e000d,
595 	0x91ec, 0xffffffff, 0x00080007,
596 	0x91f0, 0xffffffff, 0x000a0009,
597 	0x91f4, 0xffffffff, 0x0000000d,
598 	0x91f8, 0xffffffff, 0x000c000b,
599 	0x91fc, 0xffffffff, 0x000f000e,
600 	0x9200, 0xffffffff, 0x00090008,
601 	0x9204, 0xffffffff, 0x000b000a,
602 	0x9208, 0xffffffff, 0x000c000f,
603 	0x920c, 0xffffffff, 0x000e000d,
604 	0x9210, 0xffffffff, 0x00110010,
605 	0x9214, 0xffffffff, 0x000a0009,
606 	0x9218, 0xffffffff, 0x000c000b,
607 	0x921c, 0xffffffff, 0x0000000f,
608 	0x9220, 0xffffffff, 0x000e000d,
609 	0x9224, 0xffffffff, 0x00110010,
610 	0x9228, 0xffffffff, 0x000b000a,
611 	0x922c, 0xffffffff, 0x000d000c,
612 	0x9230, 0xffffffff, 0x00000010,
613 	0x9234, 0xffffffff, 0x000f000e,
614 	0x9238, 0xffffffff, 0x00120011,
615 	0x923c, 0xffffffff, 0x000c000b,
616 	0x9240, 0xffffffff, 0x000e000d,
617 	0x9244, 0xffffffff, 0x00000011,
618 	0x9248, 0xffffffff, 0x0010000f,
619 	0x924c, 0xffffffff, 0x00130012,
620 	0x9250, 0xffffffff, 0x000d000c,
621 	0x9254, 0xffffffff, 0x000f000e,
622 	0x9258, 0xffffffff, 0x00100013,
623 	0x925c, 0xffffffff, 0x00120011,
624 	0x9260, 0xffffffff, 0x00150014,
625 	0x9264, 0xffffffff, 0x000e000d,
626 	0x9268, 0xffffffff, 0x0010000f,
627 	0x926c, 0xffffffff, 0x00000013,
628 	0x9270, 0xffffffff, 0x00120011,
629 	0x9274, 0xffffffff, 0x00150014,
630 	0x9278, 0xffffffff, 0x000f000e,
631 	0x927c, 0xffffffff, 0x00110010,
632 	0x9280, 0xffffffff, 0x00000014,
633 	0x9284, 0xffffffff, 0x00130012,
634 	0x9288, 0xffffffff, 0x00160015,
635 	0x928c, 0xffffffff, 0x0010000f,
636 	0x9290, 0xffffffff, 0x00120011,
637 	0x9294, 0xffffffff, 0x00000015,
638 	0x9298, 0xffffffff, 0x00140013,
639 	0x929c, 0xffffffff, 0x00170016,
640 	0x9150, 0xffffffff, 0x96940200,
641 	0x8708, 0xffffffff, 0x00900100,
642 	0xc478, 0xffffffff, 0x00000080,
643 	0xc404, 0xffffffff, 0x0020003f,
644 	0x30, 0xffffffff, 0x0000001c,
645 	0x34, 0x000f0000, 0x000f0000,
646 	0x160c, 0xffffffff, 0x00000100,
647 	0x1024, 0xffffffff, 0x00000100,
648 	0x102c, 0x00000101, 0x00000000,
649 	0x20a8, 0xffffffff, 0x00000104,
650 	0x264c, 0x000c0000, 0x000c0000,
651 	0x2648, 0x000c0000, 0x000c0000,
652 	0x55e4, 0xff000fff, 0x00000100,
653 	0x55e8, 0x00000001, 0x00000001,
654 	0x2f50, 0x00000001, 0x00000001,
655 	0x30cc, 0xc0000fff, 0x00000104,
656 	0xc1e4, 0x00000001, 0x00000001,
657 	0xd0c0, 0xfffffff0, 0x00000100,
658 	0xd8c0, 0xfffffff0, 0x00000100
659 };
660 
661 static const u32 pitcairn_mgcg_cgcg_init[] =
662 {
663 	0xc400, 0xffffffff, 0xfffffffc,
664 	0x802c, 0xffffffff, 0xe0000000,
665 	0x9a60, 0xffffffff, 0x00000100,
666 	0x92a4, 0xffffffff, 0x00000100,
667 	0xc164, 0xffffffff, 0x00000100,
668 	0x9774, 0xffffffff, 0x00000100,
669 	0x8984, 0xffffffff, 0x06000100,
670 	0x8a18, 0xffffffff, 0x00000100,
671 	0x92a0, 0xffffffff, 0x00000100,
672 	0xc380, 0xffffffff, 0x00000100,
673 	0x8b28, 0xffffffff, 0x00000100,
674 	0x9144, 0xffffffff, 0x00000100,
675 	0x8d88, 0xffffffff, 0x00000100,
676 	0x8d8c, 0xffffffff, 0x00000100,
677 	0x9030, 0xffffffff, 0x00000100,
678 	0x9034, 0xffffffff, 0x00000100,
679 	0x9038, 0xffffffff, 0x00000100,
680 	0x903c, 0xffffffff, 0x00000100,
681 	0xad80, 0xffffffff, 0x00000100,
682 	0xac54, 0xffffffff, 0x00000100,
683 	0x897c, 0xffffffff, 0x06000100,
684 	0x9868, 0xffffffff, 0x00000100,
685 	0x9510, 0xffffffff, 0x00000100,
686 	0xaf04, 0xffffffff, 0x00000100,
687 	0xae04, 0xffffffff, 0x00000100,
688 	0x949c, 0xffffffff, 0x00000100,
689 	0x802c, 0xffffffff, 0xe0000000,
690 	0x9160, 0xffffffff, 0x00010000,
691 	0x9164, 0xffffffff, 0x00030002,
692 	0x9168, 0xffffffff, 0x00040007,
693 	0x916c, 0xffffffff, 0x00060005,
694 	0x9170, 0xffffffff, 0x00090008,
695 	0x9174, 0xffffffff, 0x00020001,
696 	0x9178, 0xffffffff, 0x00040003,
697 	0x917c, 0xffffffff, 0x00000007,
698 	0x9180, 0xffffffff, 0x00060005,
699 	0x9184, 0xffffffff, 0x00090008,
700 	0x9188, 0xffffffff, 0x00030002,
701 	0x918c, 0xffffffff, 0x00050004,
702 	0x9190, 0xffffffff, 0x00000008,
703 	0x9194, 0xffffffff, 0x00070006,
704 	0x9198, 0xffffffff, 0x000a0009,
705 	0x919c, 0xffffffff, 0x00040003,
706 	0x91a0, 0xffffffff, 0x00060005,
707 	0x91a4, 0xffffffff, 0x00000009,
708 	0x91a8, 0xffffffff, 0x00080007,
709 	0x91ac, 0xffffffff, 0x000b000a,
710 	0x91b0, 0xffffffff, 0x00050004,
711 	0x91b4, 0xffffffff, 0x00070006,
712 	0x91b8, 0xffffffff, 0x0008000b,
713 	0x91bc, 0xffffffff, 0x000a0009,
714 	0x91c0, 0xffffffff, 0x000d000c,
715 	0x9200, 0xffffffff, 0x00090008,
716 	0x9204, 0xffffffff, 0x000b000a,
717 	0x9208, 0xffffffff, 0x000c000f,
718 	0x920c, 0xffffffff, 0x000e000d,
719 	0x9210, 0xffffffff, 0x00110010,
720 	0x9214, 0xffffffff, 0x000a0009,
721 	0x9218, 0xffffffff, 0x000c000b,
722 	0x921c, 0xffffffff, 0x0000000f,
723 	0x9220, 0xffffffff, 0x000e000d,
724 	0x9224, 0xffffffff, 0x00110010,
725 	0x9228, 0xffffffff, 0x000b000a,
726 	0x922c, 0xffffffff, 0x000d000c,
727 	0x9230, 0xffffffff, 0x00000010,
728 	0x9234, 0xffffffff, 0x000f000e,
729 	0x9238, 0xffffffff, 0x00120011,
730 	0x923c, 0xffffffff, 0x000c000b,
731 	0x9240, 0xffffffff, 0x000e000d,
732 	0x9244, 0xffffffff, 0x00000011,
733 	0x9248, 0xffffffff, 0x0010000f,
734 	0x924c, 0xffffffff, 0x00130012,
735 	0x9250, 0xffffffff, 0x000d000c,
736 	0x9254, 0xffffffff, 0x000f000e,
737 	0x9258, 0xffffffff, 0x00100013,
738 	0x925c, 0xffffffff, 0x00120011,
739 	0x9260, 0xffffffff, 0x00150014,
740 	0x9150, 0xffffffff, 0x96940200,
741 	0x8708, 0xffffffff, 0x00900100,
742 	0xc478, 0xffffffff, 0x00000080,
743 	0xc404, 0xffffffff, 0x0020003f,
744 	0x30, 0xffffffff, 0x0000001c,
745 	0x34, 0x000f0000, 0x000f0000,
746 	0x160c, 0xffffffff, 0x00000100,
747 	0x1024, 0xffffffff, 0x00000100,
748 	0x102c, 0x00000101, 0x00000000,
749 	0x20a8, 0xffffffff, 0x00000104,
750 	0x55e4, 0xff000fff, 0x00000100,
751 	0x55e8, 0x00000001, 0x00000001,
752 	0x2f50, 0x00000001, 0x00000001,
753 	0x30cc, 0xc0000fff, 0x00000104,
754 	0xc1e4, 0x00000001, 0x00000001,
755 	0xd0c0, 0xfffffff0, 0x00000100,
756 	0xd8c0, 0xfffffff0, 0x00000100
757 };
758 
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761 	0xc400, 0xffffffff, 0xfffffffc,
762 	0x802c, 0xffffffff, 0xe0000000,
763 	0x9a60, 0xffffffff, 0x00000100,
764 	0x92a4, 0xffffffff, 0x00000100,
765 	0xc164, 0xffffffff, 0x00000100,
766 	0x9774, 0xffffffff, 0x00000100,
767 	0x8984, 0xffffffff, 0x06000100,
768 	0x8a18, 0xffffffff, 0x00000100,
769 	0x92a0, 0xffffffff, 0x00000100,
770 	0xc380, 0xffffffff, 0x00000100,
771 	0x8b28, 0xffffffff, 0x00000100,
772 	0x9144, 0xffffffff, 0x00000100,
773 	0x8d88, 0xffffffff, 0x00000100,
774 	0x8d8c, 0xffffffff, 0x00000100,
775 	0x9030, 0xffffffff, 0x00000100,
776 	0x9034, 0xffffffff, 0x00000100,
777 	0x9038, 0xffffffff, 0x00000100,
778 	0x903c, 0xffffffff, 0x00000100,
779 	0xad80, 0xffffffff, 0x00000100,
780 	0xac54, 0xffffffff, 0x00000100,
781 	0x897c, 0xffffffff, 0x06000100,
782 	0x9868, 0xffffffff, 0x00000100,
783 	0x9510, 0xffffffff, 0x00000100,
784 	0xaf04, 0xffffffff, 0x00000100,
785 	0xae04, 0xffffffff, 0x00000100,
786 	0x949c, 0xffffffff, 0x00000100,
787 	0x802c, 0xffffffff, 0xe0000000,
788 	0x9160, 0xffffffff, 0x00010000,
789 	0x9164, 0xffffffff, 0x00030002,
790 	0x9168, 0xffffffff, 0x00040007,
791 	0x916c, 0xffffffff, 0x00060005,
792 	0x9170, 0xffffffff, 0x00090008,
793 	0x9174, 0xffffffff, 0x00020001,
794 	0x9178, 0xffffffff, 0x00040003,
795 	0x917c, 0xffffffff, 0x00000007,
796 	0x9180, 0xffffffff, 0x00060005,
797 	0x9184, 0xffffffff, 0x00090008,
798 	0x9188, 0xffffffff, 0x00030002,
799 	0x918c, 0xffffffff, 0x00050004,
800 	0x9190, 0xffffffff, 0x00000008,
801 	0x9194, 0xffffffff, 0x00070006,
802 	0x9198, 0xffffffff, 0x000a0009,
803 	0x919c, 0xffffffff, 0x00040003,
804 	0x91a0, 0xffffffff, 0x00060005,
805 	0x91a4, 0xffffffff, 0x00000009,
806 	0x91a8, 0xffffffff, 0x00080007,
807 	0x91ac, 0xffffffff, 0x000b000a,
808 	0x91b0, 0xffffffff, 0x00050004,
809 	0x91b4, 0xffffffff, 0x00070006,
810 	0x91b8, 0xffffffff, 0x0008000b,
811 	0x91bc, 0xffffffff, 0x000a0009,
812 	0x91c0, 0xffffffff, 0x000d000c,
813 	0x9200, 0xffffffff, 0x00090008,
814 	0x9204, 0xffffffff, 0x000b000a,
815 	0x9208, 0xffffffff, 0x000c000f,
816 	0x920c, 0xffffffff, 0x000e000d,
817 	0x9210, 0xffffffff, 0x00110010,
818 	0x9214, 0xffffffff, 0x000a0009,
819 	0x9218, 0xffffffff, 0x000c000b,
820 	0x921c, 0xffffffff, 0x0000000f,
821 	0x9220, 0xffffffff, 0x000e000d,
822 	0x9224, 0xffffffff, 0x00110010,
823 	0x9228, 0xffffffff, 0x000b000a,
824 	0x922c, 0xffffffff, 0x000d000c,
825 	0x9230, 0xffffffff, 0x00000010,
826 	0x9234, 0xffffffff, 0x000f000e,
827 	0x9238, 0xffffffff, 0x00120011,
828 	0x923c, 0xffffffff, 0x000c000b,
829 	0x9240, 0xffffffff, 0x000e000d,
830 	0x9244, 0xffffffff, 0x00000011,
831 	0x9248, 0xffffffff, 0x0010000f,
832 	0x924c, 0xffffffff, 0x00130012,
833 	0x9250, 0xffffffff, 0x000d000c,
834 	0x9254, 0xffffffff, 0x000f000e,
835 	0x9258, 0xffffffff, 0x00100013,
836 	0x925c, 0xffffffff, 0x00120011,
837 	0x9260, 0xffffffff, 0x00150014,
838 	0x9150, 0xffffffff, 0x96940200,
839 	0x8708, 0xffffffff, 0x00900100,
840 	0xc478, 0xffffffff, 0x00000080,
841 	0xc404, 0xffffffff, 0x0020003f,
842 	0x30, 0xffffffff, 0x0000001c,
843 	0x34, 0x000f0000, 0x000f0000,
844 	0x160c, 0xffffffff, 0x00000100,
845 	0x1024, 0xffffffff, 0x00000100,
846 	0x102c, 0x00000101, 0x00000000,
847 	0x20a8, 0xffffffff, 0x00000104,
848 	0x264c, 0x000c0000, 0x000c0000,
849 	0x2648, 0x000c0000, 0x000c0000,
850 	0x55e4, 0xff000fff, 0x00000100,
851 	0x55e8, 0x00000001, 0x00000001,
852 	0x2f50, 0x00000001, 0x00000001,
853 	0x30cc, 0xc0000fff, 0x00000104,
854 	0xc1e4, 0x00000001, 0x00000001,
855 	0xd0c0, 0xfffffff0, 0x00000100,
856 	0xd8c0, 0xfffffff0, 0x00000100
857 };
858 
859 static const u32 oland_mgcg_cgcg_init[] =
860 {
861 	0xc400, 0xffffffff, 0xfffffffc,
862 	0x802c, 0xffffffff, 0xe0000000,
863 	0x9a60, 0xffffffff, 0x00000100,
864 	0x92a4, 0xffffffff, 0x00000100,
865 	0xc164, 0xffffffff, 0x00000100,
866 	0x9774, 0xffffffff, 0x00000100,
867 	0x8984, 0xffffffff, 0x06000100,
868 	0x8a18, 0xffffffff, 0x00000100,
869 	0x92a0, 0xffffffff, 0x00000100,
870 	0xc380, 0xffffffff, 0x00000100,
871 	0x8b28, 0xffffffff, 0x00000100,
872 	0x9144, 0xffffffff, 0x00000100,
873 	0x8d88, 0xffffffff, 0x00000100,
874 	0x8d8c, 0xffffffff, 0x00000100,
875 	0x9030, 0xffffffff, 0x00000100,
876 	0x9034, 0xffffffff, 0x00000100,
877 	0x9038, 0xffffffff, 0x00000100,
878 	0x903c, 0xffffffff, 0x00000100,
879 	0xad80, 0xffffffff, 0x00000100,
880 	0xac54, 0xffffffff, 0x00000100,
881 	0x897c, 0xffffffff, 0x06000100,
882 	0x9868, 0xffffffff, 0x00000100,
883 	0x9510, 0xffffffff, 0x00000100,
884 	0xaf04, 0xffffffff, 0x00000100,
885 	0xae04, 0xffffffff, 0x00000100,
886 	0x949c, 0xffffffff, 0x00000100,
887 	0x802c, 0xffffffff, 0xe0000000,
888 	0x9160, 0xffffffff, 0x00010000,
889 	0x9164, 0xffffffff, 0x00030002,
890 	0x9168, 0xffffffff, 0x00040007,
891 	0x916c, 0xffffffff, 0x00060005,
892 	0x9170, 0xffffffff, 0x00090008,
893 	0x9174, 0xffffffff, 0x00020001,
894 	0x9178, 0xffffffff, 0x00040003,
895 	0x917c, 0xffffffff, 0x00000007,
896 	0x9180, 0xffffffff, 0x00060005,
897 	0x9184, 0xffffffff, 0x00090008,
898 	0x9188, 0xffffffff, 0x00030002,
899 	0x918c, 0xffffffff, 0x00050004,
900 	0x9190, 0xffffffff, 0x00000008,
901 	0x9194, 0xffffffff, 0x00070006,
902 	0x9198, 0xffffffff, 0x000a0009,
903 	0x919c, 0xffffffff, 0x00040003,
904 	0x91a0, 0xffffffff, 0x00060005,
905 	0x91a4, 0xffffffff, 0x00000009,
906 	0x91a8, 0xffffffff, 0x00080007,
907 	0x91ac, 0xffffffff, 0x000b000a,
908 	0x91b0, 0xffffffff, 0x00050004,
909 	0x91b4, 0xffffffff, 0x00070006,
910 	0x91b8, 0xffffffff, 0x0008000b,
911 	0x91bc, 0xffffffff, 0x000a0009,
912 	0x91c0, 0xffffffff, 0x000d000c,
913 	0x91c4, 0xffffffff, 0x00060005,
914 	0x91c8, 0xffffffff, 0x00080007,
915 	0x91cc, 0xffffffff, 0x0000000b,
916 	0x91d0, 0xffffffff, 0x000a0009,
917 	0x91d4, 0xffffffff, 0x000d000c,
918 	0x9150, 0xffffffff, 0x96940200,
919 	0x8708, 0xffffffff, 0x00900100,
920 	0xc478, 0xffffffff, 0x00000080,
921 	0xc404, 0xffffffff, 0x0020003f,
922 	0x30, 0xffffffff, 0x0000001c,
923 	0x34, 0x000f0000, 0x000f0000,
924 	0x160c, 0xffffffff, 0x00000100,
925 	0x1024, 0xffffffff, 0x00000100,
926 	0x102c, 0x00000101, 0x00000000,
927 	0x20a8, 0xffffffff, 0x00000104,
928 	0x264c, 0x000c0000, 0x000c0000,
929 	0x2648, 0x000c0000, 0x000c0000,
930 	0x55e4, 0xff000fff, 0x00000100,
931 	0x55e8, 0x00000001, 0x00000001,
932 	0x2f50, 0x00000001, 0x00000001,
933 	0x30cc, 0xc0000fff, 0x00000104,
934 	0xc1e4, 0x00000001, 0x00000001,
935 	0xd0c0, 0xfffffff0, 0x00000100,
936 	0xd8c0, 0xfffffff0, 0x00000100
937 };
938 
939 static const u32 hainan_mgcg_cgcg_init[] =
940 {
941 	0xc400, 0xffffffff, 0xfffffffc,
942 	0x802c, 0xffffffff, 0xe0000000,
943 	0x9a60, 0xffffffff, 0x00000100,
944 	0x92a4, 0xffffffff, 0x00000100,
945 	0xc164, 0xffffffff, 0x00000100,
946 	0x9774, 0xffffffff, 0x00000100,
947 	0x8984, 0xffffffff, 0x06000100,
948 	0x8a18, 0xffffffff, 0x00000100,
949 	0x92a0, 0xffffffff, 0x00000100,
950 	0xc380, 0xffffffff, 0x00000100,
951 	0x8b28, 0xffffffff, 0x00000100,
952 	0x9144, 0xffffffff, 0x00000100,
953 	0x8d88, 0xffffffff, 0x00000100,
954 	0x8d8c, 0xffffffff, 0x00000100,
955 	0x9030, 0xffffffff, 0x00000100,
956 	0x9034, 0xffffffff, 0x00000100,
957 	0x9038, 0xffffffff, 0x00000100,
958 	0x903c, 0xffffffff, 0x00000100,
959 	0xad80, 0xffffffff, 0x00000100,
960 	0xac54, 0xffffffff, 0x00000100,
961 	0x897c, 0xffffffff, 0x06000100,
962 	0x9868, 0xffffffff, 0x00000100,
963 	0x9510, 0xffffffff, 0x00000100,
964 	0xaf04, 0xffffffff, 0x00000100,
965 	0xae04, 0xffffffff, 0x00000100,
966 	0x949c, 0xffffffff, 0x00000100,
967 	0x802c, 0xffffffff, 0xe0000000,
968 	0x9160, 0xffffffff, 0x00010000,
969 	0x9164, 0xffffffff, 0x00030002,
970 	0x9168, 0xffffffff, 0x00040007,
971 	0x916c, 0xffffffff, 0x00060005,
972 	0x9170, 0xffffffff, 0x00090008,
973 	0x9174, 0xffffffff, 0x00020001,
974 	0x9178, 0xffffffff, 0x00040003,
975 	0x917c, 0xffffffff, 0x00000007,
976 	0x9180, 0xffffffff, 0x00060005,
977 	0x9184, 0xffffffff, 0x00090008,
978 	0x9188, 0xffffffff, 0x00030002,
979 	0x918c, 0xffffffff, 0x00050004,
980 	0x9190, 0xffffffff, 0x00000008,
981 	0x9194, 0xffffffff, 0x00070006,
982 	0x9198, 0xffffffff, 0x000a0009,
983 	0x919c, 0xffffffff, 0x00040003,
984 	0x91a0, 0xffffffff, 0x00060005,
985 	0x91a4, 0xffffffff, 0x00000009,
986 	0x91a8, 0xffffffff, 0x00080007,
987 	0x91ac, 0xffffffff, 0x000b000a,
988 	0x91b0, 0xffffffff, 0x00050004,
989 	0x91b4, 0xffffffff, 0x00070006,
990 	0x91b8, 0xffffffff, 0x0008000b,
991 	0x91bc, 0xffffffff, 0x000a0009,
992 	0x91c0, 0xffffffff, 0x000d000c,
993 	0x91c4, 0xffffffff, 0x00060005,
994 	0x91c8, 0xffffffff, 0x00080007,
995 	0x91cc, 0xffffffff, 0x0000000b,
996 	0x91d0, 0xffffffff, 0x000a0009,
997 	0x91d4, 0xffffffff, 0x000d000c,
998 	0x9150, 0xffffffff, 0x96940200,
999 	0x8708, 0xffffffff, 0x00900100,
1000 	0xc478, 0xffffffff, 0x00000080,
1001 	0xc404, 0xffffffff, 0x0020003f,
1002 	0x30, 0xffffffff, 0x0000001c,
1003 	0x34, 0x000f0000, 0x000f0000,
1004 	0x160c, 0xffffffff, 0x00000100,
1005 	0x1024, 0xffffffff, 0x00000100,
1006 	0x20a8, 0xffffffff, 0x00000104,
1007 	0x264c, 0x000c0000, 0x000c0000,
1008 	0x2648, 0x000c0000, 0x000c0000,
1009 	0x2f50, 0x00000001, 0x00000001,
1010 	0x30cc, 0xc0000fff, 0x00000104,
1011 	0xc1e4, 0x00000001, 0x00000001,
1012 	0xd0c0, 0xfffffff0, 0x00000100,
1013 	0xd8c0, 0xfffffff0, 0x00000100
1014 };
1015 
1016 static u32 verde_pg_init[] =
1017 {
1018 	0x353c, 0xffffffff, 0x40000,
1019 	0x3538, 0xffffffff, 0x200010ff,
1020 	0x353c, 0xffffffff, 0x0,
1021 	0x353c, 0xffffffff, 0x0,
1022 	0x353c, 0xffffffff, 0x0,
1023 	0x353c, 0xffffffff, 0x0,
1024 	0x353c, 0xffffffff, 0x0,
1025 	0x353c, 0xffffffff, 0x7007,
1026 	0x3538, 0xffffffff, 0x300010ff,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x0,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x400000,
1033 	0x3538, 0xffffffff, 0x100010ff,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x120200,
1040 	0x3538, 0xffffffff, 0x500010ff,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x1e1e16,
1047 	0x3538, 0xffffffff, 0x600010ff,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x171f1e,
1054 	0x3538, 0xffffffff, 0x700010ff,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x3538, 0xffffffff, 0x9ff,
1062 	0x3500, 0xffffffff, 0x0,
1063 	0x3504, 0xffffffff, 0x10000800,
1064 	0x3504, 0xffffffff, 0xf,
1065 	0x3504, 0xffffffff, 0xf,
1066 	0x3500, 0xffffffff, 0x4,
1067 	0x3504, 0xffffffff, 0x1000051e,
1068 	0x3504, 0xffffffff, 0xffff,
1069 	0x3504, 0xffffffff, 0xffff,
1070 	0x3500, 0xffffffff, 0x8,
1071 	0x3504, 0xffffffff, 0x80500,
1072 	0x3500, 0xffffffff, 0x12,
1073 	0x3504, 0xffffffff, 0x9050c,
1074 	0x3500, 0xffffffff, 0x1d,
1075 	0x3504, 0xffffffff, 0xb052c,
1076 	0x3500, 0xffffffff, 0x2a,
1077 	0x3504, 0xffffffff, 0x1053e,
1078 	0x3500, 0xffffffff, 0x2d,
1079 	0x3504, 0xffffffff, 0x10546,
1080 	0x3500, 0xffffffff, 0x30,
1081 	0x3504, 0xffffffff, 0xa054e,
1082 	0x3500, 0xffffffff, 0x3c,
1083 	0x3504, 0xffffffff, 0x1055f,
1084 	0x3500, 0xffffffff, 0x3f,
1085 	0x3504, 0xffffffff, 0x10567,
1086 	0x3500, 0xffffffff, 0x42,
1087 	0x3504, 0xffffffff, 0x1056f,
1088 	0x3500, 0xffffffff, 0x45,
1089 	0x3504, 0xffffffff, 0x10572,
1090 	0x3500, 0xffffffff, 0x48,
1091 	0x3504, 0xffffffff, 0x20575,
1092 	0x3500, 0xffffffff, 0x4c,
1093 	0x3504, 0xffffffff, 0x190801,
1094 	0x3500, 0xffffffff, 0x67,
1095 	0x3504, 0xffffffff, 0x1082a,
1096 	0x3500, 0xffffffff, 0x6a,
1097 	0x3504, 0xffffffff, 0x1b082d,
1098 	0x3500, 0xffffffff, 0x87,
1099 	0x3504, 0xffffffff, 0x310851,
1100 	0x3500, 0xffffffff, 0xba,
1101 	0x3504, 0xffffffff, 0x891,
1102 	0x3500, 0xffffffff, 0xbc,
1103 	0x3504, 0xffffffff, 0x893,
1104 	0x3500, 0xffffffff, 0xbe,
1105 	0x3504, 0xffffffff, 0x20895,
1106 	0x3500, 0xffffffff, 0xc2,
1107 	0x3504, 0xffffffff, 0x20899,
1108 	0x3500, 0xffffffff, 0xc6,
1109 	0x3504, 0xffffffff, 0x2089d,
1110 	0x3500, 0xffffffff, 0xca,
1111 	0x3504, 0xffffffff, 0x8a1,
1112 	0x3500, 0xffffffff, 0xcc,
1113 	0x3504, 0xffffffff, 0x8a3,
1114 	0x3500, 0xffffffff, 0xce,
1115 	0x3504, 0xffffffff, 0x308a5,
1116 	0x3500, 0xffffffff, 0xd3,
1117 	0x3504, 0xffffffff, 0x6d08cd,
1118 	0x3500, 0xffffffff, 0x142,
1119 	0x3504, 0xffffffff, 0x2000095a,
1120 	0x3504, 0xffffffff, 0x1,
1121 	0x3500, 0xffffffff, 0x144,
1122 	0x3504, 0xffffffff, 0x301f095b,
1123 	0x3500, 0xffffffff, 0x165,
1124 	0x3504, 0xffffffff, 0xc094d,
1125 	0x3500, 0xffffffff, 0x173,
1126 	0x3504, 0xffffffff, 0xf096d,
1127 	0x3500, 0xffffffff, 0x184,
1128 	0x3504, 0xffffffff, 0x15097f,
1129 	0x3500, 0xffffffff, 0x19b,
1130 	0x3504, 0xffffffff, 0xc0998,
1131 	0x3500, 0xffffffff, 0x1a9,
1132 	0x3504, 0xffffffff, 0x409a7,
1133 	0x3500, 0xffffffff, 0x1af,
1134 	0x3504, 0xffffffff, 0xcdc,
1135 	0x3500, 0xffffffff, 0x1b1,
1136 	0x3504, 0xffffffff, 0x800,
1137 	0x3508, 0xffffffff, 0x6c9b2000,
1138 	0x3510, 0xfc00, 0x2000,
1139 	0x3544, 0xffffffff, 0xfc0,
1140 	0x28d4, 0x00000100, 0x100
1141 };
1142 
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145 	switch (rdev->family) {
1146 	case CHIP_TAHITI:
1147 		radeon_program_register_sequence(rdev,
1148 						 tahiti_golden_registers,
1149 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150 		radeon_program_register_sequence(rdev,
1151 						 tahiti_golden_rlc_registers,
1152 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153 		radeon_program_register_sequence(rdev,
1154 						 tahiti_mgcg_cgcg_init,
1155 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156 		radeon_program_register_sequence(rdev,
1157 						 tahiti_golden_registers2,
1158 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159 		break;
1160 	case CHIP_PITCAIRN:
1161 		radeon_program_register_sequence(rdev,
1162 						 pitcairn_golden_registers,
1163 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164 		radeon_program_register_sequence(rdev,
1165 						 pitcairn_golden_rlc_registers,
1166 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167 		radeon_program_register_sequence(rdev,
1168 						 pitcairn_mgcg_cgcg_init,
1169 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170 		break;
1171 	case CHIP_VERDE:
1172 		radeon_program_register_sequence(rdev,
1173 						 verde_golden_registers,
1174 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1175 		radeon_program_register_sequence(rdev,
1176 						 verde_golden_rlc_registers,
1177 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178 		radeon_program_register_sequence(rdev,
1179 						 verde_mgcg_cgcg_init,
1180 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181 		radeon_program_register_sequence(rdev,
1182 						 verde_pg_init,
1183 						 (const u32)ARRAY_SIZE(verde_pg_init));
1184 		break;
1185 	case CHIP_OLAND:
1186 		radeon_program_register_sequence(rdev,
1187 						 oland_golden_registers,
1188 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1189 		radeon_program_register_sequence(rdev,
1190 						 oland_golden_rlc_registers,
1191 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192 		radeon_program_register_sequence(rdev,
1193 						 oland_mgcg_cgcg_init,
1194 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195 		break;
1196 	case CHIP_HAINAN:
1197 		radeon_program_register_sequence(rdev,
1198 						 hainan_golden_registers,
1199 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1200 		radeon_program_register_sequence(rdev,
1201 						 hainan_golden_registers2,
1202 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203 		radeon_program_register_sequence(rdev,
1204 						 hainan_mgcg_cgcg_init,
1205 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206 		break;
1207 	default:
1208 		break;
1209 	}
1210 }
1211 
1212 #define PCIE_BUS_CLK                10000
1213 #define TCLK                        (PCIE_BUS_CLK / 10)
1214 
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226 	u32 tmp;
1227 
1228 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1229 	if (tmp & MUX_TCLK_TO_XCLK)
1230 		return TCLK;
1231 
1232 	tmp = RREG32(CG_CLKPIN_CNTL);
1233 	if (tmp & XTALIN_DIVIDE)
1234 		return reference_clock / 4;
1235 
1236 	return reference_clock;
1237 }
1238 
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242 	u32 temp;
1243 	int actual_temp = 0;
1244 
1245 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246 		CTF_TEMP_SHIFT;
1247 
1248 	if (temp & 0x200)
1249 		actual_temp = 255;
1250 	else
1251 		actual_temp = temp & 0x1ff;
1252 
1253 	actual_temp = (actual_temp * 1000);
1254 
1255 	return actual_temp;
1256 }
1257 
1258 #define TAHITI_IO_MC_REGS_SIZE 36
1259 
1260 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1261 	{0x0000006f, 0x03044000},
1262 	{0x00000070, 0x0480c018},
1263 	{0x00000071, 0x00000040},
1264 	{0x00000072, 0x01000000},
1265 	{0x00000074, 0x000000ff},
1266 	{0x00000075, 0x00143400},
1267 	{0x00000076, 0x08ec0800},
1268 	{0x00000077, 0x040000cc},
1269 	{0x00000079, 0x00000000},
1270 	{0x0000007a, 0x21000409},
1271 	{0x0000007c, 0x00000000},
1272 	{0x0000007d, 0xe8000000},
1273 	{0x0000007e, 0x044408a8},
1274 	{0x0000007f, 0x00000003},
1275 	{0x00000080, 0x00000000},
1276 	{0x00000081, 0x01000000},
1277 	{0x00000082, 0x02000000},
1278 	{0x00000083, 0x00000000},
1279 	{0x00000084, 0xe3f3e4f4},
1280 	{0x00000085, 0x00052024},
1281 	{0x00000087, 0x00000000},
1282 	{0x00000088, 0x66036603},
1283 	{0x00000089, 0x01000000},
1284 	{0x0000008b, 0x1c0a0000},
1285 	{0x0000008c, 0xff010000},
1286 	{0x0000008e, 0xffffefff},
1287 	{0x0000008f, 0xfff3efff},
1288 	{0x00000090, 0xfff3efbf},
1289 	{0x00000094, 0x00101101},
1290 	{0x00000095, 0x00000fff},
1291 	{0x00000096, 0x00116fff},
1292 	{0x00000097, 0x60010000},
1293 	{0x00000098, 0x10010000},
1294 	{0x00000099, 0x00006000},
1295 	{0x0000009a, 0x00001000},
1296 	{0x0000009f, 0x00a77400}
1297 };
1298 
1299 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1300 	{0x0000006f, 0x03044000},
1301 	{0x00000070, 0x0480c018},
1302 	{0x00000071, 0x00000040},
1303 	{0x00000072, 0x01000000},
1304 	{0x00000074, 0x000000ff},
1305 	{0x00000075, 0x00143400},
1306 	{0x00000076, 0x08ec0800},
1307 	{0x00000077, 0x040000cc},
1308 	{0x00000079, 0x00000000},
1309 	{0x0000007a, 0x21000409},
1310 	{0x0000007c, 0x00000000},
1311 	{0x0000007d, 0xe8000000},
1312 	{0x0000007e, 0x044408a8},
1313 	{0x0000007f, 0x00000003},
1314 	{0x00000080, 0x00000000},
1315 	{0x00000081, 0x01000000},
1316 	{0x00000082, 0x02000000},
1317 	{0x00000083, 0x00000000},
1318 	{0x00000084, 0xe3f3e4f4},
1319 	{0x00000085, 0x00052024},
1320 	{0x00000087, 0x00000000},
1321 	{0x00000088, 0x66036603},
1322 	{0x00000089, 0x01000000},
1323 	{0x0000008b, 0x1c0a0000},
1324 	{0x0000008c, 0xff010000},
1325 	{0x0000008e, 0xffffefff},
1326 	{0x0000008f, 0xfff3efff},
1327 	{0x00000090, 0xfff3efbf},
1328 	{0x00000094, 0x00101101},
1329 	{0x00000095, 0x00000fff},
1330 	{0x00000096, 0x00116fff},
1331 	{0x00000097, 0x60010000},
1332 	{0x00000098, 0x10010000},
1333 	{0x00000099, 0x00006000},
1334 	{0x0000009a, 0x00001000},
1335 	{0x0000009f, 0x00a47400}
1336 };
1337 
1338 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1339 	{0x0000006f, 0x03044000},
1340 	{0x00000070, 0x0480c018},
1341 	{0x00000071, 0x00000040},
1342 	{0x00000072, 0x01000000},
1343 	{0x00000074, 0x000000ff},
1344 	{0x00000075, 0x00143400},
1345 	{0x00000076, 0x08ec0800},
1346 	{0x00000077, 0x040000cc},
1347 	{0x00000079, 0x00000000},
1348 	{0x0000007a, 0x21000409},
1349 	{0x0000007c, 0x00000000},
1350 	{0x0000007d, 0xe8000000},
1351 	{0x0000007e, 0x044408a8},
1352 	{0x0000007f, 0x00000003},
1353 	{0x00000080, 0x00000000},
1354 	{0x00000081, 0x01000000},
1355 	{0x00000082, 0x02000000},
1356 	{0x00000083, 0x00000000},
1357 	{0x00000084, 0xe3f3e4f4},
1358 	{0x00000085, 0x00052024},
1359 	{0x00000087, 0x00000000},
1360 	{0x00000088, 0x66036603},
1361 	{0x00000089, 0x01000000},
1362 	{0x0000008b, 0x1c0a0000},
1363 	{0x0000008c, 0xff010000},
1364 	{0x0000008e, 0xffffefff},
1365 	{0x0000008f, 0xfff3efff},
1366 	{0x00000090, 0xfff3efbf},
1367 	{0x00000094, 0x00101101},
1368 	{0x00000095, 0x00000fff},
1369 	{0x00000096, 0x00116fff},
1370 	{0x00000097, 0x60010000},
1371 	{0x00000098, 0x10010000},
1372 	{0x00000099, 0x00006000},
1373 	{0x0000009a, 0x00001000},
1374 	{0x0000009f, 0x00a37400}
1375 };
1376 
1377 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1378 	{0x0000006f, 0x03044000},
1379 	{0x00000070, 0x0480c018},
1380 	{0x00000071, 0x00000040},
1381 	{0x00000072, 0x01000000},
1382 	{0x00000074, 0x000000ff},
1383 	{0x00000075, 0x00143400},
1384 	{0x00000076, 0x08ec0800},
1385 	{0x00000077, 0x040000cc},
1386 	{0x00000079, 0x00000000},
1387 	{0x0000007a, 0x21000409},
1388 	{0x0000007c, 0x00000000},
1389 	{0x0000007d, 0xe8000000},
1390 	{0x0000007e, 0x044408a8},
1391 	{0x0000007f, 0x00000003},
1392 	{0x00000080, 0x00000000},
1393 	{0x00000081, 0x01000000},
1394 	{0x00000082, 0x02000000},
1395 	{0x00000083, 0x00000000},
1396 	{0x00000084, 0xe3f3e4f4},
1397 	{0x00000085, 0x00052024},
1398 	{0x00000087, 0x00000000},
1399 	{0x00000088, 0x66036603},
1400 	{0x00000089, 0x01000000},
1401 	{0x0000008b, 0x1c0a0000},
1402 	{0x0000008c, 0xff010000},
1403 	{0x0000008e, 0xffffefff},
1404 	{0x0000008f, 0xfff3efff},
1405 	{0x00000090, 0xfff3efbf},
1406 	{0x00000094, 0x00101101},
1407 	{0x00000095, 0x00000fff},
1408 	{0x00000096, 0x00116fff},
1409 	{0x00000097, 0x60010000},
1410 	{0x00000098, 0x10010000},
1411 	{0x00000099, 0x00006000},
1412 	{0x0000009a, 0x00001000},
1413 	{0x0000009f, 0x00a17730}
1414 };
1415 
1416 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1417 	{0x0000006f, 0x03044000},
1418 	{0x00000070, 0x0480c018},
1419 	{0x00000071, 0x00000040},
1420 	{0x00000072, 0x01000000},
1421 	{0x00000074, 0x000000ff},
1422 	{0x00000075, 0x00143400},
1423 	{0x00000076, 0x08ec0800},
1424 	{0x00000077, 0x040000cc},
1425 	{0x00000079, 0x00000000},
1426 	{0x0000007a, 0x21000409},
1427 	{0x0000007c, 0x00000000},
1428 	{0x0000007d, 0xe8000000},
1429 	{0x0000007e, 0x044408a8},
1430 	{0x0000007f, 0x00000003},
1431 	{0x00000080, 0x00000000},
1432 	{0x00000081, 0x01000000},
1433 	{0x00000082, 0x02000000},
1434 	{0x00000083, 0x00000000},
1435 	{0x00000084, 0xe3f3e4f4},
1436 	{0x00000085, 0x00052024},
1437 	{0x00000087, 0x00000000},
1438 	{0x00000088, 0x66036603},
1439 	{0x00000089, 0x01000000},
1440 	{0x0000008b, 0x1c0a0000},
1441 	{0x0000008c, 0xff010000},
1442 	{0x0000008e, 0xffffefff},
1443 	{0x0000008f, 0xfff3efff},
1444 	{0x00000090, 0xfff3efbf},
1445 	{0x00000094, 0x00101101},
1446 	{0x00000095, 0x00000fff},
1447 	{0x00000096, 0x00116fff},
1448 	{0x00000097, 0x60010000},
1449 	{0x00000098, 0x10010000},
1450 	{0x00000099, 0x00006000},
1451 	{0x0000009a, 0x00001000},
1452 	{0x0000009f, 0x00a07730}
1453 };
1454 
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458 	const __be32 *fw_data;
1459 	u32 running, blackout = 0;
1460 	u32 *io_mc_regs;
1461 	int i, ucode_size, regs_size;
1462 
1463 	if (!rdev->mc_fw)
1464 		return -EINVAL;
1465 
1466 	switch (rdev->family) {
1467 	case CHIP_TAHITI:
1468 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469 		ucode_size = SI_MC_UCODE_SIZE;
1470 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1471 		break;
1472 	case CHIP_PITCAIRN:
1473 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474 		ucode_size = SI_MC_UCODE_SIZE;
1475 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1476 		break;
1477 	case CHIP_VERDE:
1478 	default:
1479 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1480 		ucode_size = SI_MC_UCODE_SIZE;
1481 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1482 		break;
1483 	case CHIP_OLAND:
1484 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1485 		ucode_size = OLAND_MC_UCODE_SIZE;
1486 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1487 		break;
1488 	case CHIP_HAINAN:
1489 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490 		ucode_size = OLAND_MC_UCODE_SIZE;
1491 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1492 		break;
1493 	}
1494 
1495 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496 
1497 	if (running == 0) {
1498 		if (running) {
1499 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501 		}
1502 
1503 		/* reset the engine and set to writable */
1504 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506 
1507 		/* load mc io regs */
1508 		for (i = 0; i < regs_size; i++) {
1509 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511 		}
1512 		/* load the MC ucode */
1513 		fw_data = (const __be32 *)rdev->mc_fw->data;
1514 		for (i = 0; i < ucode_size; i++)
1515 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516 
1517 		/* put the engine back into the active state */
1518 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521 
1522 		/* wait for training to complete */
1523 		for (i = 0; i < rdev->usec_timeout; i++) {
1524 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525 				break;
1526 			udelay(1);
1527 		}
1528 		for (i = 0; i < rdev->usec_timeout; i++) {
1529 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530 				break;
1531 			udelay(1);
1532 		}
1533 
1534 		if (running)
1535 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536 	}
1537 
1538 	return 0;
1539 }
1540 
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543 	const char *chip_name;
1544 	const char *rlc_chip_name;
1545 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546 	size_t smc_req_size;
1547 	char fw_name[30];
1548 	int err;
1549 
1550 	DRM_DEBUG("\n");
1551 
1552 	switch (rdev->family) {
1553 	case CHIP_TAHITI:
1554 		chip_name = "TAHITI";
1555 		rlc_chip_name = "TAHITI";
1556 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1558 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1559 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1561 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562 		break;
1563 	case CHIP_PITCAIRN:
1564 		chip_name = "PITCAIRN";
1565 		rlc_chip_name = "PITCAIRN";
1566 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1568 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1569 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1571 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572 		break;
1573 	case CHIP_VERDE:
1574 		chip_name = "VERDE";
1575 		rlc_chip_name = "VERDE";
1576 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1578 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1579 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1581 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582 		break;
1583 	case CHIP_OLAND:
1584 		chip_name = "OLAND";
1585 		rlc_chip_name = "OLAND";
1586 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1588 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1589 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592 		break;
1593 	case CHIP_HAINAN:
1594 		chip_name = "HAINAN";
1595 		rlc_chip_name = "HAINAN";
1596 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1598 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1599 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602 		break;
1603 	default: BUG();
1604 	}
1605 
1606 	DRM_INFO("Loading %s Microcode\n", chip_name);
1607 
1608 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1609 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610 	if (err)
1611 		goto out;
1612 	if (rdev->pfp_fw->size != pfp_req_size) {
1613 		printk(KERN_ERR
1614 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615 		       rdev->pfp_fw->size, fw_name);
1616 		err = -EINVAL;
1617 		goto out;
1618 	}
1619 
1620 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1621 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622 	if (err)
1623 		goto out;
1624 	if (rdev->me_fw->size != me_req_size) {
1625 		printk(KERN_ERR
1626 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627 		       rdev->me_fw->size, fw_name);
1628 		err = -EINVAL;
1629 	}
1630 
1631 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1632 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633 	if (err)
1634 		goto out;
1635 	if (rdev->ce_fw->size != ce_req_size) {
1636 		printk(KERN_ERR
1637 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638 		       rdev->ce_fw->size, fw_name);
1639 		err = -EINVAL;
1640 	}
1641 
1642 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1643 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644 	if (err)
1645 		goto out;
1646 	if (rdev->rlc_fw->size != rlc_req_size) {
1647 		printk(KERN_ERR
1648 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649 		       rdev->rlc_fw->size, fw_name);
1650 		err = -EINVAL;
1651 	}
1652 
1653 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1654 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655 	if (err)
1656 		goto out;
1657 	if (rdev->mc_fw->size != mc_req_size) {
1658 		printk(KERN_ERR
1659 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660 		       rdev->mc_fw->size, fw_name);
1661 		err = -EINVAL;
1662 	}
1663 
1664 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1665 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666 	if (err)
1667 		goto out;
1668 	if (rdev->smc_fw->size != smc_req_size) {
1669 		printk(KERN_ERR
1670 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1671 		       rdev->smc_fw->size, fw_name);
1672 		err = -EINVAL;
1673 	}
1674 
1675 out:
1676 	if (err) {
1677 		if (err != -EINVAL)
1678 			printk(KERN_ERR
1679 			       "si_cp: Failed to load firmware \"%s\"\n",
1680 			       fw_name);
1681 		release_firmware(rdev->pfp_fw);
1682 		rdev->pfp_fw = NULL;
1683 		release_firmware(rdev->me_fw);
1684 		rdev->me_fw = NULL;
1685 		release_firmware(rdev->ce_fw);
1686 		rdev->ce_fw = NULL;
1687 		release_firmware(rdev->rlc_fw);
1688 		rdev->rlc_fw = NULL;
1689 		release_firmware(rdev->mc_fw);
1690 		rdev->mc_fw = NULL;
1691 		release_firmware(rdev->smc_fw);
1692 		rdev->smc_fw = NULL;
1693 	}
1694 	return err;
1695 }
1696 
1697 /* watermark setup */
1698 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1699 				   struct radeon_crtc *radeon_crtc,
1700 				   struct drm_display_mode *mode,
1701 				   struct drm_display_mode *other_mode)
1702 {
1703 	u32 tmp;
1704 	/*
1705 	 * Line Buffer Setup
1706 	 * There are 3 line buffers, each one shared by 2 display controllers.
1707 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1708 	 * the display controllers.  The paritioning is done via one of four
1709 	 * preset allocations specified in bits 21:20:
1710 	 *  0 - half lb
1711 	 *  2 - whole lb, other crtc must be disabled
1712 	 */
1713 	/* this can get tricky if we have two large displays on a paired group
1714 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1715 	 * non-linked crtcs for maximum line buffer allocation.
1716 	 */
1717 	if (radeon_crtc->base.enabled && mode) {
1718 		if (other_mode)
1719 			tmp = 0; /* 1/2 */
1720 		else
1721 			tmp = 2; /* whole */
1722 	} else
1723 		tmp = 0;
1724 
1725 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1726 	       DC_LB_MEMORY_CONFIG(tmp));
1727 
1728 	if (radeon_crtc->base.enabled && mode) {
1729 		switch (tmp) {
1730 		case 0:
1731 		default:
1732 			return 4096 * 2;
1733 		case 2:
1734 			return 8192 * 2;
1735 		}
1736 	}
1737 
1738 	/* controller not enabled, so no lb used */
1739 	return 0;
1740 }
1741 
1742 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1743 {
1744 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1745 
1746 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1747 	case 0:
1748 	default:
1749 		return 1;
1750 	case 1:
1751 		return 2;
1752 	case 2:
1753 		return 4;
1754 	case 3:
1755 		return 8;
1756 	case 4:
1757 		return 3;
1758 	case 5:
1759 		return 6;
1760 	case 6:
1761 		return 10;
1762 	case 7:
1763 		return 12;
1764 	case 8:
1765 		return 16;
1766 	}
1767 }
1768 
1769 struct dce6_wm_params {
1770 	u32 dram_channels; /* number of dram channels */
1771 	u32 yclk;          /* bandwidth per dram data pin in kHz */
1772 	u32 sclk;          /* engine clock in kHz */
1773 	u32 disp_clk;      /* display clock in kHz */
1774 	u32 src_width;     /* viewport width */
1775 	u32 active_time;   /* active display time in ns */
1776 	u32 blank_time;    /* blank time in ns */
1777 	bool interlaced;    /* mode is interlaced */
1778 	fixed20_12 vsc;    /* vertical scale ratio */
1779 	u32 num_heads;     /* number of active crtcs */
1780 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1781 	u32 lb_size;       /* line buffer allocated to pipe */
1782 	u32 vtaps;         /* vertical scaler taps */
1783 };
1784 
1785 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1786 {
1787 	/* Calculate raw DRAM Bandwidth */
1788 	fixed20_12 dram_efficiency; /* 0.7 */
1789 	fixed20_12 yclk, dram_channels, bandwidth;
1790 	fixed20_12 a;
1791 
1792 	a.full = dfixed_const(1000);
1793 	yclk.full = dfixed_const(wm->yclk);
1794 	yclk.full = dfixed_div(yclk, a);
1795 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1796 	a.full = dfixed_const(10);
1797 	dram_efficiency.full = dfixed_const(7);
1798 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1799 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1800 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1801 
1802 	return dfixed_trunc(bandwidth);
1803 }
1804 
1805 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1806 {
1807 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1808 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1809 	fixed20_12 yclk, dram_channels, bandwidth;
1810 	fixed20_12 a;
1811 
1812 	a.full = dfixed_const(1000);
1813 	yclk.full = dfixed_const(wm->yclk);
1814 	yclk.full = dfixed_div(yclk, a);
1815 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1816 	a.full = dfixed_const(10);
1817 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1818 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1819 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1820 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1821 
1822 	return dfixed_trunc(bandwidth);
1823 }
1824 
1825 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1826 {
1827 	/* Calculate the display Data return Bandwidth */
1828 	fixed20_12 return_efficiency; /* 0.8 */
1829 	fixed20_12 sclk, bandwidth;
1830 	fixed20_12 a;
1831 
1832 	a.full = dfixed_const(1000);
1833 	sclk.full = dfixed_const(wm->sclk);
1834 	sclk.full = dfixed_div(sclk, a);
1835 	a.full = dfixed_const(10);
1836 	return_efficiency.full = dfixed_const(8);
1837 	return_efficiency.full = dfixed_div(return_efficiency, a);
1838 	a.full = dfixed_const(32);
1839 	bandwidth.full = dfixed_mul(a, sclk);
1840 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1841 
1842 	return dfixed_trunc(bandwidth);
1843 }
1844 
1845 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1846 {
1847 	return 32;
1848 }
1849 
1850 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1851 {
1852 	/* Calculate the DMIF Request Bandwidth */
1853 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1854 	fixed20_12 disp_clk, sclk, bandwidth;
1855 	fixed20_12 a, b1, b2;
1856 	u32 min_bandwidth;
1857 
1858 	a.full = dfixed_const(1000);
1859 	disp_clk.full = dfixed_const(wm->disp_clk);
1860 	disp_clk.full = dfixed_div(disp_clk, a);
1861 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1862 	b1.full = dfixed_mul(a, disp_clk);
1863 
1864 	a.full = dfixed_const(1000);
1865 	sclk.full = dfixed_const(wm->sclk);
1866 	sclk.full = dfixed_div(sclk, a);
1867 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1868 	b2.full = dfixed_mul(a, sclk);
1869 
1870 	a.full = dfixed_const(10);
1871 	disp_clk_request_efficiency.full = dfixed_const(8);
1872 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1873 
1874 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1875 
1876 	a.full = dfixed_const(min_bandwidth);
1877 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1878 
1879 	return dfixed_trunc(bandwidth);
1880 }
1881 
1882 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1883 {
1884 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1885 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1886 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1887 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1888 
1889 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1890 }
1891 
1892 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1893 {
1894 	/* Calculate the display mode Average Bandwidth
1895 	 * DisplayMode should contain the source and destination dimensions,
1896 	 * timing, etc.
1897 	 */
1898 	fixed20_12 bpp;
1899 	fixed20_12 line_time;
1900 	fixed20_12 src_width;
1901 	fixed20_12 bandwidth;
1902 	fixed20_12 a;
1903 
1904 	a.full = dfixed_const(1000);
1905 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1906 	line_time.full = dfixed_div(line_time, a);
1907 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1908 	src_width.full = dfixed_const(wm->src_width);
1909 	bandwidth.full = dfixed_mul(src_width, bpp);
1910 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1911 	bandwidth.full = dfixed_div(bandwidth, line_time);
1912 
1913 	return dfixed_trunc(bandwidth);
1914 }
1915 
1916 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1917 {
1918 	/* First calcualte the latency in ns */
1919 	u32 mc_latency = 2000; /* 2000 ns. */
1920 	u32 available_bandwidth = dce6_available_bandwidth(wm);
1921 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1922 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1923 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1924 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1925 		(wm->num_heads * cursor_line_pair_return_time);
1926 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1927 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1928 	u32 tmp, dmif_size = 12288;
1929 	fixed20_12 a, b, c;
1930 
1931 	if (wm->num_heads == 0)
1932 		return 0;
1933 
1934 	a.full = dfixed_const(2);
1935 	b.full = dfixed_const(1);
1936 	if ((wm->vsc.full > a.full) ||
1937 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1938 	    (wm->vtaps >= 5) ||
1939 	    ((wm->vsc.full >= a.full) && wm->interlaced))
1940 		max_src_lines_per_dst_line = 4;
1941 	else
1942 		max_src_lines_per_dst_line = 2;
1943 
1944 	a.full = dfixed_const(available_bandwidth);
1945 	b.full = dfixed_const(wm->num_heads);
1946 	a.full = dfixed_div(a, b);
1947 
1948 	b.full = dfixed_const(mc_latency + 512);
1949 	c.full = dfixed_const(wm->disp_clk);
1950 	b.full = dfixed_div(b, c);
1951 
1952 	c.full = dfixed_const(dmif_size);
1953 	b.full = dfixed_div(c, b);
1954 
1955 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1956 
1957 	b.full = dfixed_const(1000);
1958 	c.full = dfixed_const(wm->disp_clk);
1959 	b.full = dfixed_div(c, b);
1960 	c.full = dfixed_const(wm->bytes_per_pixel);
1961 	b.full = dfixed_mul(b, c);
1962 
1963 	lb_fill_bw = min(tmp, dfixed_trunc(b));
1964 
1965 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1966 	b.full = dfixed_const(1000);
1967 	c.full = dfixed_const(lb_fill_bw);
1968 	b.full = dfixed_div(c, b);
1969 	a.full = dfixed_div(a, b);
1970 	line_fill_time = dfixed_trunc(a);
1971 
1972 	if (line_fill_time < wm->active_time)
1973 		return latency;
1974 	else
1975 		return latency + (line_fill_time - wm->active_time);
1976 
1977 }
1978 
1979 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1980 {
1981 	if (dce6_average_bandwidth(wm) <=
1982 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1983 		return true;
1984 	else
1985 		return false;
1986 };
1987 
1988 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1989 {
1990 	if (dce6_average_bandwidth(wm) <=
1991 	    (dce6_available_bandwidth(wm) / wm->num_heads))
1992 		return true;
1993 	else
1994 		return false;
1995 };
1996 
1997 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1998 {
1999 	u32 lb_partitions = wm->lb_size / wm->src_width;
2000 	u32 line_time = wm->active_time + wm->blank_time;
2001 	u32 latency_tolerant_lines;
2002 	u32 latency_hiding;
2003 	fixed20_12 a;
2004 
2005 	a.full = dfixed_const(1);
2006 	if (wm->vsc.full > a.full)
2007 		latency_tolerant_lines = 1;
2008 	else {
2009 		if (lb_partitions <= (wm->vtaps + 1))
2010 			latency_tolerant_lines = 1;
2011 		else
2012 			latency_tolerant_lines = 2;
2013 	}
2014 
2015 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2016 
2017 	if (dce6_latency_watermark(wm) <= latency_hiding)
2018 		return true;
2019 	else
2020 		return false;
2021 }
2022 
2023 static void dce6_program_watermarks(struct radeon_device *rdev,
2024 					 struct radeon_crtc *radeon_crtc,
2025 					 u32 lb_size, u32 num_heads)
2026 {
2027 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2028 	struct dce6_wm_params wm_low, wm_high;
2029 	u32 dram_channels;
2030 	u32 pixel_period;
2031 	u32 line_time = 0;
2032 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2033 	u32 priority_a_mark = 0, priority_b_mark = 0;
2034 	u32 priority_a_cnt = PRIORITY_OFF;
2035 	u32 priority_b_cnt = PRIORITY_OFF;
2036 	u32 tmp, arb_control3;
2037 	fixed20_12 a, b, c;
2038 
2039 	if (radeon_crtc->base.enabled && num_heads && mode) {
2040 		pixel_period = 1000000 / (u32)mode->clock;
2041 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2042 		priority_a_cnt = 0;
2043 		priority_b_cnt = 0;
2044 
2045 		if (rdev->family == CHIP_ARUBA)
2046 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2047 		else
2048 			dram_channels = si_get_number_of_dram_channels(rdev);
2049 
2050 		/* watermark for high clocks */
2051 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2052 			wm_high.yclk =
2053 				radeon_dpm_get_mclk(rdev, false) * 10;
2054 			wm_high.sclk =
2055 				radeon_dpm_get_sclk(rdev, false) * 10;
2056 		} else {
2057 			wm_high.yclk = rdev->pm.current_mclk * 10;
2058 			wm_high.sclk = rdev->pm.current_sclk * 10;
2059 		}
2060 
2061 		wm_high.disp_clk = mode->clock;
2062 		wm_high.src_width = mode->crtc_hdisplay;
2063 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2064 		wm_high.blank_time = line_time - wm_high.active_time;
2065 		wm_high.interlaced = false;
2066 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2067 			wm_high.interlaced = true;
2068 		wm_high.vsc = radeon_crtc->vsc;
2069 		wm_high.vtaps = 1;
2070 		if (radeon_crtc->rmx_type != RMX_OFF)
2071 			wm_high.vtaps = 2;
2072 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2073 		wm_high.lb_size = lb_size;
2074 		wm_high.dram_channels = dram_channels;
2075 		wm_high.num_heads = num_heads;
2076 
2077 		/* watermark for low clocks */
2078 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2079 			wm_low.yclk =
2080 				radeon_dpm_get_mclk(rdev, true) * 10;
2081 			wm_low.sclk =
2082 				radeon_dpm_get_sclk(rdev, true) * 10;
2083 		} else {
2084 			wm_low.yclk = rdev->pm.current_mclk * 10;
2085 			wm_low.sclk = rdev->pm.current_sclk * 10;
2086 		}
2087 
2088 		wm_low.disp_clk = mode->clock;
2089 		wm_low.src_width = mode->crtc_hdisplay;
2090 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2091 		wm_low.blank_time = line_time - wm_low.active_time;
2092 		wm_low.interlaced = false;
2093 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2094 			wm_low.interlaced = true;
2095 		wm_low.vsc = radeon_crtc->vsc;
2096 		wm_low.vtaps = 1;
2097 		if (radeon_crtc->rmx_type != RMX_OFF)
2098 			wm_low.vtaps = 2;
2099 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2100 		wm_low.lb_size = lb_size;
2101 		wm_low.dram_channels = dram_channels;
2102 		wm_low.num_heads = num_heads;
2103 
2104 		/* set for high clocks */
2105 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2106 		/* set for low clocks */
2107 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2108 
2109 		/* possibly force display priority to high */
2110 		/* should really do this at mode validation time... */
2111 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2112 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2113 		    !dce6_check_latency_hiding(&wm_high) ||
2114 		    (rdev->disp_priority == 2)) {
2115 			DRM_DEBUG_KMS("force priority to high\n");
2116 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2117 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2118 		}
2119 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2120 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2121 		    !dce6_check_latency_hiding(&wm_low) ||
2122 		    (rdev->disp_priority == 2)) {
2123 			DRM_DEBUG_KMS("force priority to high\n");
2124 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2125 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2126 		}
2127 
2128 		a.full = dfixed_const(1000);
2129 		b.full = dfixed_const(mode->clock);
2130 		b.full = dfixed_div(b, a);
2131 		c.full = dfixed_const(latency_watermark_a);
2132 		c.full = dfixed_mul(c, b);
2133 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2134 		c.full = dfixed_div(c, a);
2135 		a.full = dfixed_const(16);
2136 		c.full = dfixed_div(c, a);
2137 		priority_a_mark = dfixed_trunc(c);
2138 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2139 
2140 		a.full = dfixed_const(1000);
2141 		b.full = dfixed_const(mode->clock);
2142 		b.full = dfixed_div(b, a);
2143 		c.full = dfixed_const(latency_watermark_b);
2144 		c.full = dfixed_mul(c, b);
2145 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2146 		c.full = dfixed_div(c, a);
2147 		a.full = dfixed_const(16);
2148 		c.full = dfixed_div(c, a);
2149 		priority_b_mark = dfixed_trunc(c);
2150 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2151 	}
2152 
2153 	/* select wm A */
2154 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2155 	tmp = arb_control3;
2156 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2157 	tmp |= LATENCY_WATERMARK_MASK(1);
2158 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2159 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2160 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2161 		LATENCY_HIGH_WATERMARK(line_time)));
2162 	/* select wm B */
2163 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2164 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2165 	tmp |= LATENCY_WATERMARK_MASK(2);
2166 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2167 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2168 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2169 		LATENCY_HIGH_WATERMARK(line_time)));
2170 	/* restore original selection */
2171 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2172 
2173 	/* write the priority marks */
2174 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2175 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2176 
2177 	/* save values for DPM */
2178 	radeon_crtc->line_time = line_time;
2179 	radeon_crtc->wm_high = latency_watermark_a;
2180 	radeon_crtc->wm_low = latency_watermark_b;
2181 }
2182 
2183 void dce6_bandwidth_update(struct radeon_device *rdev)
2184 {
2185 	struct drm_display_mode *mode0 = NULL;
2186 	struct drm_display_mode *mode1 = NULL;
2187 	u32 num_heads = 0, lb_size;
2188 	int i;
2189 
2190 	radeon_update_display_priority(rdev);
2191 
2192 	for (i = 0; i < rdev->num_crtc; i++) {
2193 		if (rdev->mode_info.crtcs[i]->base.enabled)
2194 			num_heads++;
2195 	}
2196 	for (i = 0; i < rdev->num_crtc; i += 2) {
2197 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2198 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2199 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2200 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2201 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2202 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2203 	}
2204 }
2205 
2206 /*
2207  * Core functions
2208  */
2209 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2210 {
2211 	const u32 num_tile_mode_states = 32;
2212 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2213 
2214 	switch (rdev->config.si.mem_row_size_in_kb) {
2215 	case 1:
2216 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2217 		break;
2218 	case 2:
2219 	default:
2220 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2221 		break;
2222 	case 4:
2223 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2224 		break;
2225 	}
2226 
2227 	if ((rdev->family == CHIP_TAHITI) ||
2228 	    (rdev->family == CHIP_PITCAIRN)) {
2229 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2230 			switch (reg_offset) {
2231 			case 0:  /* non-AA compressed depth or any compressed stencil */
2232 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2234 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2235 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2236 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2237 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2240 				break;
2241 			case 1:  /* 2xAA/4xAA compressed depth only */
2242 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2244 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2245 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2246 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2247 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2250 				break;
2251 			case 2:  /* 8xAA compressed depth only */
2252 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2254 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2255 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2257 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2260 				break;
2261 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2262 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2264 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2265 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2267 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2270 				break;
2271 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2274 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2275 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2276 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2277 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2280 				break;
2281 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2282 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2284 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2285 						 TILE_SPLIT(split_equal_to_row_size) |
2286 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2287 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2289 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2290 				break;
2291 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2292 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2294 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2295 						 TILE_SPLIT(split_equal_to_row_size) |
2296 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2297 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2300 				break;
2301 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2302 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2304 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2305 						 TILE_SPLIT(split_equal_to_row_size) |
2306 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2307 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2310 				break;
2311 			case 8:  /* 1D and 1D Array Surfaces */
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2313 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2314 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2315 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2316 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2317 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2319 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2320 				break;
2321 			case 9:  /* Displayable maps. */
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2327 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2329 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2330 				break;
2331 			case 10:  /* Display 8bpp. */
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2335 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2336 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2337 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2339 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2340 				break;
2341 			case 11:  /* Display 16bpp. */
2342 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2345 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2346 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2347 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2349 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2350 				break;
2351 			case 12:  /* Display 32bpp. */
2352 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2355 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2356 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2357 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2360 				break;
2361 			case 13:  /* Thin. */
2362 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2364 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2366 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2367 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2370 				break;
2371 			case 14:  /* Thin 8 bpp. */
2372 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2374 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2376 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2377 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2380 				break;
2381 			case 15:  /* Thin 16 bpp. */
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2384 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2385 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2386 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2387 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2390 				break;
2391 			case 16:  /* Thin 32 bpp. */
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2396 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2397 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2400 				break;
2401 			case 17:  /* Thin 64 bpp. */
2402 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2404 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405 						 TILE_SPLIT(split_equal_to_row_size) |
2406 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2407 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2410 				break;
2411 			case 21:  /* 8 bpp PRT. */
2412 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2414 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2415 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2417 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2418 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2420 				break;
2421 			case 22:  /* 16 bpp PRT */
2422 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2424 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2425 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2426 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2427 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2430 				break;
2431 			case 23:  /* 32 bpp PRT */
2432 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2434 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2436 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2437 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2440 				break;
2441 			case 24:  /* 64 bpp PRT */
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2447 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2450 				break;
2451 			case 25:  /* 128 bpp PRT */
2452 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2454 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2456 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2457 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2460 				break;
2461 			default:
2462 				gb_tile_moden = 0;
2463 				break;
2464 			}
2465 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2466 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2467 		}
2468 	} else if ((rdev->family == CHIP_VERDE) ||
2469 		   (rdev->family == CHIP_OLAND) ||
2470 		   (rdev->family == CHIP_HAINAN)) {
2471 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2472 			switch (reg_offset) {
2473 			case 0:  /* non-AA compressed depth or any compressed stencil */
2474 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2475 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2476 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2477 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2478 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2479 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2482 				break;
2483 			case 1:  /* 2xAA/4xAA compressed depth only */
2484 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2486 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2487 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2488 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2489 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2491 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2492 				break;
2493 			case 2:  /* 8xAA compressed depth only */
2494 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2496 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2497 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2498 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2499 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2502 				break;
2503 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2504 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2506 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2507 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2508 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2509 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2512 				break;
2513 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2514 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2517 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2519 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522 				break;
2523 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2524 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2527 						 TILE_SPLIT(split_equal_to_row_size) |
2528 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2529 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2531 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532 				break;
2533 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2534 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2537 						 TILE_SPLIT(split_equal_to_row_size) |
2538 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2539 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542 				break;
2543 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2544 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2546 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2547 						 TILE_SPLIT(split_equal_to_row_size) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2549 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2552 				break;
2553 			case 8:  /* 1D and 1D Array Surfaces */
2554 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2555 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2557 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2559 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562 				break;
2563 			case 9:  /* Displayable maps. */
2564 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2565 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2568 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2569 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572 				break;
2573 			case 10:  /* Display 8bpp. */
2574 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2577 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2579 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2582 				break;
2583 			case 11:  /* Display 16bpp. */
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2587 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2588 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2589 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2592 				break;
2593 			case 12:  /* Display 32bpp. */
2594 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2597 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2598 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2599 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602 				break;
2603 			case 13:  /* Thin. */
2604 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2606 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2609 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612 				break;
2613 			case 14:  /* Thin 8 bpp. */
2614 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2616 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2617 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2619 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622 				break;
2623 			case 15:  /* Thin 16 bpp. */
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2626 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2629 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632 				break;
2633 			case 16:  /* Thin 32 bpp. */
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2636 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2637 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2638 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2639 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2642 				break;
2643 			case 17:  /* Thin 64 bpp. */
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2647 						 TILE_SPLIT(split_equal_to_row_size) |
2648 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2649 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652 				break;
2653 			case 21:  /* 8 bpp PRT. */
2654 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2657 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2659 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2660 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662 				break;
2663 			case 22:  /* 16 bpp PRT */
2664 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2669 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2672 				break;
2673 			case 23:  /* 32 bpp PRT */
2674 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2677 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2678 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2679 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682 				break;
2683 			case 24:  /* 64 bpp PRT */
2684 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2687 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2688 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2689 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2691 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2692 				break;
2693 			case 25:  /* 128 bpp PRT */
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2698 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2699 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2702 				break;
2703 			default:
2704 				gb_tile_moden = 0;
2705 				break;
2706 			}
2707 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2708 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2709 		}
2710 	} else
2711 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2712 }
2713 
2714 static void si_select_se_sh(struct radeon_device *rdev,
2715 			    u32 se_num, u32 sh_num)
2716 {
2717 	u32 data = INSTANCE_BROADCAST_WRITES;
2718 
2719 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2720 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2721 	else if (se_num == 0xffffffff)
2722 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2723 	else if (sh_num == 0xffffffff)
2724 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2725 	else
2726 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2727 	WREG32(GRBM_GFX_INDEX, data);
2728 }
2729 
2730 static u32 si_create_bitmask(u32 bit_width)
2731 {
2732 	u32 i, mask = 0;
2733 
2734 	for (i = 0; i < bit_width; i++) {
2735 		mask <<= 1;
2736 		mask |= 1;
2737 	}
2738 	return mask;
2739 }
2740 
2741 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2742 {
2743 	u32 data, mask;
2744 
2745 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2746 	if (data & 1)
2747 		data &= INACTIVE_CUS_MASK;
2748 	else
2749 		data = 0;
2750 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2751 
2752 	data >>= INACTIVE_CUS_SHIFT;
2753 
2754 	mask = si_create_bitmask(cu_per_sh);
2755 
2756 	return ~data & mask;
2757 }
2758 
2759 static void si_setup_spi(struct radeon_device *rdev,
2760 			 u32 se_num, u32 sh_per_se,
2761 			 u32 cu_per_sh)
2762 {
2763 	int i, j, k;
2764 	u32 data, mask, active_cu;
2765 
2766 	for (i = 0; i < se_num; i++) {
2767 		for (j = 0; j < sh_per_se; j++) {
2768 			si_select_se_sh(rdev, i, j);
2769 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2770 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2771 
2772 			mask = 1;
2773 			for (k = 0; k < 16; k++) {
2774 				mask <<= k;
2775 				if (active_cu & mask) {
2776 					data &= ~mask;
2777 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2778 					break;
2779 				}
2780 			}
2781 		}
2782 	}
2783 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2784 }
2785 
2786 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2787 			      u32 max_rb_num, u32 se_num,
2788 			      u32 sh_per_se)
2789 {
2790 	u32 data, mask;
2791 
2792 	data = RREG32(CC_RB_BACKEND_DISABLE);
2793 	if (data & 1)
2794 		data &= BACKEND_DISABLE_MASK;
2795 	else
2796 		data = 0;
2797 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2798 
2799 	data >>= BACKEND_DISABLE_SHIFT;
2800 
2801 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2802 
2803 	return data & mask;
2804 }
2805 
2806 static void si_setup_rb(struct radeon_device *rdev,
2807 			u32 se_num, u32 sh_per_se,
2808 			u32 max_rb_num)
2809 {
2810 	int i, j;
2811 	u32 data, mask;
2812 	u32 disabled_rbs = 0;
2813 	u32 enabled_rbs = 0;
2814 
2815 	for (i = 0; i < se_num; i++) {
2816 		for (j = 0; j < sh_per_se; j++) {
2817 			si_select_se_sh(rdev, i, j);
2818 			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2819 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2820 		}
2821 	}
2822 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2823 
2824 	mask = 1;
2825 	for (i = 0; i < max_rb_num; i++) {
2826 		if (!(disabled_rbs & mask))
2827 			enabled_rbs |= mask;
2828 		mask <<= 1;
2829 	}
2830 
2831 	for (i = 0; i < se_num; i++) {
2832 		si_select_se_sh(rdev, i, 0xffffffff);
2833 		data = 0;
2834 		for (j = 0; j < sh_per_se; j++) {
2835 			switch (enabled_rbs & 3) {
2836 			case 1:
2837 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2838 				break;
2839 			case 2:
2840 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2841 				break;
2842 			case 3:
2843 			default:
2844 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2845 				break;
2846 			}
2847 			enabled_rbs >>= 2;
2848 		}
2849 		WREG32(PA_SC_RASTER_CONFIG, data);
2850 	}
2851 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2852 }
2853 
2854 static void si_gpu_init(struct radeon_device *rdev)
2855 {
2856 	u32 gb_addr_config = 0;
2857 	u32 mc_shared_chmap, mc_arb_ramcfg;
2858 	u32 sx_debug_1;
2859 	u32 hdp_host_path_cntl;
2860 	u32 tmp;
2861 	int i, j;
2862 
2863 	switch (rdev->family) {
2864 	case CHIP_TAHITI:
2865 		rdev->config.si.max_shader_engines = 2;
2866 		rdev->config.si.max_tile_pipes = 12;
2867 		rdev->config.si.max_cu_per_sh = 8;
2868 		rdev->config.si.max_sh_per_se = 2;
2869 		rdev->config.si.max_backends_per_se = 4;
2870 		rdev->config.si.max_texture_channel_caches = 12;
2871 		rdev->config.si.max_gprs = 256;
2872 		rdev->config.si.max_gs_threads = 32;
2873 		rdev->config.si.max_hw_contexts = 8;
2874 
2875 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2876 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2877 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2878 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2879 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2880 		break;
2881 	case CHIP_PITCAIRN:
2882 		rdev->config.si.max_shader_engines = 2;
2883 		rdev->config.si.max_tile_pipes = 8;
2884 		rdev->config.si.max_cu_per_sh = 5;
2885 		rdev->config.si.max_sh_per_se = 2;
2886 		rdev->config.si.max_backends_per_se = 4;
2887 		rdev->config.si.max_texture_channel_caches = 8;
2888 		rdev->config.si.max_gprs = 256;
2889 		rdev->config.si.max_gs_threads = 32;
2890 		rdev->config.si.max_hw_contexts = 8;
2891 
2892 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2893 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2894 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2895 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2896 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2897 		break;
2898 	case CHIP_VERDE:
2899 	default:
2900 		rdev->config.si.max_shader_engines = 1;
2901 		rdev->config.si.max_tile_pipes = 4;
2902 		rdev->config.si.max_cu_per_sh = 5;
2903 		rdev->config.si.max_sh_per_se = 2;
2904 		rdev->config.si.max_backends_per_se = 4;
2905 		rdev->config.si.max_texture_channel_caches = 4;
2906 		rdev->config.si.max_gprs = 256;
2907 		rdev->config.si.max_gs_threads = 32;
2908 		rdev->config.si.max_hw_contexts = 8;
2909 
2910 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2911 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2912 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2913 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2914 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2915 		break;
2916 	case CHIP_OLAND:
2917 		rdev->config.si.max_shader_engines = 1;
2918 		rdev->config.si.max_tile_pipes = 4;
2919 		rdev->config.si.max_cu_per_sh = 6;
2920 		rdev->config.si.max_sh_per_se = 1;
2921 		rdev->config.si.max_backends_per_se = 2;
2922 		rdev->config.si.max_texture_channel_caches = 4;
2923 		rdev->config.si.max_gprs = 256;
2924 		rdev->config.si.max_gs_threads = 16;
2925 		rdev->config.si.max_hw_contexts = 8;
2926 
2927 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2928 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2929 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2930 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2931 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2932 		break;
2933 	case CHIP_HAINAN:
2934 		rdev->config.si.max_shader_engines = 1;
2935 		rdev->config.si.max_tile_pipes = 4;
2936 		rdev->config.si.max_cu_per_sh = 5;
2937 		rdev->config.si.max_sh_per_se = 1;
2938 		rdev->config.si.max_backends_per_se = 1;
2939 		rdev->config.si.max_texture_channel_caches = 2;
2940 		rdev->config.si.max_gprs = 256;
2941 		rdev->config.si.max_gs_threads = 16;
2942 		rdev->config.si.max_hw_contexts = 8;
2943 
2944 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2945 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2946 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2947 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2948 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2949 		break;
2950 	}
2951 
2952 	/* Initialize HDP */
2953 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2954 		WREG32((0x2c14 + j), 0x00000000);
2955 		WREG32((0x2c18 + j), 0x00000000);
2956 		WREG32((0x2c1c + j), 0x00000000);
2957 		WREG32((0x2c20 + j), 0x00000000);
2958 		WREG32((0x2c24 + j), 0x00000000);
2959 	}
2960 
2961 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2962 
2963 	evergreen_fix_pci_max_read_req_size(rdev);
2964 
2965 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2966 
2967 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2968 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2969 
2970 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2971 	rdev->config.si.mem_max_burst_length_bytes = 256;
2972 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2973 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2974 	if (rdev->config.si.mem_row_size_in_kb > 4)
2975 		rdev->config.si.mem_row_size_in_kb = 4;
2976 	/* XXX use MC settings? */
2977 	rdev->config.si.shader_engine_tile_size = 32;
2978 	rdev->config.si.num_gpus = 1;
2979 	rdev->config.si.multi_gpu_tile_size = 64;
2980 
2981 	/* fix up row size */
2982 	gb_addr_config &= ~ROW_SIZE_MASK;
2983 	switch (rdev->config.si.mem_row_size_in_kb) {
2984 	case 1:
2985 	default:
2986 		gb_addr_config |= ROW_SIZE(0);
2987 		break;
2988 	case 2:
2989 		gb_addr_config |= ROW_SIZE(1);
2990 		break;
2991 	case 4:
2992 		gb_addr_config |= ROW_SIZE(2);
2993 		break;
2994 	}
2995 
2996 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2997 	 * not have bank info, so create a custom tiling dword.
2998 	 * bits 3:0   num_pipes
2999 	 * bits 7:4   num_banks
3000 	 * bits 11:8  group_size
3001 	 * bits 15:12 row_size
3002 	 */
3003 	rdev->config.si.tile_config = 0;
3004 	switch (rdev->config.si.num_tile_pipes) {
3005 	case 1:
3006 		rdev->config.si.tile_config |= (0 << 0);
3007 		break;
3008 	case 2:
3009 		rdev->config.si.tile_config |= (1 << 0);
3010 		break;
3011 	case 4:
3012 		rdev->config.si.tile_config |= (2 << 0);
3013 		break;
3014 	case 8:
3015 	default:
3016 		/* XXX what about 12? */
3017 		rdev->config.si.tile_config |= (3 << 0);
3018 		break;
3019 	}
3020 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3021 	case 0: /* four banks */
3022 		rdev->config.si.tile_config |= 0 << 4;
3023 		break;
3024 	case 1: /* eight banks */
3025 		rdev->config.si.tile_config |= 1 << 4;
3026 		break;
3027 	case 2: /* sixteen banks */
3028 	default:
3029 		rdev->config.si.tile_config |= 2 << 4;
3030 		break;
3031 	}
3032 	rdev->config.si.tile_config |=
3033 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3034 	rdev->config.si.tile_config |=
3035 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3036 
3037 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3038 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3039 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3040 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3041 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3042 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3043 	if (rdev->has_uvd) {
3044 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3045 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3046 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3047 	}
3048 
3049 	si_tiling_mode_table_init(rdev);
3050 
3051 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3052 		    rdev->config.si.max_sh_per_se,
3053 		    rdev->config.si.max_backends_per_se);
3054 
3055 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3056 		     rdev->config.si.max_sh_per_se,
3057 		     rdev->config.si.max_cu_per_sh);
3058 
3059 
3060 	/* set HW defaults for 3D engine */
3061 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3062 				     ROQ_IB2_START(0x2b)));
3063 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3064 
3065 	sx_debug_1 = RREG32(SX_DEBUG_1);
3066 	WREG32(SX_DEBUG_1, sx_debug_1);
3067 
3068 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3069 
3070 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3071 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3072 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3073 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3074 
3075 	WREG32(VGT_NUM_INSTANCES, 1);
3076 
3077 	WREG32(CP_PERFMON_CNTL, 0);
3078 
3079 	WREG32(SQ_CONFIG, 0);
3080 
3081 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3082 					  FORCE_EOV_MAX_REZ_CNT(255)));
3083 
3084 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3085 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3086 
3087 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3088 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3089 
3090 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3091 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3092 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3093 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3094 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3095 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3096 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3097 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3098 
3099 	tmp = RREG32(HDP_MISC_CNTL);
3100 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3101 	WREG32(HDP_MISC_CNTL, tmp);
3102 
3103 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3104 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3105 
3106 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3107 
3108 	udelay(50);
3109 }
3110 
3111 /*
3112  * GPU scratch registers helpers function.
3113  */
3114 static void si_scratch_init(struct radeon_device *rdev)
3115 {
3116 	int i;
3117 
3118 	rdev->scratch.num_reg = 7;
3119 	rdev->scratch.reg_base = SCRATCH_REG0;
3120 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3121 		rdev->scratch.free[i] = true;
3122 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3123 	}
3124 }
3125 
3126 void si_fence_ring_emit(struct radeon_device *rdev,
3127 			struct radeon_fence *fence)
3128 {
3129 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3130 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3131 
3132 	/* flush read cache over gart */
3133 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3134 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3135 	radeon_ring_write(ring, 0);
3136 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3137 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3138 			  PACKET3_TC_ACTION_ENA |
3139 			  PACKET3_SH_KCACHE_ACTION_ENA |
3140 			  PACKET3_SH_ICACHE_ACTION_ENA);
3141 	radeon_ring_write(ring, 0xFFFFFFFF);
3142 	radeon_ring_write(ring, 0);
3143 	radeon_ring_write(ring, 10); /* poll interval */
3144 	/* EVENT_WRITE_EOP - flush caches, send int */
3145 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3146 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3147 	radeon_ring_write(ring, addr & 0xffffffff);
3148 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3149 	radeon_ring_write(ring, fence->seq);
3150 	radeon_ring_write(ring, 0);
3151 }
3152 
3153 /*
3154  * IB stuff
3155  */
3156 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3157 {
3158 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3159 	u32 header;
3160 
3161 	if (ib->is_const_ib) {
3162 		/* set switch buffer packet before const IB */
3163 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3164 		radeon_ring_write(ring, 0);
3165 
3166 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3167 	} else {
3168 		u32 next_rptr;
3169 		if (ring->rptr_save_reg) {
3170 			next_rptr = ring->wptr + 3 + 4 + 8;
3171 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3172 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3173 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3174 			radeon_ring_write(ring, next_rptr);
3175 		} else if (rdev->wb.enabled) {
3176 			next_rptr = ring->wptr + 5 + 4 + 8;
3177 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3178 			radeon_ring_write(ring, (1 << 8));
3179 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3180 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3181 			radeon_ring_write(ring, next_rptr);
3182 		}
3183 
3184 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3185 	}
3186 
3187 	radeon_ring_write(ring, header);
3188 	radeon_ring_write(ring,
3189 #ifdef __BIG_ENDIAN
3190 			  (2 << 0) |
3191 #endif
3192 			  (ib->gpu_addr & 0xFFFFFFFC));
3193 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3194 	radeon_ring_write(ring, ib->length_dw |
3195 			  (ib->vm ? (ib->vm->id << 24) : 0));
3196 
3197 	if (!ib->is_const_ib) {
3198 		/* flush read cache over gart for this vmid */
3199 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3200 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3201 		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3202 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3203 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3204 				  PACKET3_TC_ACTION_ENA |
3205 				  PACKET3_SH_KCACHE_ACTION_ENA |
3206 				  PACKET3_SH_ICACHE_ACTION_ENA);
3207 		radeon_ring_write(ring, 0xFFFFFFFF);
3208 		radeon_ring_write(ring, 0);
3209 		radeon_ring_write(ring, 10); /* poll interval */
3210 	}
3211 }
3212 
3213 /*
3214  * CP.
3215  */
3216 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3217 {
3218 	if (enable)
3219 		WREG32(CP_ME_CNTL, 0);
3220 	else {
3221 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3222 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3223 		WREG32(SCRATCH_UMSK, 0);
3224 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3225 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3226 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3227 	}
3228 	udelay(50);
3229 }
3230 
3231 static int si_cp_load_microcode(struct radeon_device *rdev)
3232 {
3233 	const __be32 *fw_data;
3234 	int i;
3235 
3236 	if (!rdev->me_fw || !rdev->pfp_fw)
3237 		return -EINVAL;
3238 
3239 	si_cp_enable(rdev, false);
3240 
3241 	/* PFP */
3242 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3243 	WREG32(CP_PFP_UCODE_ADDR, 0);
3244 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3245 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3246 	WREG32(CP_PFP_UCODE_ADDR, 0);
3247 
3248 	/* CE */
3249 	fw_data = (const __be32 *)rdev->ce_fw->data;
3250 	WREG32(CP_CE_UCODE_ADDR, 0);
3251 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3252 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3253 	WREG32(CP_CE_UCODE_ADDR, 0);
3254 
3255 	/* ME */
3256 	fw_data = (const __be32 *)rdev->me_fw->data;
3257 	WREG32(CP_ME_RAM_WADDR, 0);
3258 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3259 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3260 	WREG32(CP_ME_RAM_WADDR, 0);
3261 
3262 	WREG32(CP_PFP_UCODE_ADDR, 0);
3263 	WREG32(CP_CE_UCODE_ADDR, 0);
3264 	WREG32(CP_ME_RAM_WADDR, 0);
3265 	WREG32(CP_ME_RAM_RADDR, 0);
3266 	return 0;
3267 }
3268 
3269 static int si_cp_start(struct radeon_device *rdev)
3270 {
3271 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3272 	int r, i;
3273 
3274 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3275 	if (r) {
3276 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3277 		return r;
3278 	}
3279 	/* init the CP */
3280 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3281 	radeon_ring_write(ring, 0x1);
3282 	radeon_ring_write(ring, 0x0);
3283 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3284 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3285 	radeon_ring_write(ring, 0);
3286 	radeon_ring_write(ring, 0);
3287 
3288 	/* init the CE partitions */
3289 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3290 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3291 	radeon_ring_write(ring, 0xc000);
3292 	radeon_ring_write(ring, 0xe000);
3293 	radeon_ring_unlock_commit(rdev, ring);
3294 
3295 	si_cp_enable(rdev, true);
3296 
3297 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3298 	if (r) {
3299 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3300 		return r;
3301 	}
3302 
3303 	/* setup clear context state */
3304 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3305 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3306 
3307 	for (i = 0; i < si_default_size; i++)
3308 		radeon_ring_write(ring, si_default_state[i]);
3309 
3310 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3311 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3312 
3313 	/* set clear context state */
3314 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3315 	radeon_ring_write(ring, 0);
3316 
3317 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3318 	radeon_ring_write(ring, 0x00000316);
3319 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3320 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3321 
3322 	radeon_ring_unlock_commit(rdev, ring);
3323 
3324 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3325 		ring = &rdev->ring[i];
3326 		r = radeon_ring_lock(rdev, ring, 2);
3327 
3328 		/* clear the compute context state */
3329 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3330 		radeon_ring_write(ring, 0);
3331 
3332 		radeon_ring_unlock_commit(rdev, ring);
3333 	}
3334 
3335 	return 0;
3336 }
3337 
3338 static void si_cp_fini(struct radeon_device *rdev)
3339 {
3340 	struct radeon_ring *ring;
3341 	si_cp_enable(rdev, false);
3342 
3343 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3344 	radeon_ring_fini(rdev, ring);
3345 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3346 
3347 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3348 	radeon_ring_fini(rdev, ring);
3349 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3350 
3351 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3352 	radeon_ring_fini(rdev, ring);
3353 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3354 }
3355 
3356 static int si_cp_resume(struct radeon_device *rdev)
3357 {
3358 	struct radeon_ring *ring;
3359 	u32 tmp;
3360 	u32 rb_bufsz;
3361 	int r;
3362 
3363 	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
3364 	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
3365 				 SOFT_RESET_PA |
3366 				 SOFT_RESET_VGT |
3367 				 SOFT_RESET_SPI |
3368 				 SOFT_RESET_SX));
3369 	RREG32(GRBM_SOFT_RESET);
3370 	mdelay(15);
3371 	WREG32(GRBM_SOFT_RESET, 0);
3372 	RREG32(GRBM_SOFT_RESET);
3373 
3374 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3375 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3376 
3377 	/* Set the write pointer delay */
3378 	WREG32(CP_RB_WPTR_DELAY, 0);
3379 
3380 	WREG32(CP_DEBUG, 0);
3381 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3382 
3383 	/* ring 0 - compute and gfx */
3384 	/* Set ring buffer size */
3385 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3386 	rb_bufsz = drm_order(ring->ring_size / 8);
3387 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3388 #ifdef __BIG_ENDIAN
3389 	tmp |= BUF_SWAP_32BIT;
3390 #endif
3391 	WREG32(CP_RB0_CNTL, tmp);
3392 
3393 	/* Initialize the ring buffer's read and write pointers */
3394 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3395 	ring->wptr = 0;
3396 	WREG32(CP_RB0_WPTR, ring->wptr);
3397 
3398 	/* set the wb address whether it's enabled or not */
3399 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3400 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3401 
3402 	if (rdev->wb.enabled)
3403 		WREG32(SCRATCH_UMSK, 0xff);
3404 	else {
3405 		tmp |= RB_NO_UPDATE;
3406 		WREG32(SCRATCH_UMSK, 0);
3407 	}
3408 
3409 	mdelay(1);
3410 	WREG32(CP_RB0_CNTL, tmp);
3411 
3412 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3413 
3414 	ring->rptr = RREG32(CP_RB0_RPTR);
3415 
3416 	/* ring1  - compute only */
3417 	/* Set ring buffer size */
3418 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3419 	rb_bufsz = drm_order(ring->ring_size / 8);
3420 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3421 #ifdef __BIG_ENDIAN
3422 	tmp |= BUF_SWAP_32BIT;
3423 #endif
3424 	WREG32(CP_RB1_CNTL, tmp);
3425 
3426 	/* Initialize the ring buffer's read and write pointers */
3427 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3428 	ring->wptr = 0;
3429 	WREG32(CP_RB1_WPTR, ring->wptr);
3430 
3431 	/* set the wb address whether it's enabled or not */
3432 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3433 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3434 
3435 	mdelay(1);
3436 	WREG32(CP_RB1_CNTL, tmp);
3437 
3438 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3439 
3440 	ring->rptr = RREG32(CP_RB1_RPTR);
3441 
3442 	/* ring2 - compute only */
3443 	/* Set ring buffer size */
3444 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3445 	rb_bufsz = drm_order(ring->ring_size / 8);
3446 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3447 #ifdef __BIG_ENDIAN
3448 	tmp |= BUF_SWAP_32BIT;
3449 #endif
3450 	WREG32(CP_RB2_CNTL, tmp);
3451 
3452 	/* Initialize the ring buffer's read and write pointers */
3453 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3454 	ring->wptr = 0;
3455 	WREG32(CP_RB2_WPTR, ring->wptr);
3456 
3457 	/* set the wb address whether it's enabled or not */
3458 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3459 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3460 
3461 	mdelay(1);
3462 	WREG32(CP_RB2_CNTL, tmp);
3463 
3464 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3465 
3466 	ring->rptr = RREG32(CP_RB2_RPTR);
3467 
3468 	/* start the rings */
3469 	si_cp_start(rdev);
3470 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3471 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3472 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3473 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3474 	if (r) {
3475 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3476 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3477 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3478 		return r;
3479 	}
3480 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3481 	if (r) {
3482 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3483 	}
3484 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3485 	if (r) {
3486 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3487 	}
3488 
3489 	return 0;
3490 }
3491 
3492 static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3493 {
3494 	u32 reset_mask = 0;
3495 	u32 tmp;
3496 
3497 	/* GRBM_STATUS */
3498 	tmp = RREG32(GRBM_STATUS);
3499 	if (tmp & (PA_BUSY | SC_BUSY |
3500 		   BCI_BUSY | SX_BUSY |
3501 		   TA_BUSY | VGT_BUSY |
3502 		   DB_BUSY | CB_BUSY |
3503 		   GDS_BUSY | SPI_BUSY |
3504 		   IA_BUSY | IA_BUSY_NO_DMA))
3505 		reset_mask |= RADEON_RESET_GFX;
3506 
3507 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3508 		   CP_BUSY | CP_COHERENCY_BUSY))
3509 		reset_mask |= RADEON_RESET_CP;
3510 
3511 	if (tmp & GRBM_EE_BUSY)
3512 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3513 
3514 	/* GRBM_STATUS2 */
3515 	tmp = RREG32(GRBM_STATUS2);
3516 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3517 		reset_mask |= RADEON_RESET_RLC;
3518 
3519 	/* DMA_STATUS_REG 0 */
3520 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3521 	if (!(tmp & DMA_IDLE))
3522 		reset_mask |= RADEON_RESET_DMA;
3523 
3524 	/* DMA_STATUS_REG 1 */
3525 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3526 	if (!(tmp & DMA_IDLE))
3527 		reset_mask |= RADEON_RESET_DMA1;
3528 
3529 	/* SRBM_STATUS2 */
3530 	tmp = RREG32(SRBM_STATUS2);
3531 	if (tmp & DMA_BUSY)
3532 		reset_mask |= RADEON_RESET_DMA;
3533 
3534 	if (tmp & DMA1_BUSY)
3535 		reset_mask |= RADEON_RESET_DMA1;
3536 
3537 	/* SRBM_STATUS */
3538 	tmp = RREG32(SRBM_STATUS);
3539 
3540 	if (tmp & IH_BUSY)
3541 		reset_mask |= RADEON_RESET_IH;
3542 
3543 	if (tmp & SEM_BUSY)
3544 		reset_mask |= RADEON_RESET_SEM;
3545 
3546 	if (tmp & GRBM_RQ_PENDING)
3547 		reset_mask |= RADEON_RESET_GRBM;
3548 
3549 	if (tmp & VMC_BUSY)
3550 		reset_mask |= RADEON_RESET_VMC;
3551 
3552 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3553 		   MCC_BUSY | MCD_BUSY))
3554 		reset_mask |= RADEON_RESET_MC;
3555 
3556 	if (evergreen_is_display_hung(rdev))
3557 		reset_mask |= RADEON_RESET_DISPLAY;
3558 
3559 	/* VM_L2_STATUS */
3560 	tmp = RREG32(VM_L2_STATUS);
3561 	if (tmp & L2_BUSY)
3562 		reset_mask |= RADEON_RESET_VMC;
3563 
3564 	/* Skip MC reset as it's mostly likely not hung, just busy */
3565 	if (reset_mask & RADEON_RESET_MC) {
3566 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3567 		reset_mask &= ~RADEON_RESET_MC;
3568 	}
3569 
3570 	return reset_mask;
3571 }
3572 
3573 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3574 {
3575 	struct evergreen_mc_save save;
3576 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3577 	u32 tmp;
3578 
3579 	if (reset_mask == 0)
3580 		return;
3581 
3582 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3583 
3584 	evergreen_print_gpu_status_regs(rdev);
3585 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3586 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3587 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3588 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3589 
3590 	/* Disable CP parsing/prefetching */
3591 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3592 
3593 	if (reset_mask & RADEON_RESET_DMA) {
3594 		/* dma0 */
3595 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3596 		tmp &= ~DMA_RB_ENABLE;
3597 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3598 	}
3599 	if (reset_mask & RADEON_RESET_DMA1) {
3600 		/* dma1 */
3601 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3602 		tmp &= ~DMA_RB_ENABLE;
3603 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3604 	}
3605 
3606 	udelay(50);
3607 
3608 	evergreen_mc_stop(rdev, &save);
3609 	if (evergreen_mc_wait_for_idle(rdev)) {
3610 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3611 	}
3612 
3613 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3614 		grbm_soft_reset = SOFT_RESET_CB |
3615 			SOFT_RESET_DB |
3616 			SOFT_RESET_GDS |
3617 			SOFT_RESET_PA |
3618 			SOFT_RESET_SC |
3619 			SOFT_RESET_BCI |
3620 			SOFT_RESET_SPI |
3621 			SOFT_RESET_SX |
3622 			SOFT_RESET_TC |
3623 			SOFT_RESET_TA |
3624 			SOFT_RESET_VGT |
3625 			SOFT_RESET_IA;
3626 	}
3627 
3628 	if (reset_mask & RADEON_RESET_CP) {
3629 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3630 
3631 		srbm_soft_reset |= SOFT_RESET_GRBM;
3632 	}
3633 
3634 	if (reset_mask & RADEON_RESET_DMA)
3635 		srbm_soft_reset |= SOFT_RESET_DMA;
3636 
3637 	if (reset_mask & RADEON_RESET_DMA1)
3638 		srbm_soft_reset |= SOFT_RESET_DMA1;
3639 
3640 	if (reset_mask & RADEON_RESET_DISPLAY)
3641 		srbm_soft_reset |= SOFT_RESET_DC;
3642 
3643 	if (reset_mask & RADEON_RESET_RLC)
3644 		grbm_soft_reset |= SOFT_RESET_RLC;
3645 
3646 	if (reset_mask & RADEON_RESET_SEM)
3647 		srbm_soft_reset |= SOFT_RESET_SEM;
3648 
3649 	if (reset_mask & RADEON_RESET_IH)
3650 		srbm_soft_reset |= SOFT_RESET_IH;
3651 
3652 	if (reset_mask & RADEON_RESET_GRBM)
3653 		srbm_soft_reset |= SOFT_RESET_GRBM;
3654 
3655 	if (reset_mask & RADEON_RESET_VMC)
3656 		srbm_soft_reset |= SOFT_RESET_VMC;
3657 
3658 	if (reset_mask & RADEON_RESET_MC)
3659 		srbm_soft_reset |= SOFT_RESET_MC;
3660 
3661 	if (grbm_soft_reset) {
3662 		tmp = RREG32(GRBM_SOFT_RESET);
3663 		tmp |= grbm_soft_reset;
3664 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3665 		WREG32(GRBM_SOFT_RESET, tmp);
3666 		tmp = RREG32(GRBM_SOFT_RESET);
3667 
3668 		udelay(50);
3669 
3670 		tmp &= ~grbm_soft_reset;
3671 		WREG32(GRBM_SOFT_RESET, tmp);
3672 		tmp = RREG32(GRBM_SOFT_RESET);
3673 	}
3674 
3675 	if (srbm_soft_reset) {
3676 		tmp = RREG32(SRBM_SOFT_RESET);
3677 		tmp |= srbm_soft_reset;
3678 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3679 		WREG32(SRBM_SOFT_RESET, tmp);
3680 		tmp = RREG32(SRBM_SOFT_RESET);
3681 
3682 		udelay(50);
3683 
3684 		tmp &= ~srbm_soft_reset;
3685 		WREG32(SRBM_SOFT_RESET, tmp);
3686 		tmp = RREG32(SRBM_SOFT_RESET);
3687 	}
3688 
3689 	/* Wait a little for things to settle down */
3690 	udelay(50);
3691 
3692 	evergreen_mc_resume(rdev, &save);
3693 	udelay(50);
3694 
3695 	evergreen_print_gpu_status_regs(rdev);
3696 }
3697 
3698 int si_asic_reset(struct radeon_device *rdev)
3699 {
3700 	u32 reset_mask;
3701 
3702 	reset_mask = si_gpu_check_soft_reset(rdev);
3703 
3704 	if (reset_mask)
3705 		r600_set_bios_scratch_engine_hung(rdev, true);
3706 
3707 	si_gpu_soft_reset(rdev, reset_mask);
3708 
3709 	reset_mask = si_gpu_check_soft_reset(rdev);
3710 
3711 	if (!reset_mask)
3712 		r600_set_bios_scratch_engine_hung(rdev, false);
3713 
3714 	return 0;
3715 }
3716 
3717 /**
3718  * si_gfx_is_lockup - Check if the GFX engine is locked up
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Check if the GFX engine is locked up.
3724  * Returns true if the engine appears to be locked up, false if not.
3725  */
3726 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3727 {
3728 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3729 
3730 	if (!(reset_mask & (RADEON_RESET_GFX |
3731 			    RADEON_RESET_COMPUTE |
3732 			    RADEON_RESET_CP))) {
3733 		radeon_ring_lockup_update(ring);
3734 		return false;
3735 	}
3736 	/* force CP activities */
3737 	radeon_ring_force_activity(rdev, ring);
3738 	return radeon_ring_test_lockup(rdev, ring);
3739 }
3740 
3741 /**
3742  * si_dma_is_lockup - Check if the DMA engine is locked up
3743  *
3744  * @rdev: radeon_device pointer
3745  * @ring: radeon_ring structure holding ring information
3746  *
3747  * Check if the async DMA engine is locked up.
3748  * Returns true if the engine appears to be locked up, false if not.
3749  */
3750 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3751 {
3752 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3753 	u32 mask;
3754 
3755 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3756 		mask = RADEON_RESET_DMA;
3757 	else
3758 		mask = RADEON_RESET_DMA1;
3759 
3760 	if (!(reset_mask & mask)) {
3761 		radeon_ring_lockup_update(ring);
3762 		return false;
3763 	}
3764 	/* force ring activities */
3765 	radeon_ring_force_activity(rdev, ring);
3766 	return radeon_ring_test_lockup(rdev, ring);
3767 }
3768 
3769 /* MC */
3770 static void si_mc_program(struct radeon_device *rdev)
3771 {
3772 	struct evergreen_mc_save save;
3773 	u32 tmp;
3774 	int i, j;
3775 
3776 	/* Initialize HDP */
3777 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3778 		WREG32((0x2c14 + j), 0x00000000);
3779 		WREG32((0x2c18 + j), 0x00000000);
3780 		WREG32((0x2c1c + j), 0x00000000);
3781 		WREG32((0x2c20 + j), 0x00000000);
3782 		WREG32((0x2c24 + j), 0x00000000);
3783 	}
3784 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3785 
3786 	evergreen_mc_stop(rdev, &save);
3787 	if (radeon_mc_wait_for_idle(rdev)) {
3788 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3789 	}
3790 	if (!ASIC_IS_NODCE(rdev))
3791 		/* Lockout access through VGA aperture*/
3792 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3793 	/* Update configuration */
3794 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3795 	       rdev->mc.vram_start >> 12);
3796 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3797 	       rdev->mc.vram_end >> 12);
3798 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3799 	       rdev->vram_scratch.gpu_addr >> 12);
3800 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3801 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3802 	WREG32(MC_VM_FB_LOCATION, tmp);
3803 	/* XXX double check these! */
3804 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3805 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3806 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3807 	WREG32(MC_VM_AGP_BASE, 0);
3808 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3809 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3810 	if (radeon_mc_wait_for_idle(rdev)) {
3811 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3812 	}
3813 	evergreen_mc_resume(rdev, &save);
3814 	if (!ASIC_IS_NODCE(rdev)) {
3815 		/* we need to own VRAM, so turn off the VGA renderer here
3816 		 * to stop it overwriting our objects */
3817 		rv515_vga_render_disable(rdev);
3818 	}
3819 }
3820 
3821 void si_vram_gtt_location(struct radeon_device *rdev,
3822 			  struct radeon_mc *mc)
3823 {
3824 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3825 		/* leave room for at least 1024M GTT */
3826 		dev_warn(rdev->dev, "limiting VRAM\n");
3827 		mc->real_vram_size = 0xFFC0000000ULL;
3828 		mc->mc_vram_size = 0xFFC0000000ULL;
3829 	}
3830 	radeon_vram_location(rdev, &rdev->mc, 0);
3831 	rdev->mc.gtt_base_align = 0;
3832 	radeon_gtt_location(rdev, mc);
3833 }
3834 
3835 static int si_mc_init(struct radeon_device *rdev)
3836 {
3837 	u32 tmp;
3838 	int chansize, numchan;
3839 
3840 	/* Get VRAM informations */
3841 	rdev->mc.vram_is_ddr = true;
3842 	tmp = RREG32(MC_ARB_RAMCFG);
3843 	if (tmp & CHANSIZE_OVERRIDE) {
3844 		chansize = 16;
3845 	} else if (tmp & CHANSIZE_MASK) {
3846 		chansize = 64;
3847 	} else {
3848 		chansize = 32;
3849 	}
3850 	tmp = RREG32(MC_SHARED_CHMAP);
3851 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3852 	case 0:
3853 	default:
3854 		numchan = 1;
3855 		break;
3856 	case 1:
3857 		numchan = 2;
3858 		break;
3859 	case 2:
3860 		numchan = 4;
3861 		break;
3862 	case 3:
3863 		numchan = 8;
3864 		break;
3865 	case 4:
3866 		numchan = 3;
3867 		break;
3868 	case 5:
3869 		numchan = 6;
3870 		break;
3871 	case 6:
3872 		numchan = 10;
3873 		break;
3874 	case 7:
3875 		numchan = 12;
3876 		break;
3877 	case 8:
3878 		numchan = 16;
3879 		break;
3880 	}
3881 	rdev->mc.vram_width = numchan * chansize;
3882 	/* Could aper size report 0 ? */
3883 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3884 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3885 	/* size in MB on si */
3886 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3887 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3888 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3889 	si_vram_gtt_location(rdev, &rdev->mc);
3890 	radeon_update_bandwidth_info(rdev);
3891 
3892 	return 0;
3893 }
3894 
3895 /*
3896  * GART
3897  */
3898 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3899 {
3900 	/* flush hdp cache */
3901 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3902 
3903 	/* bits 0-15 are the VM contexts0-15 */
3904 	WREG32(VM_INVALIDATE_REQUEST, 1);
3905 }
3906 
3907 static int si_pcie_gart_enable(struct radeon_device *rdev)
3908 {
3909 	int r, i;
3910 
3911 	if (rdev->gart.robj == NULL) {
3912 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3913 		return -EINVAL;
3914 	}
3915 	r = radeon_gart_table_vram_pin(rdev);
3916 	if (r)
3917 		return r;
3918 	radeon_gart_restore(rdev);
3919 	/* Setup TLB control */
3920 	WREG32(MC_VM_MX_L1_TLB_CNTL,
3921 	       (0xA << 7) |
3922 	       ENABLE_L1_TLB |
3923 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3924 	       ENABLE_ADVANCED_DRIVER_MODEL |
3925 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3926 	/* Setup L2 cache */
3927 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3928 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3929 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3930 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
3931 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
3932 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3933 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3934 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3935 	/* setup context0 */
3936 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3937 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3938 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3939 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3940 			(u32)(rdev->dummy_page.addr >> 12));
3941 	WREG32(VM_CONTEXT0_CNTL2, 0);
3942 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3943 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3944 
3945 	WREG32(0x15D4, 0);
3946 	WREG32(0x15D8, 0);
3947 	WREG32(0x15DC, 0);
3948 
3949 	/* empty context1-15 */
3950 	/* set vm size, must be a multiple of 4 */
3951 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3952 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3953 	/* Assign the pt base to something valid for now; the pts used for
3954 	 * the VMs are determined by the application and setup and assigned
3955 	 * on the fly in the vm part of radeon_gart.c
3956 	 */
3957 	for (i = 1; i < 16; i++) {
3958 		if (i < 8)
3959 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3960 			       rdev->gart.table_addr >> 12);
3961 		else
3962 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3963 			       rdev->gart.table_addr >> 12);
3964 	}
3965 
3966 	/* enable context1-15 */
3967 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3968 	       (u32)(rdev->dummy_page.addr >> 12));
3969 	WREG32(VM_CONTEXT1_CNTL2, 4);
3970 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3971 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3972 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3973 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3974 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3975 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3976 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3977 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3978 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3979 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3980 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3981 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3982 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3983 
3984 	si_pcie_gart_tlb_flush(rdev);
3985 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3986 		 (unsigned)(rdev->mc.gtt_size >> 20),
3987 		 (unsigned long long)rdev->gart.table_addr);
3988 	rdev->gart.ready = true;
3989 	return 0;
3990 }
3991 
3992 static void si_pcie_gart_disable(struct radeon_device *rdev)
3993 {
3994 	/* Disable all tables */
3995 	WREG32(VM_CONTEXT0_CNTL, 0);
3996 	WREG32(VM_CONTEXT1_CNTL, 0);
3997 	/* Setup TLB control */
3998 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3999 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4000 	/* Setup L2 cache */
4001 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4002 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4003 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4004 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4005 	WREG32(VM_L2_CNTL2, 0);
4006 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4007 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4008 	radeon_gart_table_vram_unpin(rdev);
4009 }
4010 
4011 static void si_pcie_gart_fini(struct radeon_device *rdev)
4012 {
4013 	si_pcie_gart_disable(rdev);
4014 	radeon_gart_table_vram_free(rdev);
4015 	radeon_gart_fini(rdev);
4016 }
4017 
4018 /* vm parser */
4019 static bool si_vm_reg_valid(u32 reg)
4020 {
4021 	/* context regs are fine */
4022 	if (reg >= 0x28000)
4023 		return true;
4024 
4025 	/* check config regs */
4026 	switch (reg) {
4027 	case GRBM_GFX_INDEX:
4028 	case CP_STRMOUT_CNTL:
4029 	case VGT_VTX_VECT_EJECT_REG:
4030 	case VGT_CACHE_INVALIDATION:
4031 	case VGT_ESGS_RING_SIZE:
4032 	case VGT_GSVS_RING_SIZE:
4033 	case VGT_GS_VERTEX_REUSE:
4034 	case VGT_PRIMITIVE_TYPE:
4035 	case VGT_INDEX_TYPE:
4036 	case VGT_NUM_INDICES:
4037 	case VGT_NUM_INSTANCES:
4038 	case VGT_TF_RING_SIZE:
4039 	case VGT_HS_OFFCHIP_PARAM:
4040 	case VGT_TF_MEMORY_BASE:
4041 	case PA_CL_ENHANCE:
4042 	case PA_SU_LINE_STIPPLE_VALUE:
4043 	case PA_SC_LINE_STIPPLE_STATE:
4044 	case PA_SC_ENHANCE:
4045 	case SQC_CACHES:
4046 	case SPI_STATIC_THREAD_MGMT_1:
4047 	case SPI_STATIC_THREAD_MGMT_2:
4048 	case SPI_STATIC_THREAD_MGMT_3:
4049 	case SPI_PS_MAX_WAVE_ID:
4050 	case SPI_CONFIG_CNTL:
4051 	case SPI_CONFIG_CNTL_1:
4052 	case TA_CNTL_AUX:
4053 		return true;
4054 	default:
4055 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4056 		return false;
4057 	}
4058 }
4059 
4060 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4061 				  u32 *ib, struct radeon_cs_packet *pkt)
4062 {
4063 	switch (pkt->opcode) {
4064 	case PACKET3_NOP:
4065 	case PACKET3_SET_BASE:
4066 	case PACKET3_SET_CE_DE_COUNTERS:
4067 	case PACKET3_LOAD_CONST_RAM:
4068 	case PACKET3_WRITE_CONST_RAM:
4069 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4070 	case PACKET3_DUMP_CONST_RAM:
4071 	case PACKET3_INCREMENT_CE_COUNTER:
4072 	case PACKET3_WAIT_ON_DE_COUNTER:
4073 	case PACKET3_CE_WRITE:
4074 		break;
4075 	default:
4076 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4077 		return -EINVAL;
4078 	}
4079 	return 0;
4080 }
4081 
4082 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4083 				   u32 *ib, struct radeon_cs_packet *pkt)
4084 {
4085 	u32 idx = pkt->idx + 1;
4086 	u32 idx_value = ib[idx];
4087 	u32 start_reg, end_reg, reg, i;
4088 	u32 command, info;
4089 
4090 	switch (pkt->opcode) {
4091 	case PACKET3_NOP:
4092 	case PACKET3_SET_BASE:
4093 	case PACKET3_CLEAR_STATE:
4094 	case PACKET3_INDEX_BUFFER_SIZE:
4095 	case PACKET3_DISPATCH_DIRECT:
4096 	case PACKET3_DISPATCH_INDIRECT:
4097 	case PACKET3_ALLOC_GDS:
4098 	case PACKET3_WRITE_GDS_RAM:
4099 	case PACKET3_ATOMIC_GDS:
4100 	case PACKET3_ATOMIC:
4101 	case PACKET3_OCCLUSION_QUERY:
4102 	case PACKET3_SET_PREDICATION:
4103 	case PACKET3_COND_EXEC:
4104 	case PACKET3_PRED_EXEC:
4105 	case PACKET3_DRAW_INDIRECT:
4106 	case PACKET3_DRAW_INDEX_INDIRECT:
4107 	case PACKET3_INDEX_BASE:
4108 	case PACKET3_DRAW_INDEX_2:
4109 	case PACKET3_CONTEXT_CONTROL:
4110 	case PACKET3_INDEX_TYPE:
4111 	case PACKET3_DRAW_INDIRECT_MULTI:
4112 	case PACKET3_DRAW_INDEX_AUTO:
4113 	case PACKET3_DRAW_INDEX_IMMD:
4114 	case PACKET3_NUM_INSTANCES:
4115 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4116 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4117 	case PACKET3_DRAW_INDEX_OFFSET_2:
4118 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4119 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4120 	case PACKET3_MPEG_INDEX:
4121 	case PACKET3_WAIT_REG_MEM:
4122 	case PACKET3_MEM_WRITE:
4123 	case PACKET3_PFP_SYNC_ME:
4124 	case PACKET3_SURFACE_SYNC:
4125 	case PACKET3_EVENT_WRITE:
4126 	case PACKET3_EVENT_WRITE_EOP:
4127 	case PACKET3_EVENT_WRITE_EOS:
4128 	case PACKET3_SET_CONTEXT_REG:
4129 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4130 	case PACKET3_SET_SH_REG:
4131 	case PACKET3_SET_SH_REG_OFFSET:
4132 	case PACKET3_INCREMENT_DE_COUNTER:
4133 	case PACKET3_WAIT_ON_CE_COUNTER:
4134 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4135 	case PACKET3_ME_WRITE:
4136 		break;
4137 	case PACKET3_COPY_DATA:
4138 		if ((idx_value & 0xf00) == 0) {
4139 			reg = ib[idx + 3] * 4;
4140 			if (!si_vm_reg_valid(reg))
4141 				return -EINVAL;
4142 		}
4143 		break;
4144 	case PACKET3_WRITE_DATA:
4145 		if ((idx_value & 0xf00) == 0) {
4146 			start_reg = ib[idx + 1] * 4;
4147 			if (idx_value & 0x10000) {
4148 				if (!si_vm_reg_valid(start_reg))
4149 					return -EINVAL;
4150 			} else {
4151 				for (i = 0; i < (pkt->count - 2); i++) {
4152 					reg = start_reg + (4 * i);
4153 					if (!si_vm_reg_valid(reg))
4154 						return -EINVAL;
4155 				}
4156 			}
4157 		}
4158 		break;
4159 	case PACKET3_COND_WRITE:
4160 		if (idx_value & 0x100) {
4161 			reg = ib[idx + 5] * 4;
4162 			if (!si_vm_reg_valid(reg))
4163 				return -EINVAL;
4164 		}
4165 		break;
4166 	case PACKET3_COPY_DW:
4167 		if (idx_value & 0x2) {
4168 			reg = ib[idx + 3] * 4;
4169 			if (!si_vm_reg_valid(reg))
4170 				return -EINVAL;
4171 		}
4172 		break;
4173 	case PACKET3_SET_CONFIG_REG:
4174 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4175 		end_reg = 4 * pkt->count + start_reg - 4;
4176 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4177 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4178 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4179 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4180 			return -EINVAL;
4181 		}
4182 		for (i = 0; i < pkt->count; i++) {
4183 			reg = start_reg + (4 * i);
4184 			if (!si_vm_reg_valid(reg))
4185 				return -EINVAL;
4186 		}
4187 		break;
4188 	case PACKET3_CP_DMA:
4189 		command = ib[idx + 4];
4190 		info = ib[idx + 1];
4191 		if (command & PACKET3_CP_DMA_CMD_SAS) {
4192 			/* src address space is register */
4193 			if (((info & 0x60000000) >> 29) == 0) {
4194 				start_reg = idx_value << 2;
4195 				if (command & PACKET3_CP_DMA_CMD_SAIC) {
4196 					reg = start_reg;
4197 					if (!si_vm_reg_valid(reg)) {
4198 						DRM_ERROR("CP DMA Bad SRC register\n");
4199 						return -EINVAL;
4200 					}
4201 				} else {
4202 					for (i = 0; i < (command & 0x1fffff); i++) {
4203 						reg = start_reg + (4 * i);
4204 						if (!si_vm_reg_valid(reg)) {
4205 							DRM_ERROR("CP DMA Bad SRC register\n");
4206 							return -EINVAL;
4207 						}
4208 					}
4209 				}
4210 			}
4211 		}
4212 		if (command & PACKET3_CP_DMA_CMD_DAS) {
4213 			/* dst address space is register */
4214 			if (((info & 0x00300000) >> 20) == 0) {
4215 				start_reg = ib[idx + 2];
4216 				if (command & PACKET3_CP_DMA_CMD_DAIC) {
4217 					reg = start_reg;
4218 					if (!si_vm_reg_valid(reg)) {
4219 						DRM_ERROR("CP DMA Bad DST register\n");
4220 						return -EINVAL;
4221 					}
4222 				} else {
4223 					for (i = 0; i < (command & 0x1fffff); i++) {
4224 						reg = start_reg + (4 * i);
4225 						if (!si_vm_reg_valid(reg)) {
4226 							DRM_ERROR("CP DMA Bad DST register\n");
4227 							return -EINVAL;
4228 						}
4229 					}
4230 				}
4231 			}
4232 		}
4233 		break;
4234 	default:
4235 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4236 		return -EINVAL;
4237 	}
4238 	return 0;
4239 }
4240 
4241 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4242 				       u32 *ib, struct radeon_cs_packet *pkt)
4243 {
4244 	u32 idx = pkt->idx + 1;
4245 	u32 idx_value = ib[idx];
4246 	u32 start_reg, reg, i;
4247 
4248 	switch (pkt->opcode) {
4249 	case PACKET3_NOP:
4250 	case PACKET3_SET_BASE:
4251 	case PACKET3_CLEAR_STATE:
4252 	case PACKET3_DISPATCH_DIRECT:
4253 	case PACKET3_DISPATCH_INDIRECT:
4254 	case PACKET3_ALLOC_GDS:
4255 	case PACKET3_WRITE_GDS_RAM:
4256 	case PACKET3_ATOMIC_GDS:
4257 	case PACKET3_ATOMIC:
4258 	case PACKET3_OCCLUSION_QUERY:
4259 	case PACKET3_SET_PREDICATION:
4260 	case PACKET3_COND_EXEC:
4261 	case PACKET3_PRED_EXEC:
4262 	case PACKET3_CONTEXT_CONTROL:
4263 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4264 	case PACKET3_WAIT_REG_MEM:
4265 	case PACKET3_MEM_WRITE:
4266 	case PACKET3_PFP_SYNC_ME:
4267 	case PACKET3_SURFACE_SYNC:
4268 	case PACKET3_EVENT_WRITE:
4269 	case PACKET3_EVENT_WRITE_EOP:
4270 	case PACKET3_EVENT_WRITE_EOS:
4271 	case PACKET3_SET_CONTEXT_REG:
4272 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4273 	case PACKET3_SET_SH_REG:
4274 	case PACKET3_SET_SH_REG_OFFSET:
4275 	case PACKET3_INCREMENT_DE_COUNTER:
4276 	case PACKET3_WAIT_ON_CE_COUNTER:
4277 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4278 	case PACKET3_ME_WRITE:
4279 		break;
4280 	case PACKET3_COPY_DATA:
4281 		if ((idx_value & 0xf00) == 0) {
4282 			reg = ib[idx + 3] * 4;
4283 			if (!si_vm_reg_valid(reg))
4284 				return -EINVAL;
4285 		}
4286 		break;
4287 	case PACKET3_WRITE_DATA:
4288 		if ((idx_value & 0xf00) == 0) {
4289 			start_reg = ib[idx + 1] * 4;
4290 			if (idx_value & 0x10000) {
4291 				if (!si_vm_reg_valid(start_reg))
4292 					return -EINVAL;
4293 			} else {
4294 				for (i = 0; i < (pkt->count - 2); i++) {
4295 					reg = start_reg + (4 * i);
4296 					if (!si_vm_reg_valid(reg))
4297 						return -EINVAL;
4298 				}
4299 			}
4300 		}
4301 		break;
4302 	case PACKET3_COND_WRITE:
4303 		if (idx_value & 0x100) {
4304 			reg = ib[idx + 5] * 4;
4305 			if (!si_vm_reg_valid(reg))
4306 				return -EINVAL;
4307 		}
4308 		break;
4309 	case PACKET3_COPY_DW:
4310 		if (idx_value & 0x2) {
4311 			reg = ib[idx + 3] * 4;
4312 			if (!si_vm_reg_valid(reg))
4313 				return -EINVAL;
4314 		}
4315 		break;
4316 	default:
4317 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4318 		return -EINVAL;
4319 	}
4320 	return 0;
4321 }
4322 
4323 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4324 {
4325 	int ret = 0;
4326 	u32 idx = 0;
4327 	struct radeon_cs_packet pkt;
4328 
4329 	do {
4330 		pkt.idx = idx;
4331 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4332 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4333 		pkt.one_reg_wr = 0;
4334 		switch (pkt.type) {
4335 		case RADEON_PACKET_TYPE0:
4336 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4337 			ret = -EINVAL;
4338 			break;
4339 		case RADEON_PACKET_TYPE2:
4340 			idx += 1;
4341 			break;
4342 		case RADEON_PACKET_TYPE3:
4343 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4344 			if (ib->is_const_ib)
4345 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4346 			else {
4347 				switch (ib->ring) {
4348 				case RADEON_RING_TYPE_GFX_INDEX:
4349 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4350 					break;
4351 				case CAYMAN_RING_TYPE_CP1_INDEX:
4352 				case CAYMAN_RING_TYPE_CP2_INDEX:
4353 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4354 					break;
4355 				default:
4356 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4357 					ret = -EINVAL;
4358 					break;
4359 				}
4360 			}
4361 			idx += pkt.count + 2;
4362 			break;
4363 		default:
4364 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4365 			ret = -EINVAL;
4366 			break;
4367 		}
4368 		if (ret)
4369 			break;
4370 	} while (idx < ib->length_dw);
4371 
4372 	return ret;
4373 }
4374 
4375 /*
4376  * vm
4377  */
4378 int si_vm_init(struct radeon_device *rdev)
4379 {
4380 	/* number of VMs */
4381 	rdev->vm_manager.nvm = 16;
4382 	/* base offset of vram pages */
4383 	rdev->vm_manager.vram_base_offset = 0;
4384 
4385 	return 0;
4386 }
4387 
4388 void si_vm_fini(struct radeon_device *rdev)
4389 {
4390 }
4391 
4392 /**
4393  * si_vm_decode_fault - print human readable fault info
4394  *
4395  * @rdev: radeon_device pointer
4396  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4397  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4398  *
4399  * Print human readable fault information (SI).
4400  */
4401 static void si_vm_decode_fault(struct radeon_device *rdev,
4402 			       u32 status, u32 addr)
4403 {
4404 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4405 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4406 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4407 	char *block;
4408 
4409 	if (rdev->family == CHIP_TAHITI) {
4410 		switch (mc_id) {
4411 		case 160:
4412 		case 144:
4413 		case 96:
4414 		case 80:
4415 		case 224:
4416 		case 208:
4417 		case 32:
4418 		case 16:
4419 			block = "CB";
4420 			break;
4421 		case 161:
4422 		case 145:
4423 		case 97:
4424 		case 81:
4425 		case 225:
4426 		case 209:
4427 		case 33:
4428 		case 17:
4429 			block = "CB_FMASK";
4430 			break;
4431 		case 162:
4432 		case 146:
4433 		case 98:
4434 		case 82:
4435 		case 226:
4436 		case 210:
4437 		case 34:
4438 		case 18:
4439 			block = "CB_CMASK";
4440 			break;
4441 		case 163:
4442 		case 147:
4443 		case 99:
4444 		case 83:
4445 		case 227:
4446 		case 211:
4447 		case 35:
4448 		case 19:
4449 			block = "CB_IMMED";
4450 			break;
4451 		case 164:
4452 		case 148:
4453 		case 100:
4454 		case 84:
4455 		case 228:
4456 		case 212:
4457 		case 36:
4458 		case 20:
4459 			block = "DB";
4460 			break;
4461 		case 165:
4462 		case 149:
4463 		case 101:
4464 		case 85:
4465 		case 229:
4466 		case 213:
4467 		case 37:
4468 		case 21:
4469 			block = "DB_HTILE";
4470 			break;
4471 		case 167:
4472 		case 151:
4473 		case 103:
4474 		case 87:
4475 		case 231:
4476 		case 215:
4477 		case 39:
4478 		case 23:
4479 			block = "DB_STEN";
4480 			break;
4481 		case 72:
4482 		case 68:
4483 		case 64:
4484 		case 8:
4485 		case 4:
4486 		case 0:
4487 		case 136:
4488 		case 132:
4489 		case 128:
4490 		case 200:
4491 		case 196:
4492 		case 192:
4493 			block = "TC";
4494 			break;
4495 		case 112:
4496 		case 48:
4497 			block = "CP";
4498 			break;
4499 		case 49:
4500 		case 177:
4501 		case 50:
4502 		case 178:
4503 			block = "SH";
4504 			break;
4505 		case 53:
4506 		case 190:
4507 			block = "VGT";
4508 			break;
4509 		case 117:
4510 			block = "IH";
4511 			break;
4512 		case 51:
4513 		case 115:
4514 			block = "RLC";
4515 			break;
4516 		case 119:
4517 		case 183:
4518 			block = "DMA0";
4519 			break;
4520 		case 61:
4521 			block = "DMA1";
4522 			break;
4523 		case 248:
4524 		case 120:
4525 			block = "HDP";
4526 			break;
4527 		default:
4528 			block = "unknown";
4529 			break;
4530 		}
4531 	} else {
4532 		switch (mc_id) {
4533 		case 32:
4534 		case 16:
4535 		case 96:
4536 		case 80:
4537 		case 160:
4538 		case 144:
4539 		case 224:
4540 		case 208:
4541 			block = "CB";
4542 			break;
4543 		case 33:
4544 		case 17:
4545 		case 97:
4546 		case 81:
4547 		case 161:
4548 		case 145:
4549 		case 225:
4550 		case 209:
4551 			block = "CB_FMASK";
4552 			break;
4553 		case 34:
4554 		case 18:
4555 		case 98:
4556 		case 82:
4557 		case 162:
4558 		case 146:
4559 		case 226:
4560 		case 210:
4561 			block = "CB_CMASK";
4562 			break;
4563 		case 35:
4564 		case 19:
4565 		case 99:
4566 		case 83:
4567 		case 163:
4568 		case 147:
4569 		case 227:
4570 		case 211:
4571 			block = "CB_IMMED";
4572 			break;
4573 		case 36:
4574 		case 20:
4575 		case 100:
4576 		case 84:
4577 		case 164:
4578 		case 148:
4579 		case 228:
4580 		case 212:
4581 			block = "DB";
4582 			break;
4583 		case 37:
4584 		case 21:
4585 		case 101:
4586 		case 85:
4587 		case 165:
4588 		case 149:
4589 		case 229:
4590 		case 213:
4591 			block = "DB_HTILE";
4592 			break;
4593 		case 39:
4594 		case 23:
4595 		case 103:
4596 		case 87:
4597 		case 167:
4598 		case 151:
4599 		case 231:
4600 		case 215:
4601 			block = "DB_STEN";
4602 			break;
4603 		case 72:
4604 		case 68:
4605 		case 8:
4606 		case 4:
4607 		case 136:
4608 		case 132:
4609 		case 200:
4610 		case 196:
4611 			block = "TC";
4612 			break;
4613 		case 112:
4614 		case 48:
4615 			block = "CP";
4616 			break;
4617 		case 49:
4618 		case 177:
4619 		case 50:
4620 		case 178:
4621 			block = "SH";
4622 			break;
4623 		case 53:
4624 			block = "VGT";
4625 			break;
4626 		case 117:
4627 			block = "IH";
4628 			break;
4629 		case 51:
4630 		case 115:
4631 			block = "RLC";
4632 			break;
4633 		case 119:
4634 		case 183:
4635 			block = "DMA0";
4636 			break;
4637 		case 61:
4638 			block = "DMA1";
4639 			break;
4640 		case 248:
4641 		case 120:
4642 			block = "HDP";
4643 			break;
4644 		default:
4645 			block = "unknown";
4646 			break;
4647 		}
4648 	}
4649 
4650 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4651 	       protections, vmid, addr,
4652 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4653 	       block, mc_id);
4654 }
4655 
4656 /**
4657  * si_vm_set_page - update the page tables using the CP
4658  *
4659  * @rdev: radeon_device pointer
4660  * @ib: indirect buffer to fill with commands
4661  * @pe: addr of the page entry
4662  * @addr: dst addr to write into pe
4663  * @count: number of page entries to update
4664  * @incr: increase next addr by incr bytes
4665  * @flags: access flags
4666  *
4667  * Update the page tables using the CP (SI).
4668  */
4669 void si_vm_set_page(struct radeon_device *rdev,
4670 		    struct radeon_ib *ib,
4671 		    uint64_t pe,
4672 		    uint64_t addr, unsigned count,
4673 		    uint32_t incr, uint32_t flags)
4674 {
4675 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4676 	uint64_t value;
4677 	unsigned ndw;
4678 
4679 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4680 		while (count) {
4681 			ndw = 2 + count * 2;
4682 			if (ndw > 0x3FFE)
4683 				ndw = 0x3FFE;
4684 
4685 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4686 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4687 					WRITE_DATA_DST_SEL(1));
4688 			ib->ptr[ib->length_dw++] = pe;
4689 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4690 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4691 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4692 					value = radeon_vm_map_gart(rdev, addr);
4693 					value &= 0xFFFFFFFFFFFFF000ULL;
4694 				} else if (flags & RADEON_VM_PAGE_VALID) {
4695 					value = addr;
4696 				} else {
4697 					value = 0;
4698 				}
4699 				addr += incr;
4700 				value |= r600_flags;
4701 				ib->ptr[ib->length_dw++] = value;
4702 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4703 			}
4704 		}
4705 	} else {
4706 		/* DMA */
4707 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4708 			while (count) {
4709 				ndw = count * 2;
4710 				if (ndw > 0xFFFFE)
4711 					ndw = 0xFFFFE;
4712 
4713 				/* for non-physically contiguous pages (system) */
4714 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4715 				ib->ptr[ib->length_dw++] = pe;
4716 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4717 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4718 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4719 						value = radeon_vm_map_gart(rdev, addr);
4720 						value &= 0xFFFFFFFFFFFFF000ULL;
4721 					} else if (flags & RADEON_VM_PAGE_VALID) {
4722 						value = addr;
4723 					} else {
4724 						value = 0;
4725 					}
4726 					addr += incr;
4727 					value |= r600_flags;
4728 					ib->ptr[ib->length_dw++] = value;
4729 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4730 				}
4731 			}
4732 		} else {
4733 			while (count) {
4734 				ndw = count * 2;
4735 				if (ndw > 0xFFFFE)
4736 					ndw = 0xFFFFE;
4737 
4738 				if (flags & RADEON_VM_PAGE_VALID)
4739 					value = addr;
4740 				else
4741 					value = 0;
4742 				/* for physically contiguous pages (vram) */
4743 				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4744 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4745 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4746 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4747 				ib->ptr[ib->length_dw++] = 0;
4748 				ib->ptr[ib->length_dw++] = value; /* value */
4749 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4750 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4751 				ib->ptr[ib->length_dw++] = 0;
4752 				pe += ndw * 4;
4753 				addr += (ndw / 2) * incr;
4754 				count -= ndw / 2;
4755 			}
4756 		}
4757 		while (ib->length_dw & 0x7)
4758 			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4759 	}
4760 }
4761 
4762 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4763 {
4764 	struct radeon_ring *ring = &rdev->ring[ridx];
4765 
4766 	if (vm == NULL)
4767 		return;
4768 
4769 	/* write new base address */
4770 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4771 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4772 				 WRITE_DATA_DST_SEL(0)));
4773 
4774 	if (vm->id < 8) {
4775 		radeon_ring_write(ring,
4776 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4777 	} else {
4778 		radeon_ring_write(ring,
4779 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4780 	}
4781 	radeon_ring_write(ring, 0);
4782 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4783 
4784 	/* flush hdp cache */
4785 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4786 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4787 				 WRITE_DATA_DST_SEL(0)));
4788 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4789 	radeon_ring_write(ring, 0);
4790 	radeon_ring_write(ring, 0x1);
4791 
4792 	/* bits 0-15 are the VM contexts0-15 */
4793 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4794 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4795 				 WRITE_DATA_DST_SEL(0)));
4796 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4797 	radeon_ring_write(ring, 0);
4798 	radeon_ring_write(ring, 1 << vm->id);
4799 
4800 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
4801 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4802 	radeon_ring_write(ring, 0x0);
4803 }
4804 
4805 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4806 {
4807 	struct radeon_ring *ring = &rdev->ring[ridx];
4808 
4809 	if (vm == NULL)
4810 		return;
4811 
4812 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4813 	if (vm->id < 8) {
4814 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4815 	} else {
4816 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4817 	}
4818 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4819 
4820 	/* flush hdp cache */
4821 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4822 	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4823 	radeon_ring_write(ring, 1);
4824 
4825 	/* bits 0-7 are the VM contexts0-7 */
4826 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4827 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4828 	radeon_ring_write(ring, 1 << vm->id);
4829 }
4830 
4831 /*
4832  *  Power and clock gating
4833  */
4834 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4835 {
4836 	int i;
4837 
4838 	for (i = 0; i < rdev->usec_timeout; i++) {
4839 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4840 			break;
4841 		udelay(1);
4842 	}
4843 
4844 	for (i = 0; i < rdev->usec_timeout; i++) {
4845 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4846 			break;
4847 		udelay(1);
4848 	}
4849 }
4850 
4851 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4852 					 bool enable)
4853 {
4854 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4855 	u32 mask;
4856 	int i;
4857 
4858 	if (enable)
4859 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4860 	else
4861 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4862 	WREG32(CP_INT_CNTL_RING0, tmp);
4863 
4864 	if (!enable) {
4865 		/* read a gfx register */
4866 		tmp = RREG32(DB_DEPTH_INFO);
4867 
4868 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4869 		for (i = 0; i < rdev->usec_timeout; i++) {
4870 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4871 				break;
4872 			udelay(1);
4873 		}
4874 	}
4875 }
4876 
4877 static void si_set_uvd_dcm(struct radeon_device *rdev,
4878 			   bool sw_mode)
4879 {
4880 	u32 tmp, tmp2;
4881 
4882 	tmp = RREG32(UVD_CGC_CTRL);
4883 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4884 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4885 
4886 	if (sw_mode) {
4887 		tmp &= ~0x7ffff800;
4888 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4889 	} else {
4890 		tmp |= 0x7ffff800;
4891 		tmp2 = 0;
4892 	}
4893 
4894 	WREG32(UVD_CGC_CTRL, tmp);
4895 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4896 }
4897 
4898 static void si_init_uvd_internal_cg(struct radeon_device *rdev)
4899 {
4900 	bool hw_mode = true;
4901 
4902 	if (hw_mode) {
4903 		si_set_uvd_dcm(rdev, false);
4904 	} else {
4905 		u32 tmp = RREG32(UVD_CGC_CTRL);
4906 		tmp &= ~DCM;
4907 		WREG32(UVD_CGC_CTRL, tmp);
4908 	}
4909 }
4910 
4911 static u32 si_halt_rlc(struct radeon_device *rdev)
4912 {
4913 	u32 data, orig;
4914 
4915 	orig = data = RREG32(RLC_CNTL);
4916 
4917 	if (data & RLC_ENABLE) {
4918 		data &= ~RLC_ENABLE;
4919 		WREG32(RLC_CNTL, data);
4920 
4921 		si_wait_for_rlc_serdes(rdev);
4922 	}
4923 
4924 	return orig;
4925 }
4926 
4927 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4928 {
4929 	u32 tmp;
4930 
4931 	tmp = RREG32(RLC_CNTL);
4932 	if (tmp != rlc)
4933 		WREG32(RLC_CNTL, rlc);
4934 }
4935 
4936 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4937 {
4938 	u32 data, orig;
4939 
4940 	orig = data = RREG32(DMA_PG);
4941 	if (enable)
4942 		data |= PG_CNTL_ENABLE;
4943 	else
4944 		data &= ~PG_CNTL_ENABLE;
4945 	if (orig != data)
4946 		WREG32(DMA_PG, data);
4947 }
4948 
4949 static void si_init_dma_pg(struct radeon_device *rdev)
4950 {
4951 	u32 tmp;
4952 
4953 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4954 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4955 
4956 	for (tmp = 0; tmp < 5; tmp++)
4957 		WREG32(DMA_PGFSM_WRITE, 0);
4958 }
4959 
4960 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4961 			       bool enable)
4962 {
4963 	u32 tmp;
4964 
4965 	if (enable) {
4966 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4967 		WREG32(RLC_TTOP_D, tmp);
4968 
4969 		tmp = RREG32(RLC_PG_CNTL);
4970 		tmp |= GFX_PG_ENABLE;
4971 		WREG32(RLC_PG_CNTL, tmp);
4972 
4973 		tmp = RREG32(RLC_AUTO_PG_CTRL);
4974 		tmp |= AUTO_PG_EN;
4975 		WREG32(RLC_AUTO_PG_CTRL, tmp);
4976 	} else {
4977 		tmp = RREG32(RLC_AUTO_PG_CTRL);
4978 		tmp &= ~AUTO_PG_EN;
4979 		WREG32(RLC_AUTO_PG_CTRL, tmp);
4980 
4981 		tmp = RREG32(DB_RENDER_CONTROL);
4982 	}
4983 }
4984 
4985 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4986 {
4987 	u32 tmp;
4988 
4989 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4990 
4991 	tmp = RREG32(RLC_PG_CNTL);
4992 	tmp |= GFX_PG_SRC;
4993 	WREG32(RLC_PG_CNTL, tmp);
4994 
4995 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4996 
4997 	tmp = RREG32(RLC_AUTO_PG_CTRL);
4998 
4999 	tmp &= ~GRBM_REG_SGIT_MASK;
5000 	tmp |= GRBM_REG_SGIT(0x700);
5001 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5002 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5003 }
5004 
5005 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5006 {
5007 	u32 mask = 0, tmp, tmp1;
5008 	int i;
5009 
5010 	si_select_se_sh(rdev, se, sh);
5011 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5012 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5013 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5014 
5015 	tmp &= 0xffff0000;
5016 
5017 	tmp |= tmp1;
5018 	tmp >>= 16;
5019 
5020 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5021 		mask <<= 1;
5022 		mask |= 1;
5023 	}
5024 
5025 	return (~tmp) & mask;
5026 }
5027 
5028 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5029 {
5030 	u32 i, j, k, active_cu_number = 0;
5031 	u32 mask, counter, cu_bitmap;
5032 	u32 tmp = 0;
5033 
5034 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5035 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5036 			mask = 1;
5037 			cu_bitmap = 0;
5038 			counter  = 0;
5039 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5040 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5041 					if (counter < 2)
5042 						cu_bitmap |= mask;
5043 					counter++;
5044 				}
5045 				mask <<= 1;
5046 			}
5047 
5048 			active_cu_number += counter;
5049 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5050 		}
5051 	}
5052 
5053 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5054 
5055 	tmp = RREG32(RLC_MAX_PG_CU);
5056 	tmp &= ~MAX_PU_CU_MASK;
5057 	tmp |= MAX_PU_CU(active_cu_number);
5058 	WREG32(RLC_MAX_PG_CU, tmp);
5059 }
5060 
5061 static void si_enable_cgcg(struct radeon_device *rdev,
5062 			   bool enable)
5063 {
5064 	u32 data, orig, tmp;
5065 
5066 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5067 
5068 	si_enable_gui_idle_interrupt(rdev, enable);
5069 
5070 	if (enable) {
5071 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5072 
5073 		tmp = si_halt_rlc(rdev);
5074 
5075 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5076 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5077 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5078 
5079 		si_wait_for_rlc_serdes(rdev);
5080 
5081 		si_update_rlc(rdev, tmp);
5082 
5083 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5084 
5085 		data |= CGCG_EN | CGLS_EN;
5086 	} else {
5087 		RREG32(CB_CGTT_SCLK_CTRL);
5088 		RREG32(CB_CGTT_SCLK_CTRL);
5089 		RREG32(CB_CGTT_SCLK_CTRL);
5090 		RREG32(CB_CGTT_SCLK_CTRL);
5091 
5092 		data &= ~(CGCG_EN | CGLS_EN);
5093 	}
5094 
5095 	if (orig != data)
5096 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5097 }
5098 
5099 static void si_enable_mgcg(struct radeon_device *rdev,
5100 			   bool enable)
5101 {
5102 	u32 data, orig, tmp = 0;
5103 
5104 	if (enable) {
5105 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5106 		data = 0x96940200;
5107 		if (orig != data)
5108 			WREG32(CGTS_SM_CTRL_REG, data);
5109 
5110 		orig = data = RREG32(CP_MEM_SLP_CNTL);
5111 		data |= CP_MEM_LS_EN;
5112 		if (orig != data)
5113 			WREG32(CP_MEM_SLP_CNTL, data);
5114 
5115 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5116 		data &= 0xffffffc0;
5117 		if (orig != data)
5118 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5119 
5120 		tmp = si_halt_rlc(rdev);
5121 
5122 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5123 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5124 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5125 
5126 		si_update_rlc(rdev, tmp);
5127 	} else {
5128 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5129 		data |= 0x00000003;
5130 		if (orig != data)
5131 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5132 
5133 		data = RREG32(CP_MEM_SLP_CNTL);
5134 		if (data & CP_MEM_LS_EN) {
5135 			data &= ~CP_MEM_LS_EN;
5136 			WREG32(CP_MEM_SLP_CNTL, data);
5137 		}
5138 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5139 		data |= LS_OVERRIDE | OVERRIDE;
5140 		if (orig != data)
5141 			WREG32(CGTS_SM_CTRL_REG, data);
5142 
5143 		tmp = si_halt_rlc(rdev);
5144 
5145 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5146 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5147 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5148 
5149 		si_update_rlc(rdev, tmp);
5150 	}
5151 }
5152 
5153 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5154 			       bool enable)
5155 {
5156 	u32 orig, data, tmp;
5157 
5158 	if (enable) {
5159 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5160 		tmp |= 0x3fff;
5161 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5162 
5163 		orig = data = RREG32(UVD_CGC_CTRL);
5164 		data |= DCM;
5165 		if (orig != data)
5166 			WREG32(UVD_CGC_CTRL, data);
5167 
5168 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5169 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5170 	} else {
5171 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5172 		tmp &= ~0x3fff;
5173 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5174 
5175 		orig = data = RREG32(UVD_CGC_CTRL);
5176 		data &= ~DCM;
5177 		if (orig != data)
5178 			WREG32(UVD_CGC_CTRL, data);
5179 
5180 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5181 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5182 	}
5183 }
5184 
5185 static const u32 mc_cg_registers[] =
5186 {
5187 	MC_HUB_MISC_HUB_CG,
5188 	MC_HUB_MISC_SIP_CG,
5189 	MC_HUB_MISC_VM_CG,
5190 	MC_XPB_CLK_GAT,
5191 	ATC_MISC_CG,
5192 	MC_CITF_MISC_WR_CG,
5193 	MC_CITF_MISC_RD_CG,
5194 	MC_CITF_MISC_VM_CG,
5195 	VM_L2_CG,
5196 };
5197 
5198 static void si_enable_mc_ls(struct radeon_device *rdev,
5199 			    bool enable)
5200 {
5201 	int i;
5202 	u32 orig, data;
5203 
5204 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5205 		orig = data = RREG32(mc_cg_registers[i]);
5206 		if (enable)
5207 			data |= MC_LS_ENABLE;
5208 		else
5209 			data &= ~MC_LS_ENABLE;
5210 		if (data != orig)
5211 			WREG32(mc_cg_registers[i], data);
5212 	}
5213 }
5214 
5215 
5216 static void si_init_cg(struct radeon_device *rdev)
5217 {
5218 	bool has_uvd = true;
5219 
5220 	si_enable_mgcg(rdev, true);
5221 	si_enable_cgcg(rdev, true);
5222 	/* disable MC LS on Tahiti */
5223 	if (rdev->family == CHIP_TAHITI)
5224 		si_enable_mc_ls(rdev, false);
5225 	if (has_uvd) {
5226 		si_enable_uvd_mgcg(rdev, true);
5227 		si_init_uvd_internal_cg(rdev);
5228 	}
5229 }
5230 
5231 static void si_fini_cg(struct radeon_device *rdev)
5232 {
5233 	bool has_uvd = true;
5234 
5235 	if (has_uvd)
5236 		si_enable_uvd_mgcg(rdev, false);
5237 	si_enable_cgcg(rdev, false);
5238 	si_enable_mgcg(rdev, false);
5239 }
5240 
5241 static void si_init_pg(struct radeon_device *rdev)
5242 {
5243 	bool has_pg = false;
5244 
5245 	/* only cape verde supports PG */
5246 	if (rdev->family == CHIP_VERDE)
5247 		has_pg = true;
5248 
5249 	if (has_pg) {
5250 		si_init_ao_cu_mask(rdev);
5251 		si_init_dma_pg(rdev);
5252 		si_enable_dma_pg(rdev, true);
5253 		si_init_gfx_cgpg(rdev);
5254 		si_enable_gfx_cgpg(rdev, true);
5255 	} else {
5256 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5257 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5258 	}
5259 }
5260 
5261 static void si_fini_pg(struct radeon_device *rdev)
5262 {
5263 	bool has_pg = false;
5264 
5265 	/* only cape verde supports PG */
5266 	if (rdev->family == CHIP_VERDE)
5267 		has_pg = true;
5268 
5269 	if (has_pg) {
5270 		si_enable_dma_pg(rdev, false);
5271 		si_enable_gfx_cgpg(rdev, false);
5272 	}
5273 }
5274 
5275 /*
5276  * RLC
5277  */
5278 void si_rlc_fini(struct radeon_device *rdev)
5279 {
5280 	int r;
5281 
5282 	/* save restore block */
5283 	if (rdev->rlc.save_restore_obj) {
5284 		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5285 		if (unlikely(r != 0))
5286 			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
5287 		radeon_bo_unpin(rdev->rlc.save_restore_obj);
5288 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5289 
5290 		radeon_bo_unref(&rdev->rlc.save_restore_obj);
5291 		rdev->rlc.save_restore_obj = NULL;
5292 	}
5293 
5294 	/* clear state block */
5295 	if (rdev->rlc.clear_state_obj) {
5296 		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5297 		if (unlikely(r != 0))
5298 			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
5299 		radeon_bo_unpin(rdev->rlc.clear_state_obj);
5300 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5301 
5302 		radeon_bo_unref(&rdev->rlc.clear_state_obj);
5303 		rdev->rlc.clear_state_obj = NULL;
5304 	}
5305 }
5306 
5307 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5308 
5309 int si_rlc_init(struct radeon_device *rdev)
5310 {
5311 	volatile u32 *dst_ptr;
5312 	u32 dws, data, i, j, k, reg_num;
5313 	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
5314 	u64 reg_list_mc_addr;
5315 	const struct cs_section_def *cs_data = si_cs_data;
5316 	int r;
5317 
5318 	/* save restore block */
5319 	if (rdev->rlc.save_restore_obj == NULL) {
5320 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
5321 				     RADEON_GEM_DOMAIN_VRAM, NULL,
5322 				     &rdev->rlc.save_restore_obj);
5323 		if (r) {
5324 			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
5325 			return r;
5326 		}
5327 	}
5328 
5329 	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5330 	if (unlikely(r != 0)) {
5331 		si_rlc_fini(rdev);
5332 		return r;
5333 	}
5334 	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
5335 			  &rdev->rlc.save_restore_gpu_addr);
5336 	if (r) {
5337 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5338 		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
5339 		si_rlc_fini(rdev);
5340 		return r;
5341 	}
5342 
5343 	if (rdev->family == CHIP_VERDE) {
5344 		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
5345 		if (r) {
5346 			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
5347 			si_rlc_fini(rdev);
5348 		return r;
5349 		}
5350 		/* write the sr buffer */
5351 		dst_ptr = rdev->rlc.sr_ptr;
5352 		for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
5353 			dst_ptr[i] = verde_rlc_save_restore_register_list[i];
5354 		}
5355 		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
5356 	}
5357 	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5358 
5359 	/* clear state block */
5360 	reg_list_num = 0;
5361 	dws = 0;
5362 	for (i = 0; cs_data[i].section != NULL; i++) {
5363 		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5364 			reg_list_num++;
5365 			dws += cs_data[i].section[j].reg_count;
5366 		}
5367 	}
5368 	reg_list_blk_index = (3 * reg_list_num + 2);
5369 	dws += reg_list_blk_index;
5370 
5371 	if (rdev->rlc.clear_state_obj == NULL) {
5372 		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
5373 				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
5374 		if (r) {
5375 			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
5376 			si_rlc_fini(rdev);
5377 			return r;
5378 		}
5379 	}
5380 	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5381 	if (unlikely(r != 0)) {
5382 		si_rlc_fini(rdev);
5383 		return r;
5384 	}
5385 	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
5386 			  &rdev->rlc.clear_state_gpu_addr);
5387 	if (r) {
5388 
5389 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5390 		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
5391 		si_rlc_fini(rdev);
5392 		return r;
5393 	}
5394 	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
5395 	if (r) {
5396 		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
5397 		si_rlc_fini(rdev);
5398 		return r;
5399 	}
5400 	/* set up the cs buffer */
5401 	dst_ptr = rdev->rlc.cs_ptr;
5402 	reg_list_hdr_blk_index = 0;
5403 	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
5404 	data = upper_32_bits(reg_list_mc_addr);
5405 	dst_ptr[reg_list_hdr_blk_index] = data;
5406 	reg_list_hdr_blk_index++;
5407 	for (i = 0; cs_data[i].section != NULL; i++) {
5408 		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5409 			reg_num = cs_data[i].section[j].reg_count;
5410 			data = reg_list_mc_addr & 0xffffffff;
5411 			dst_ptr[reg_list_hdr_blk_index] = data;
5412 			reg_list_hdr_blk_index++;
5413 
5414 			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
5415 			dst_ptr[reg_list_hdr_blk_index] = data;
5416 			reg_list_hdr_blk_index++;
5417 
5418 			data = 0x08000000 | (reg_num * 4);
5419 			dst_ptr[reg_list_hdr_blk_index] = data;
5420 			reg_list_hdr_blk_index++;
5421 
5422 			for (k = 0; k < reg_num; k++) {
5423 				data = cs_data[i].section[j].extent[k];
5424 				dst_ptr[reg_list_blk_index + k] = data;
5425 			}
5426 			reg_list_mc_addr += reg_num * 4;
5427 			reg_list_blk_index += reg_num;
5428 		}
5429 	}
5430 	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
5431 
5432 	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
5433 	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5434 
5435 	return 0;
5436 }
5437 
5438 static void si_rlc_reset(struct radeon_device *rdev)
5439 {
5440 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5441 
5442 	tmp |= SOFT_RESET_RLC;
5443 	WREG32(GRBM_SOFT_RESET, tmp);
5444 	udelay(50);
5445 	tmp &= ~SOFT_RESET_RLC;
5446 	WREG32(GRBM_SOFT_RESET, tmp);
5447 	udelay(50);
5448 }
5449 
5450 static void si_rlc_stop(struct radeon_device *rdev)
5451 {
5452 	WREG32(RLC_CNTL, 0);
5453 
5454 	si_enable_gui_idle_interrupt(rdev, false);
5455 
5456 	si_wait_for_rlc_serdes(rdev);
5457 }
5458 
5459 static void si_rlc_start(struct radeon_device *rdev)
5460 {
5461 	WREG32(RLC_CNTL, RLC_ENABLE);
5462 
5463 	si_enable_gui_idle_interrupt(rdev, true);
5464 
5465 	udelay(50);
5466 }
5467 
5468 static bool si_lbpw_supported(struct radeon_device *rdev)
5469 {
5470 	u32 tmp;
5471 
5472 	/* Enable LBPW only for DDR3 */
5473 	tmp = RREG32(MC_SEQ_MISC0);
5474 	if ((tmp & 0xF0000000) == 0xB0000000)
5475 		return true;
5476 	return false;
5477 }
5478 
5479 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5480 {
5481 	u32 tmp;
5482 
5483 	tmp = RREG32(RLC_LB_CNTL);
5484 	if (enable)
5485 		tmp |= LOAD_BALANCE_ENABLE;
5486 	else
5487 		tmp &= ~LOAD_BALANCE_ENABLE;
5488 	WREG32(RLC_LB_CNTL, tmp);
5489 
5490 	if (!enable) {
5491 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5492 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5493 	}
5494 }
5495 
5496 static int si_rlc_resume(struct radeon_device *rdev)
5497 {
5498 	u32 i;
5499 	const __be32 *fw_data;
5500 
5501 	if (!rdev->rlc_fw)
5502 		return -EINVAL;
5503 
5504 	si_rlc_stop(rdev);
5505 
5506 	si_rlc_reset(rdev);
5507 
5508 	si_init_pg(rdev);
5509 
5510 	si_init_cg(rdev);
5511 
5512 	WREG32(RLC_RL_BASE, 0);
5513 	WREG32(RLC_RL_SIZE, 0);
5514 	WREG32(RLC_LB_CNTL, 0);
5515 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5516 	WREG32(RLC_LB_CNTR_INIT, 0);
5517 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5518 
5519 	WREG32(RLC_MC_CNTL, 0);
5520 	WREG32(RLC_UCODE_CNTL, 0);
5521 
5522 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5523 	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5524 		WREG32(RLC_UCODE_ADDR, i);
5525 		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5526 	}
5527 	WREG32(RLC_UCODE_ADDR, 0);
5528 
5529 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5530 
5531 	si_rlc_start(rdev);
5532 
5533 	return 0;
5534 }
5535 
5536 static void si_enable_interrupts(struct radeon_device *rdev)
5537 {
5538 	u32 ih_cntl = RREG32(IH_CNTL);
5539 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5540 
5541 	ih_cntl |= ENABLE_INTR;
5542 	ih_rb_cntl |= IH_RB_ENABLE;
5543 	WREG32(IH_CNTL, ih_cntl);
5544 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5545 	rdev->ih.enabled = true;
5546 }
5547 
5548 static void si_disable_interrupts(struct radeon_device *rdev)
5549 {
5550 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5551 	u32 ih_cntl = RREG32(IH_CNTL);
5552 
5553 	ih_rb_cntl &= ~IH_RB_ENABLE;
5554 	ih_cntl &= ~ENABLE_INTR;
5555 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5556 	WREG32(IH_CNTL, ih_cntl);
5557 	/* set rptr, wptr to 0 */
5558 	WREG32(IH_RB_RPTR, 0);
5559 	WREG32(IH_RB_WPTR, 0);
5560 	rdev->ih.enabled = false;
5561 	rdev->ih.rptr = 0;
5562 }
5563 
5564 static void si_disable_interrupt_state(struct radeon_device *rdev)
5565 {
5566 	u32 tmp;
5567 
5568 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5569 	WREG32(CP_INT_CNTL_RING1, 0);
5570 	WREG32(CP_INT_CNTL_RING2, 0);
5571 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5572 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5573 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5574 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5575 	WREG32(GRBM_INT_CNTL, 0);
5576 	if (rdev->num_crtc >= 2) {
5577 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5578 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5579 	}
5580 	if (rdev->num_crtc >= 4) {
5581 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5582 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5583 	}
5584 	if (rdev->num_crtc >= 6) {
5585 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5586 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5587 	}
5588 
5589 	if (rdev->num_crtc >= 2) {
5590 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5591 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5592 	}
5593 	if (rdev->num_crtc >= 4) {
5594 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5595 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5596 	}
5597 	if (rdev->num_crtc >= 6) {
5598 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5599 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5600 	}
5601 
5602 	if (!ASIC_IS_NODCE(rdev)) {
5603 		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
5604 
5605 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5606 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5607 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5608 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5609 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5610 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5611 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5612 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5613 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5614 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5615 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5616 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5617 	}
5618 }
5619 
5620 static int si_irq_init(struct radeon_device *rdev)
5621 {
5622 	int ret = 0;
5623 	int rb_bufsz;
5624 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5625 
5626 	/* allocate ring */
5627 	ret = r600_ih_ring_alloc(rdev);
5628 	if (ret)
5629 		return ret;
5630 
5631 	/* disable irqs */
5632 	si_disable_interrupts(rdev);
5633 
5634 	/* init rlc */
5635 	ret = si_rlc_resume(rdev);
5636 	if (ret) {
5637 		r600_ih_ring_fini(rdev);
5638 		return ret;
5639 	}
5640 
5641 	/* setup interrupt control */
5642 	/* set dummy read address to ring address */
5643 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5644 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5645 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5646 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5647 	 */
5648 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5649 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5650 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5651 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5652 
5653 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5654 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5655 
5656 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5657 		      IH_WPTR_OVERFLOW_CLEAR |
5658 		      (rb_bufsz << 1));
5659 
5660 	if (rdev->wb.enabled)
5661 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5662 
5663 	/* set the writeback address whether it's enabled or not */
5664 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5665 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5666 
5667 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5668 
5669 	/* set rptr, wptr to 0 */
5670 	WREG32(IH_RB_RPTR, 0);
5671 	WREG32(IH_RB_WPTR, 0);
5672 
5673 	/* Default settings for IH_CNTL (disabled at first) */
5674 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5675 	/* RPTR_REARM only works if msi's are enabled */
5676 	if (rdev->msi_enabled)
5677 		ih_cntl |= RPTR_REARM;
5678 	WREG32(IH_CNTL, ih_cntl);
5679 
5680 	/* force the active interrupt state to all disabled */
5681 	si_disable_interrupt_state(rdev);
5682 
5683 	pci_set_master(rdev->pdev);
5684 
5685 	/* enable irqs */
5686 	si_enable_interrupts(rdev);
5687 
5688 	return ret;
5689 }
5690 
5691 int si_irq_set(struct radeon_device *rdev)
5692 {
5693 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5694 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5695 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5696 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5697 	u32 grbm_int_cntl = 0;
5698 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5699 	u32 dma_cntl, dma_cntl1;
5700 	u32 thermal_int = 0;
5701 
5702 	if (!rdev->irq.installed) {
5703 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5704 		return -EINVAL;
5705 	}
5706 	/* don't enable anything if the ih is disabled */
5707 	if (!rdev->ih.enabled) {
5708 		si_disable_interrupts(rdev);
5709 		/* force the active interrupt state to all disabled */
5710 		si_disable_interrupt_state(rdev);
5711 		return 0;
5712 	}
5713 
5714 	if (!ASIC_IS_NODCE(rdev)) {
5715 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5716 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5717 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5718 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5719 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5720 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5721 	}
5722 
5723 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5724 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5725 
5726 	thermal_int = RREG32(CG_THERMAL_INT) &
5727 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5728 
5729 	/* enable CP interrupts on all rings */
5730 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5731 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5732 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5733 	}
5734 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5735 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5736 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5737 	}
5738 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5739 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5740 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5741 	}
5742 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5743 		DRM_DEBUG("si_irq_set: sw int dma\n");
5744 		dma_cntl |= TRAP_ENABLE;
5745 	}
5746 
5747 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5748 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5749 		dma_cntl1 |= TRAP_ENABLE;
5750 	}
5751 	if (rdev->irq.crtc_vblank_int[0] ||
5752 	    atomic_read(&rdev->irq.pflip[0])) {
5753 		DRM_DEBUG("si_irq_set: vblank 0\n");
5754 		crtc1 |= VBLANK_INT_MASK;
5755 	}
5756 	if (rdev->irq.crtc_vblank_int[1] ||
5757 	    atomic_read(&rdev->irq.pflip[1])) {
5758 		DRM_DEBUG("si_irq_set: vblank 1\n");
5759 		crtc2 |= VBLANK_INT_MASK;
5760 	}
5761 	if (rdev->irq.crtc_vblank_int[2] ||
5762 	    atomic_read(&rdev->irq.pflip[2])) {
5763 		DRM_DEBUG("si_irq_set: vblank 2\n");
5764 		crtc3 |= VBLANK_INT_MASK;
5765 	}
5766 	if (rdev->irq.crtc_vblank_int[3] ||
5767 	    atomic_read(&rdev->irq.pflip[3])) {
5768 		DRM_DEBUG("si_irq_set: vblank 3\n");
5769 		crtc4 |= VBLANK_INT_MASK;
5770 	}
5771 	if (rdev->irq.crtc_vblank_int[4] ||
5772 	    atomic_read(&rdev->irq.pflip[4])) {
5773 		DRM_DEBUG("si_irq_set: vblank 4\n");
5774 		crtc5 |= VBLANK_INT_MASK;
5775 	}
5776 	if (rdev->irq.crtc_vblank_int[5] ||
5777 	    atomic_read(&rdev->irq.pflip[5])) {
5778 		DRM_DEBUG("si_irq_set: vblank 5\n");
5779 		crtc6 |= VBLANK_INT_MASK;
5780 	}
5781 	if (rdev->irq.hpd[0]) {
5782 		DRM_DEBUG("si_irq_set: hpd 1\n");
5783 		hpd1 |= DC_HPDx_INT_EN;
5784 	}
5785 	if (rdev->irq.hpd[1]) {
5786 		DRM_DEBUG("si_irq_set: hpd 2\n");
5787 		hpd2 |= DC_HPDx_INT_EN;
5788 	}
5789 	if (rdev->irq.hpd[2]) {
5790 		DRM_DEBUG("si_irq_set: hpd 3\n");
5791 		hpd3 |= DC_HPDx_INT_EN;
5792 	}
5793 	if (rdev->irq.hpd[3]) {
5794 		DRM_DEBUG("si_irq_set: hpd 4\n");
5795 		hpd4 |= DC_HPDx_INT_EN;
5796 	}
5797 	if (rdev->irq.hpd[4]) {
5798 		DRM_DEBUG("si_irq_set: hpd 5\n");
5799 		hpd5 |= DC_HPDx_INT_EN;
5800 	}
5801 	if (rdev->irq.hpd[5]) {
5802 		DRM_DEBUG("si_irq_set: hpd 6\n");
5803 		hpd6 |= DC_HPDx_INT_EN;
5804 	}
5805 
5806 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5807 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5808 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5809 
5810 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5811 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5812 
5813 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5814 
5815 	if (rdev->irq.dpm_thermal) {
5816 		DRM_DEBUG("dpm thermal\n");
5817 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5818 	}
5819 
5820 	if (rdev->num_crtc >= 2) {
5821 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5822 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5823 	}
5824 	if (rdev->num_crtc >= 4) {
5825 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5826 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5827 	}
5828 	if (rdev->num_crtc >= 6) {
5829 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5830 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5831 	}
5832 
5833 	if (rdev->num_crtc >= 2) {
5834 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5835 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5836 	}
5837 	if (rdev->num_crtc >= 4) {
5838 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5839 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5840 	}
5841 	if (rdev->num_crtc >= 6) {
5842 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5843 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5844 	}
5845 
5846 	if (!ASIC_IS_NODCE(rdev)) {
5847 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5848 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5849 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5850 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5851 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5852 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5853 	}
5854 
5855 	WREG32(CG_THERMAL_INT, thermal_int);
5856 
5857 	return 0;
5858 }
5859 
5860 static inline void si_irq_ack(struct radeon_device *rdev)
5861 {
5862 	u32 tmp;
5863 
5864 	if (ASIC_IS_NODCE(rdev))
5865 		return;
5866 
5867 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5868 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5869 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5870 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5871 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5872 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5873 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5874 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5875 	if (rdev->num_crtc >= 4) {
5876 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5877 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5878 	}
5879 	if (rdev->num_crtc >= 6) {
5880 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5881 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5882 	}
5883 
5884 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5885 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5886 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5887 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5888 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5889 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5890 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5891 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5892 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5893 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5894 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5895 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5896 
5897 	if (rdev->num_crtc >= 4) {
5898 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5899 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5900 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5901 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5902 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5903 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5904 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5905 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5906 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5907 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5908 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5909 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5910 	}
5911 
5912 	if (rdev->num_crtc >= 6) {
5913 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5914 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5915 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5916 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5917 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5918 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5919 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5920 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5921 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5922 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5923 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5924 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5925 	}
5926 
5927 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5928 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5929 		tmp |= DC_HPDx_INT_ACK;
5930 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5931 	}
5932 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5933 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5934 		tmp |= DC_HPDx_INT_ACK;
5935 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5936 	}
5937 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5938 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5939 		tmp |= DC_HPDx_INT_ACK;
5940 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5941 	}
5942 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5943 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5944 		tmp |= DC_HPDx_INT_ACK;
5945 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5946 	}
5947 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5948 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5949 		tmp |= DC_HPDx_INT_ACK;
5950 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5951 	}
5952 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5953 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5954 		tmp |= DC_HPDx_INT_ACK;
5955 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5956 	}
5957 }
5958 
5959 static void si_irq_disable(struct radeon_device *rdev)
5960 {
5961 	si_disable_interrupts(rdev);
5962 	/* Wait and acknowledge irq */
5963 	mdelay(1);
5964 	si_irq_ack(rdev);
5965 	si_disable_interrupt_state(rdev);
5966 }
5967 
5968 static void si_irq_suspend(struct radeon_device *rdev)
5969 {
5970 	si_irq_disable(rdev);
5971 	si_rlc_stop(rdev);
5972 }
5973 
5974 static void si_irq_fini(struct radeon_device *rdev)
5975 {
5976 	si_irq_suspend(rdev);
5977 	r600_ih_ring_fini(rdev);
5978 }
5979 
5980 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5981 {
5982 	u32 wptr, tmp;
5983 
5984 	if (rdev->wb.enabled)
5985 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5986 	else
5987 		wptr = RREG32(IH_RB_WPTR);
5988 
5989 	if (wptr & RB_OVERFLOW) {
5990 		/* When a ring buffer overflow happen start parsing interrupt
5991 		 * from the last not overwritten vector (wptr + 16). Hopefully
5992 		 * this should allow us to catchup.
5993 		 */
5994 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5995 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5996 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5997 		tmp = RREG32(IH_RB_CNTL);
5998 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5999 		WREG32(IH_RB_CNTL, tmp);
6000 	}
6001 	return (wptr & rdev->ih.ptr_mask);
6002 }
6003 
6004 /*        SI IV Ring
6005  * Each IV ring entry is 128 bits:
6006  * [7:0]    - interrupt source id
6007  * [31:8]   - reserved
6008  * [59:32]  - interrupt source data
6009  * [63:60]  - reserved
6010  * [71:64]  - RINGID
6011  * [79:72]  - VMID
6012  * [127:80] - reserved
6013  */
6014 int si_irq_process(struct radeon_device *rdev)
6015 {
6016 	u32 wptr;
6017 	u32 rptr;
6018 	u32 src_id, src_data, ring_id;
6019 	u32 ring_index;
6020 	bool queue_hotplug = false;
6021 	bool queue_thermal = false;
6022 	u32 status, addr;
6023 
6024 	if (!rdev->ih.enabled || rdev->shutdown)
6025 		return IRQ_NONE;
6026 
6027 	wptr = si_get_ih_wptr(rdev);
6028 
6029 restart_ih:
6030 	/* is somebody else already processing irqs? */
6031 	if (atomic_xchg(&rdev->ih.lock, 1))
6032 		return IRQ_NONE;
6033 
6034 	rptr = rdev->ih.rptr;
6035 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6036 
6037 	/* Order reading of wptr vs. reading of IH ring data */
6038 	rmb();
6039 
6040 	/* display interrupts */
6041 	si_irq_ack(rdev);
6042 
6043 	while (rptr != wptr) {
6044 		/* wptr/rptr are in bytes! */
6045 		ring_index = rptr / 4;
6046 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6047 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6048 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6049 
6050 		switch (src_id) {
6051 		case 1: /* D1 vblank/vline */
6052 			switch (src_data) {
6053 			case 0: /* D1 vblank */
6054 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6055 					if (rdev->irq.crtc_vblank_int[0]) {
6056 						drm_handle_vblank(rdev->ddev, 0);
6057 						rdev->pm.vblank_sync = true;
6058 						wake_up(&rdev->irq.vblank_queue);
6059 					}
6060 					if (atomic_read(&rdev->irq.pflip[0]))
6061 						radeon_crtc_handle_flip(rdev, 0);
6062 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6063 					DRM_DEBUG("IH: D1 vblank\n");
6064 				}
6065 				break;
6066 			case 1: /* D1 vline */
6067 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6068 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6069 					DRM_DEBUG("IH: D1 vline\n");
6070 				}
6071 				break;
6072 			default:
6073 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6074 				break;
6075 			}
6076 			break;
6077 		case 2: /* D2 vblank/vline */
6078 			switch (src_data) {
6079 			case 0: /* D2 vblank */
6080 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6081 					if (rdev->irq.crtc_vblank_int[1]) {
6082 						drm_handle_vblank(rdev->ddev, 1);
6083 						rdev->pm.vblank_sync = true;
6084 						wake_up(&rdev->irq.vblank_queue);
6085 					}
6086 					if (atomic_read(&rdev->irq.pflip[1]))
6087 						radeon_crtc_handle_flip(rdev, 1);
6088 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6089 					DRM_DEBUG("IH: D2 vblank\n");
6090 				}
6091 				break;
6092 			case 1: /* D2 vline */
6093 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6094 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6095 					DRM_DEBUG("IH: D2 vline\n");
6096 				}
6097 				break;
6098 			default:
6099 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6100 				break;
6101 			}
6102 			break;
6103 		case 3: /* D3 vblank/vline */
6104 			switch (src_data) {
6105 			case 0: /* D3 vblank */
6106 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6107 					if (rdev->irq.crtc_vblank_int[2]) {
6108 						drm_handle_vblank(rdev->ddev, 2);
6109 						rdev->pm.vblank_sync = true;
6110 						wake_up(&rdev->irq.vblank_queue);
6111 					}
6112 					if (atomic_read(&rdev->irq.pflip[2]))
6113 						radeon_crtc_handle_flip(rdev, 2);
6114 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6115 					DRM_DEBUG("IH: D3 vblank\n");
6116 				}
6117 				break;
6118 			case 1: /* D3 vline */
6119 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6120 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6121 					DRM_DEBUG("IH: D3 vline\n");
6122 				}
6123 				break;
6124 			default:
6125 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6126 				break;
6127 			}
6128 			break;
6129 		case 4: /* D4 vblank/vline */
6130 			switch (src_data) {
6131 			case 0: /* D4 vblank */
6132 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6133 					if (rdev->irq.crtc_vblank_int[3]) {
6134 						drm_handle_vblank(rdev->ddev, 3);
6135 						rdev->pm.vblank_sync = true;
6136 						wake_up(&rdev->irq.vblank_queue);
6137 					}
6138 					if (atomic_read(&rdev->irq.pflip[3]))
6139 						radeon_crtc_handle_flip(rdev, 3);
6140 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6141 					DRM_DEBUG("IH: D4 vblank\n");
6142 				}
6143 				break;
6144 			case 1: /* D4 vline */
6145 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6146 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6147 					DRM_DEBUG("IH: D4 vline\n");
6148 				}
6149 				break;
6150 			default:
6151 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6152 				break;
6153 			}
6154 			break;
6155 		case 5: /* D5 vblank/vline */
6156 			switch (src_data) {
6157 			case 0: /* D5 vblank */
6158 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6159 					if (rdev->irq.crtc_vblank_int[4]) {
6160 						drm_handle_vblank(rdev->ddev, 4);
6161 						rdev->pm.vblank_sync = true;
6162 						wake_up(&rdev->irq.vblank_queue);
6163 					}
6164 					if (atomic_read(&rdev->irq.pflip[4]))
6165 						radeon_crtc_handle_flip(rdev, 4);
6166 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6167 					DRM_DEBUG("IH: D5 vblank\n");
6168 				}
6169 				break;
6170 			case 1: /* D5 vline */
6171 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6172 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6173 					DRM_DEBUG("IH: D5 vline\n");
6174 				}
6175 				break;
6176 			default:
6177 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6178 				break;
6179 			}
6180 			break;
6181 		case 6: /* D6 vblank/vline */
6182 			switch (src_data) {
6183 			case 0: /* D6 vblank */
6184 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6185 					if (rdev->irq.crtc_vblank_int[5]) {
6186 						drm_handle_vblank(rdev->ddev, 5);
6187 						rdev->pm.vblank_sync = true;
6188 						wake_up(&rdev->irq.vblank_queue);
6189 					}
6190 					if (atomic_read(&rdev->irq.pflip[5]))
6191 						radeon_crtc_handle_flip(rdev, 5);
6192 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6193 					DRM_DEBUG("IH: D6 vblank\n");
6194 				}
6195 				break;
6196 			case 1: /* D6 vline */
6197 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6198 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6199 					DRM_DEBUG("IH: D6 vline\n");
6200 				}
6201 				break;
6202 			default:
6203 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6204 				break;
6205 			}
6206 			break;
6207 		case 42: /* HPD hotplug */
6208 			switch (src_data) {
6209 			case 0:
6210 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6211 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6212 					queue_hotplug = true;
6213 					DRM_DEBUG("IH: HPD1\n");
6214 				}
6215 				break;
6216 			case 1:
6217 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6218 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6219 					queue_hotplug = true;
6220 					DRM_DEBUG("IH: HPD2\n");
6221 				}
6222 				break;
6223 			case 2:
6224 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6225 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6226 					queue_hotplug = true;
6227 					DRM_DEBUG("IH: HPD3\n");
6228 				}
6229 				break;
6230 			case 3:
6231 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6232 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6233 					queue_hotplug = true;
6234 					DRM_DEBUG("IH: HPD4\n");
6235 				}
6236 				break;
6237 			case 4:
6238 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6239 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6240 					queue_hotplug = true;
6241 					DRM_DEBUG("IH: HPD5\n");
6242 				}
6243 				break;
6244 			case 5:
6245 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6246 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6247 					queue_hotplug = true;
6248 					DRM_DEBUG("IH: HPD6\n");
6249 				}
6250 				break;
6251 			default:
6252 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6253 				break;
6254 			}
6255 			break;
6256 		case 146:
6257 		case 147:
6258 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6259 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6260 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6261 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6262 				addr);
6263 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6264 				status);
6265 			si_vm_decode_fault(rdev, status, addr);
6266 			/* reset addr and status */
6267 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6268 			break;
6269 		case 176: /* RINGID0 CP_INT */
6270 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6271 			break;
6272 		case 177: /* RINGID1 CP_INT */
6273 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6274 			break;
6275 		case 178: /* RINGID2 CP_INT */
6276 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6277 			break;
6278 		case 181: /* CP EOP event */
6279 			DRM_DEBUG("IH: CP EOP\n");
6280 			switch (ring_id) {
6281 			case 0:
6282 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6283 				break;
6284 			case 1:
6285 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6286 				break;
6287 			case 2:
6288 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6289 				break;
6290 			}
6291 			break;
6292 		case 224: /* DMA trap event */
6293 			DRM_DEBUG("IH: DMA trap\n");
6294 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6295 			break;
6296 		case 230: /* thermal low to high */
6297 			DRM_DEBUG("IH: thermal low to high\n");
6298 			rdev->pm.dpm.thermal.high_to_low = false;
6299 			queue_thermal = true;
6300 			break;
6301 		case 231: /* thermal high to low */
6302 			DRM_DEBUG("IH: thermal high to low\n");
6303 			rdev->pm.dpm.thermal.high_to_low = true;
6304 			queue_thermal = true;
6305 			break;
6306 		case 233: /* GUI IDLE */
6307 			DRM_DEBUG("IH: GUI idle\n");
6308 			break;
6309 		case 244: /* DMA trap event */
6310 			DRM_DEBUG("IH: DMA1 trap\n");
6311 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6312 			break;
6313 		default:
6314 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6315 			break;
6316 		}
6317 
6318 		/* wptr/rptr are in bytes! */
6319 		rptr += 16;
6320 		rptr &= rdev->ih.ptr_mask;
6321 	}
6322 	if (queue_hotplug)
6323 		schedule_work(&rdev->hotplug_work);
6324 	if (queue_thermal && rdev->pm.dpm_enabled)
6325 		schedule_work(&rdev->pm.dpm.thermal.work);
6326 	rdev->ih.rptr = rptr;
6327 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6328 	atomic_set(&rdev->ih.lock, 0);
6329 
6330 	/* make sure wptr hasn't changed while processing */
6331 	wptr = si_get_ih_wptr(rdev);
6332 	if (wptr != rptr)
6333 		goto restart_ih;
6334 
6335 	return IRQ_HANDLED;
6336 }
6337 
6338 /**
6339  * si_copy_dma - copy pages using the DMA engine
6340  *
6341  * @rdev: radeon_device pointer
6342  * @src_offset: src GPU address
6343  * @dst_offset: dst GPU address
6344  * @num_gpu_pages: number of GPU pages to xfer
6345  * @fence: radeon fence object
6346  *
6347  * Copy GPU paging using the DMA engine (SI).
6348  * Used by the radeon ttm implementation to move pages if
6349  * registered as the asic copy callback.
6350  */
6351 int si_copy_dma(struct radeon_device *rdev,
6352 		uint64_t src_offset, uint64_t dst_offset,
6353 		unsigned num_gpu_pages,
6354 		struct radeon_fence **fence)
6355 {
6356 	struct radeon_semaphore *sem = NULL;
6357 	int ring_index = rdev->asic->copy.dma_ring_index;
6358 	struct radeon_ring *ring = &rdev->ring[ring_index];
6359 	u32 size_in_bytes, cur_size_in_bytes;
6360 	int i, num_loops;
6361 	int r = 0;
6362 
6363 	r = radeon_semaphore_create(rdev, &sem);
6364 	if (r) {
6365 		DRM_ERROR("radeon: moving bo (%d).\n", r);
6366 		return r;
6367 	}
6368 
6369 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6370 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6371 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6372 	if (r) {
6373 		DRM_ERROR("radeon: moving bo (%d).\n", r);
6374 		radeon_semaphore_free(rdev, &sem, NULL);
6375 		return r;
6376 	}
6377 
6378 	if (radeon_fence_need_sync(*fence, ring->idx)) {
6379 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6380 					    ring->idx);
6381 		radeon_fence_note_sync(*fence, ring->idx);
6382 	} else {
6383 		radeon_semaphore_free(rdev, &sem, NULL);
6384 	}
6385 
6386 	for (i = 0; i < num_loops; i++) {
6387 		cur_size_in_bytes = size_in_bytes;
6388 		if (cur_size_in_bytes > 0xFFFFF)
6389 			cur_size_in_bytes = 0xFFFFF;
6390 		size_in_bytes -= cur_size_in_bytes;
6391 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6392 		radeon_ring_write(ring, dst_offset & 0xffffffff);
6393 		radeon_ring_write(ring, src_offset & 0xffffffff);
6394 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6395 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6396 		src_offset += cur_size_in_bytes;
6397 		dst_offset += cur_size_in_bytes;
6398 	}
6399 
6400 	r = radeon_fence_emit(rdev, fence, ring->idx);
6401 	if (r) {
6402 		radeon_ring_unlock_undo(rdev, ring);
6403 		return r;
6404 	}
6405 
6406 	radeon_ring_unlock_commit(rdev, ring);
6407 	radeon_semaphore_free(rdev, &sem, *fence);
6408 
6409 	return r;
6410 }
6411 
6412 /*
6413  * startup/shutdown callbacks
6414  */
6415 static int si_startup(struct radeon_device *rdev)
6416 {
6417 	struct radeon_ring *ring;
6418 	int r;
6419 
6420 	/* enable pcie gen2/3 link */
6421 	si_pcie_gen3_enable(rdev);
6422 	/* enable aspm */
6423 	si_program_aspm(rdev);
6424 
6425 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6426 	    !rdev->rlc_fw || !rdev->mc_fw) {
6427 		r = si_init_microcode(rdev);
6428 		if (r) {
6429 			DRM_ERROR("Failed to load firmware!\n");
6430 			return r;
6431 		}
6432 	}
6433 
6434 	r = si_mc_load_microcode(rdev);
6435 	if (r) {
6436 		DRM_ERROR("Failed to load MC firmware!\n");
6437 		return r;
6438 	}
6439 
6440 	r = r600_vram_scratch_init(rdev);
6441 	if (r)
6442 		return r;
6443 
6444 	si_mc_program(rdev);
6445 	r = si_pcie_gart_enable(rdev);
6446 	if (r)
6447 		return r;
6448 	si_gpu_init(rdev);
6449 
6450 	/* allocate rlc buffers */
6451 	r = si_rlc_init(rdev);
6452 	if (r) {
6453 		DRM_ERROR("Failed to init rlc BOs!\n");
6454 		return r;
6455 	}
6456 
6457 	/* allocate wb buffer */
6458 	r = radeon_wb_init(rdev);
6459 	if (r)
6460 		return r;
6461 
6462 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6463 	if (r) {
6464 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6465 		return r;
6466 	}
6467 
6468 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6469 	if (r) {
6470 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6471 		return r;
6472 	}
6473 
6474 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6475 	if (r) {
6476 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6477 		return r;
6478 	}
6479 
6480 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6481 	if (r) {
6482 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6483 		return r;
6484 	}
6485 
6486 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6487 	if (r) {
6488 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6489 		return r;
6490 	}
6491 
6492 	if (rdev->has_uvd) {
6493 		r = rv770_uvd_resume(rdev);
6494 		if (!r) {
6495 			r = radeon_fence_driver_start_ring(rdev,
6496 							   R600_RING_TYPE_UVD_INDEX);
6497 			if (r)
6498 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6499 		}
6500 		if (r)
6501 			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6502 	}
6503 
6504 	/* Enable IRQ */
6505 	if (!rdev->irq.installed) {
6506 		r = radeon_irq_kms_init(rdev);
6507 		if (r)
6508 			return r;
6509 	}
6510 
6511 	r = si_irq_init(rdev);
6512 	if (r) {
6513 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6514 		radeon_irq_kms_fini(rdev);
6515 		return r;
6516 	}
6517 	si_irq_set(rdev);
6518 
6519 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6520 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6521 			     CP_RB0_RPTR, CP_RB0_WPTR,
6522 			     0, 0xfffff, RADEON_CP_PACKET2);
6523 	if (r)
6524 		return r;
6525 
6526 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6527 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6528 			     CP_RB1_RPTR, CP_RB1_WPTR,
6529 			     0, 0xfffff, RADEON_CP_PACKET2);
6530 	if (r)
6531 		return r;
6532 
6533 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6534 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6535 			     CP_RB2_RPTR, CP_RB2_WPTR,
6536 			     0, 0xfffff, RADEON_CP_PACKET2);
6537 	if (r)
6538 		return r;
6539 
6540 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6541 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6542 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6543 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6544 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6545 	if (r)
6546 		return r;
6547 
6548 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6549 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6550 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6551 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6552 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6553 	if (r)
6554 		return r;
6555 
6556 	r = si_cp_load_microcode(rdev);
6557 	if (r)
6558 		return r;
6559 	r = si_cp_resume(rdev);
6560 	if (r)
6561 		return r;
6562 
6563 	r = cayman_dma_resume(rdev);
6564 	if (r)
6565 		return r;
6566 
6567 	if (rdev->has_uvd) {
6568 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6569 		if (ring->ring_size) {
6570 			r = radeon_ring_init(rdev, ring, ring->ring_size,
6571 					     R600_WB_UVD_RPTR_OFFSET,
6572 					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6573 					     0, 0xfffff, RADEON_CP_PACKET2);
6574 			if (!r)
6575 				r = r600_uvd_init(rdev);
6576 			if (r)
6577 				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6578 		}
6579 	}
6580 
6581 	r = radeon_ib_pool_init(rdev);
6582 	if (r) {
6583 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6584 		return r;
6585 	}
6586 
6587 	r = radeon_vm_manager_init(rdev);
6588 	if (r) {
6589 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6590 		return r;
6591 	}
6592 
6593 	return 0;
6594 }
6595 
6596 int si_resume(struct radeon_device *rdev)
6597 {
6598 	int r;
6599 
6600 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6601 	 * posting will perform necessary task to bring back GPU into good
6602 	 * shape.
6603 	 */
6604 	/* post card */
6605 	atom_asic_init(rdev->mode_info.atom_context);
6606 
6607 	/* init golden registers */
6608 	si_init_golden_registers(rdev);
6609 
6610 	rdev->accel_working = true;
6611 	r = si_startup(rdev);
6612 	if (r) {
6613 		DRM_ERROR("si startup failed on resume\n");
6614 		rdev->accel_working = false;
6615 		return r;
6616 	}
6617 
6618 	return r;
6619 
6620 }
6621 
6622 int si_suspend(struct radeon_device *rdev)
6623 {
6624 	radeon_vm_manager_fini(rdev);
6625 	si_cp_enable(rdev, false);
6626 	cayman_dma_stop(rdev);
6627 	if (rdev->has_uvd) {
6628 		r600_uvd_rbc_stop(rdev);
6629 		radeon_uvd_suspend(rdev);
6630 	}
6631 	si_irq_suspend(rdev);
6632 	radeon_wb_disable(rdev);
6633 	si_pcie_gart_disable(rdev);
6634 	return 0;
6635 }
6636 
6637 /* Plan is to move initialization in that function and use
6638  * helper function so that radeon_device_init pretty much
6639  * do nothing more than calling asic specific function. This
6640  * should also allow to remove a bunch of callback function
6641  * like vram_info.
6642  */
6643 int si_init(struct radeon_device *rdev)
6644 {
6645 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6646 	int r;
6647 
6648 	/* Read BIOS */
6649 	if (!radeon_get_bios(rdev)) {
6650 		if (ASIC_IS_AVIVO(rdev))
6651 			return -EINVAL;
6652 	}
6653 	/* Must be an ATOMBIOS */
6654 	if (!rdev->is_atom_bios) {
6655 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6656 		return -EINVAL;
6657 	}
6658 	r = radeon_atombios_init(rdev);
6659 	if (r)
6660 		return r;
6661 
6662 	/* Post card if necessary */
6663 	if (!radeon_card_posted(rdev)) {
6664 		if (!rdev->bios) {
6665 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6666 			return -EINVAL;
6667 		}
6668 		DRM_INFO("GPU not posted. posting now...\n");
6669 		atom_asic_init(rdev->mode_info.atom_context);
6670 	}
6671 	/* init golden registers */
6672 	si_init_golden_registers(rdev);
6673 	/* Initialize scratch registers */
6674 	si_scratch_init(rdev);
6675 	/* Initialize surface registers */
6676 	radeon_surface_init(rdev);
6677 	/* Initialize clocks */
6678 	radeon_get_clock_info(rdev->ddev);
6679 
6680 	/* Fence driver */
6681 	r = radeon_fence_driver_init(rdev);
6682 	if (r)
6683 		return r;
6684 
6685 	/* initialize memory controller */
6686 	r = si_mc_init(rdev);
6687 	if (r)
6688 		return r;
6689 	/* Memory manager */
6690 	r = radeon_bo_init(rdev);
6691 	if (r)
6692 		return r;
6693 
6694 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6695 	ring->ring_obj = NULL;
6696 	r600_ring_init(rdev, ring, 1024 * 1024);
6697 
6698 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6699 	ring->ring_obj = NULL;
6700 	r600_ring_init(rdev, ring, 1024 * 1024);
6701 
6702 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6703 	ring->ring_obj = NULL;
6704 	r600_ring_init(rdev, ring, 1024 * 1024);
6705 
6706 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6707 	ring->ring_obj = NULL;
6708 	r600_ring_init(rdev, ring, 64 * 1024);
6709 
6710 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6711 	ring->ring_obj = NULL;
6712 	r600_ring_init(rdev, ring, 64 * 1024);
6713 
6714 	if (rdev->has_uvd) {
6715 		r = radeon_uvd_init(rdev);
6716 		if (!r) {
6717 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6718 			ring->ring_obj = NULL;
6719 			r600_ring_init(rdev, ring, 4096);
6720 		}
6721 	}
6722 
6723 	rdev->ih.ring_obj = NULL;
6724 	r600_ih_ring_init(rdev, 64 * 1024);
6725 
6726 	r = r600_pcie_gart_init(rdev);
6727 	if (r)
6728 		return r;
6729 
6730 	rdev->accel_working = true;
6731 	r = si_startup(rdev);
6732 	if (r) {
6733 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6734 		si_cp_fini(rdev);
6735 		cayman_dma_fini(rdev);
6736 		si_irq_fini(rdev);
6737 		si_rlc_fini(rdev);
6738 		radeon_wb_fini(rdev);
6739 		radeon_ib_pool_fini(rdev);
6740 		radeon_vm_manager_fini(rdev);
6741 		radeon_irq_kms_fini(rdev);
6742 		si_pcie_gart_fini(rdev);
6743 		rdev->accel_working = false;
6744 	}
6745 
6746 	/* Don't start up if the MC ucode is missing.
6747 	 * The default clocks and voltages before the MC ucode
6748 	 * is loaded are not suffient for advanced operations.
6749 	 */
6750 	if (!rdev->mc_fw) {
6751 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6752 		return -EINVAL;
6753 	}
6754 
6755 	return 0;
6756 }
6757 
6758 void si_fini(struct radeon_device *rdev)
6759 {
6760 	si_cp_fini(rdev);
6761 	cayman_dma_fini(rdev);
6762 	si_irq_fini(rdev);
6763 	si_rlc_fini(rdev);
6764 	si_fini_cg(rdev);
6765 	si_fini_pg(rdev);
6766 	radeon_wb_fini(rdev);
6767 	radeon_vm_manager_fini(rdev);
6768 	radeon_ib_pool_fini(rdev);
6769 	radeon_irq_kms_fini(rdev);
6770 	if (rdev->has_uvd)
6771 		radeon_uvd_fini(rdev);
6772 	si_pcie_gart_fini(rdev);
6773 	r600_vram_scratch_fini(rdev);
6774 	radeon_gem_fini(rdev);
6775 	radeon_fence_driver_fini(rdev);
6776 	radeon_bo_fini(rdev);
6777 	radeon_atombios_fini(rdev);
6778 	kfree(rdev->bios);
6779 	rdev->bios = NULL;
6780 }
6781 
6782 /**
6783  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6784  *
6785  * @rdev: radeon_device pointer
6786  *
6787  * Fetches a GPU clock counter snapshot (SI).
6788  * Returns the 64 bit clock counter snapshot.
6789  */
6790 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6791 {
6792 	uint64_t clock;
6793 
6794 	mutex_lock(&rdev->gpu_clock_mutex);
6795 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6796 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6797 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6798 	mutex_unlock(&rdev->gpu_clock_mutex);
6799 	return clock;
6800 }
6801 
6802 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6803 {
6804 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6805 	int r;
6806 
6807 	/* bypass vclk and dclk with bclk */
6808 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6809 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6810 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6811 
6812 	/* put PLL in bypass mode */
6813 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6814 
6815 	if (!vclk || !dclk) {
6816 		/* keep the Bypass mode, put PLL to sleep */
6817 		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6818 		return 0;
6819 	}
6820 
6821 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6822 					  16384, 0x03FFFFFF, 0, 128, 5,
6823 					  &fb_div, &vclk_div, &dclk_div);
6824 	if (r)
6825 		return r;
6826 
6827 	/* set RESET_ANTI_MUX to 0 */
6828 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6829 
6830 	/* set VCO_MODE to 1 */
6831 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6832 
6833 	/* toggle UPLL_SLEEP to 1 then back to 0 */
6834 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6835 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6836 
6837 	/* deassert UPLL_RESET */
6838 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6839 
6840 	mdelay(1);
6841 
6842 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6843 	if (r)
6844 		return r;
6845 
6846 	/* assert UPLL_RESET again */
6847 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6848 
6849 	/* disable spread spectrum. */
6850 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6851 
6852 	/* set feedback divider */
6853 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6854 
6855 	/* set ref divider to 0 */
6856 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6857 
6858 	if (fb_div < 307200)
6859 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6860 	else
6861 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6862 
6863 	/* set PDIV_A and PDIV_B */
6864 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6865 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6866 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6867 
6868 	/* give the PLL some time to settle */
6869 	mdelay(15);
6870 
6871 	/* deassert PLL_RESET */
6872 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6873 
6874 	mdelay(15);
6875 
6876 	/* switch from bypass mode to normal mode */
6877 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6878 
6879 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6880 	if (r)
6881 		return r;
6882 
6883 	/* switch VCLK and DCLK selection */
6884 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6885 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6886 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6887 
6888 	mdelay(100);
6889 
6890 	return 0;
6891 }
6892 
6893 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6894 {
6895 	struct pci_dev *root = rdev->pdev->bus->self;
6896 	int bridge_pos, gpu_pos;
6897 	u32 speed_cntl, mask, current_data_rate;
6898 	int ret, i;
6899 	u16 tmp16;
6900 
6901 	if (radeon_pcie_gen2 == 0)
6902 		return;
6903 
6904 	if (rdev->flags & RADEON_IS_IGP)
6905 		return;
6906 
6907 	if (!(rdev->flags & RADEON_IS_PCIE))
6908 		return;
6909 
6910 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6911 	if (ret != 0)
6912 		return;
6913 
6914 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6915 		return;
6916 
6917 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6918 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6919 		LC_CURRENT_DATA_RATE_SHIFT;
6920 	if (mask & DRM_PCIE_SPEED_80) {
6921 		if (current_data_rate == 2) {
6922 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6923 			return;
6924 		}
6925 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6926 	} else if (mask & DRM_PCIE_SPEED_50) {
6927 		if (current_data_rate == 1) {
6928 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6929 			return;
6930 		}
6931 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6932 	}
6933 
6934 	bridge_pos = pci_pcie_cap(root);
6935 	if (!bridge_pos)
6936 		return;
6937 
6938 	gpu_pos = pci_pcie_cap(rdev->pdev);
6939 	if (!gpu_pos)
6940 		return;
6941 
6942 	if (mask & DRM_PCIE_SPEED_80) {
6943 		/* re-try equalization if gen3 is not already enabled */
6944 		if (current_data_rate != 2) {
6945 			u16 bridge_cfg, gpu_cfg;
6946 			u16 bridge_cfg2, gpu_cfg2;
6947 			u32 max_lw, current_lw, tmp;
6948 
6949 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6950 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6951 
6952 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6953 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6954 
6955 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
6956 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6957 
6958 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
6959 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
6960 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
6961 
6962 			if (current_lw < max_lw) {
6963 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6964 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
6965 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
6966 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
6967 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
6968 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
6969 				}
6970 			}
6971 
6972 			for (i = 0; i < 10; i++) {
6973 				/* check status */
6974 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
6975 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
6976 					break;
6977 
6978 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6979 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6980 
6981 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
6982 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
6983 
6984 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6985 				tmp |= LC_SET_QUIESCE;
6986 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6987 
6988 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6989 				tmp |= LC_REDO_EQ;
6990 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6991 
6992 				mdelay(100);
6993 
6994 				/* linkctl */
6995 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
6996 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6997 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
6998 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6999 
7000 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7001 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7002 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7003 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7004 
7005 				/* linkctl2 */
7006 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7007 				tmp16 &= ~((1 << 4) | (7 << 9));
7008 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7009 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7010 
7011 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7012 				tmp16 &= ~((1 << 4) | (7 << 9));
7013 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7014 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7015 
7016 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7017 				tmp &= ~LC_SET_QUIESCE;
7018 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7019 			}
7020 		}
7021 	}
7022 
7023 	/* set the link speed */
7024 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7025 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7026 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7027 
7028 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7029 	tmp16 &= ~0xf;
7030 	if (mask & DRM_PCIE_SPEED_80)
7031 		tmp16 |= 3; /* gen3 */
7032 	else if (mask & DRM_PCIE_SPEED_50)
7033 		tmp16 |= 2; /* gen2 */
7034 	else
7035 		tmp16 |= 1; /* gen1 */
7036 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7037 
7038 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7039 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7040 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7041 
7042 	for (i = 0; i < rdev->usec_timeout; i++) {
7043 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7044 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7045 			break;
7046 		udelay(1);
7047 	}
7048 }
7049 
7050 static void si_program_aspm(struct radeon_device *rdev)
7051 {
7052 	u32 data, orig;
7053 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7054 	bool disable_clkreq = false;
7055 
7056 	if (radeon_aspm == 0)
7057 		return;
7058 
7059 	if (!(rdev->flags & RADEON_IS_PCIE))
7060 		return;
7061 
7062 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7063 	data &= ~LC_XMIT_N_FTS_MASK;
7064 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7065 	if (orig != data)
7066 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7067 
7068 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7069 	data |= LC_GO_TO_RECOVERY;
7070 	if (orig != data)
7071 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7072 
7073 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7074 	data |= P_IGNORE_EDB_ERR;
7075 	if (orig != data)
7076 		WREG32_PCIE(PCIE_P_CNTL, data);
7077 
7078 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7079 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7080 	data |= LC_PMI_TO_L1_DIS;
7081 	if (!disable_l0s)
7082 		data |= LC_L0S_INACTIVITY(7);
7083 
7084 	if (!disable_l1) {
7085 		data |= LC_L1_INACTIVITY(7);
7086 		data &= ~LC_PMI_TO_L1_DIS;
7087 		if (orig != data)
7088 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7089 
7090 		if (!disable_plloff_in_l1) {
7091 			bool clk_req_support;
7092 
7093 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7094 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7095 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7096 			if (orig != data)
7097 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7098 
7099 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7100 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7101 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7102 			if (orig != data)
7103 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7104 
7105 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7106 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7107 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7108 			if (orig != data)
7109 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7110 
7111 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7112 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7113 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7114 			if (orig != data)
7115 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7116 
7117 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7118 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7119 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7120 				if (orig != data)
7121 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7122 
7123 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7124 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7125 				if (orig != data)
7126 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7127 
7128 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7129 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7130 				if (orig != data)
7131 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7132 
7133 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7134 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7135 				if (orig != data)
7136 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7137 
7138 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7139 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7140 				if (orig != data)
7141 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7142 
7143 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7144 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7145 				if (orig != data)
7146 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7147 
7148 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7149 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7150 				if (orig != data)
7151 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7152 
7153 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7154 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7155 				if (orig != data)
7156 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7157 			}
7158 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7159 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7160 			data |= LC_DYN_LANES_PWR_STATE(3);
7161 			if (orig != data)
7162 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7163 
7164 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7165 			data &= ~LS2_EXIT_TIME_MASK;
7166 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7167 				data |= LS2_EXIT_TIME(5);
7168 			if (orig != data)
7169 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7170 
7171 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7172 			data &= ~LS2_EXIT_TIME_MASK;
7173 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7174 				data |= LS2_EXIT_TIME(5);
7175 			if (orig != data)
7176 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7177 
7178 			if (!disable_clkreq) {
7179 				struct pci_dev *root = rdev->pdev->bus->self;
7180 				u32 lnkcap;
7181 
7182 				clk_req_support = false;
7183 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7184 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7185 					clk_req_support = true;
7186 			} else {
7187 				clk_req_support = false;
7188 			}
7189 
7190 			if (clk_req_support) {
7191 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7192 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7193 				if (orig != data)
7194 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7195 
7196 				orig = data = RREG32(THM_CLK_CNTL);
7197 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7198 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7199 				if (orig != data)
7200 					WREG32(THM_CLK_CNTL, data);
7201 
7202 				orig = data = RREG32(MISC_CLK_CNTL);
7203 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7204 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7205 				if (orig != data)
7206 					WREG32(MISC_CLK_CNTL, data);
7207 
7208 				orig = data = RREG32(CG_CLKPIN_CNTL);
7209 				data &= ~BCLK_AS_XCLK;
7210 				if (orig != data)
7211 					WREG32(CG_CLKPIN_CNTL, data);
7212 
7213 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7214 				data &= ~FORCE_BIF_REFCLK_EN;
7215 				if (orig != data)
7216 					WREG32(CG_CLKPIN_CNTL_2, data);
7217 
7218 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7219 				data &= ~MPLL_CLKOUT_SEL_MASK;
7220 				data |= MPLL_CLKOUT_SEL(4);
7221 				if (orig != data)
7222 					WREG32(MPLL_BYPASSCLK_SEL, data);
7223 
7224 				orig = data = RREG32(SPLL_CNTL_MODE);
7225 				data &= ~SPLL_REFCLK_SEL_MASK;
7226 				if (orig != data)
7227 					WREG32(SPLL_CNTL_MODE, data);
7228 			}
7229 		}
7230 	} else {
7231 		if (orig != data)
7232 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7233 	}
7234 
7235 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7236 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7237 	if (orig != data)
7238 		WREG32_PCIE(PCIE_CNTL2, data);
7239 
7240 	if (!disable_l0s) {
7241 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7242 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7243 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7244 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7245 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7246 				data &= ~LC_L0S_INACTIVITY_MASK;
7247 				if (orig != data)
7248 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7249 			}
7250 		}
7251 	}
7252 }
7253