xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision ee8a99bd)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
/*
 * Firmware images required per Southern Islands ASIC: CP microcode
 * (pfp/me/ce), memory controller (mc), RLC, and SMC blobs.  Listing them
 * here embeds the names in the module info so userspace tooling
 * (e.g. initramfs generators) can pick them up.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
/* Forward declarations for static helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Cross-file helpers shared with earlier ASIC generations.
 * NOTE(review): by naming convention these live in r600.c / evergreen.c —
 * their definitions are not visible in this file.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79 
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82 	(0x8000 << 16) | (0x98f4 >> 2),
83 	0x00000000,
84 	(0x8040 << 16) | (0x98f4 >> 2),
85 	0x00000000,
86 	(0x8000 << 16) | (0xe80 >> 2),
87 	0x00000000,
88 	(0x8040 << 16) | (0xe80 >> 2),
89 	0x00000000,
90 	(0x8000 << 16) | (0x89bc >> 2),
91 	0x00000000,
92 	(0x8040 << 16) | (0x89bc >> 2),
93 	0x00000000,
94 	(0x8000 << 16) | (0x8c1c >> 2),
95 	0x00000000,
96 	(0x8040 << 16) | (0x8c1c >> 2),
97 	0x00000000,
98 	(0x9c00 << 16) | (0x98f0 >> 2),
99 	0x00000000,
100 	(0x9c00 << 16) | (0xe7c >> 2),
101 	0x00000000,
102 	(0x8000 << 16) | (0x9148 >> 2),
103 	0x00000000,
104 	(0x8040 << 16) | (0x9148 >> 2),
105 	0x00000000,
106 	(0x9c00 << 16) | (0x9150 >> 2),
107 	0x00000000,
108 	(0x9c00 << 16) | (0x897c >> 2),
109 	0x00000000,
110 	(0x9c00 << 16) | (0x8d8c >> 2),
111 	0x00000000,
112 	(0x9c00 << 16) | (0xac54 >> 2),
113 	0X00000000,
114 	0x3,
115 	(0x9c00 << 16) | (0x98f8 >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x9910 >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0x9914 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9918 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x991c >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x9920 >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0x9924 >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9928 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x992c >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x9930 >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x9934 >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9938 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x993c >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x9940 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x9944 >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9948 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x994c >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9950 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x9954 >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9958 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x995c >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x9960 >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x9964 >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9968 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x996c >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9970 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x9974 >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9978 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x997c >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9980 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x9984 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9988 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x998c >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x8c00 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x8c14 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x8c04 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x8c08 >> 2),
188 	0x00000000,
189 	(0x8000 << 16) | (0x9b7c >> 2),
190 	0x00000000,
191 	(0x8040 << 16) | (0x9b7c >> 2),
192 	0x00000000,
193 	(0x8000 << 16) | (0xe84 >> 2),
194 	0x00000000,
195 	(0x8040 << 16) | (0xe84 >> 2),
196 	0x00000000,
197 	(0x8000 << 16) | (0x89c0 >> 2),
198 	0x00000000,
199 	(0x8040 << 16) | (0x89c0 >> 2),
200 	0x00000000,
201 	(0x8000 << 16) | (0x914c >> 2),
202 	0x00000000,
203 	(0x8040 << 16) | (0x914c >> 2),
204 	0x00000000,
205 	(0x8000 << 16) | (0x8c20 >> 2),
206 	0x00000000,
207 	(0x8040 << 16) | (0x8c20 >> 2),
208 	0x00000000,
209 	(0x8000 << 16) | (0x9354 >> 2),
210 	0x00000000,
211 	(0x8040 << 16) | (0x9354 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9060 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9364 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9100 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x913c >> 2),
220 	0x00000000,
221 	(0x8000 << 16) | (0x90e0 >> 2),
222 	0x00000000,
223 	(0x8000 << 16) | (0x90e4 >> 2),
224 	0x00000000,
225 	(0x8000 << 16) | (0x90e8 >> 2),
226 	0x00000000,
227 	(0x8040 << 16) | (0x90e0 >> 2),
228 	0x00000000,
229 	(0x8040 << 16) | (0x90e4 >> 2),
230 	0x00000000,
231 	(0x8040 << 16) | (0x90e8 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x8bcc >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x8b24 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x88c4 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8e50 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c0c >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8e58 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8e5c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x9508 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x950c >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9494 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0xac0c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0xac10 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0xac14 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0xae00 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0xac08 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x88d4 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x88c8 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x88cc >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x89b0 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x8b10 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x8a14 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9830 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9834 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x9838 >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x9a10 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x9870 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x9874 >> 2),
286 	0x00000000,
287 	(0x8001 << 16) | (0x9870 >> 2),
288 	0x00000000,
289 	(0x8001 << 16) | (0x9874 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x9870 >> 2),
292 	0x00000000,
293 	(0x8040 << 16) | (0x9874 >> 2),
294 	0x00000000,
295 	(0x8041 << 16) | (0x9870 >> 2),
296 	0x00000000,
297 	(0x8041 << 16) | (0x9874 >> 2),
298 	0x00000000,
299 	0x00000000
300 };
301 
/*
 * "Golden" RLC register settings for Tahiti.
 * Entries are triples of { register offset, bit-mask, value }.
 * NOTE(review): the consumer is not visible in this chunk — presumably
 * applied via radeon_program_register_sequence(); confirm.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
311 
/*
 * "Golden" register settings for Tahiti: { register offset, bit-mask,
 * value } triples applied during ASIC init.  Do not reorder or edit the
 * values — they are hardware-validated defaults from AMD.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
346 
/* Additional Tahiti golden setting: single { offset, mask, value } triple. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
351 
/* Golden RLC register settings for Pitcairn: { offset, mask, value } triples. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
360 
/*
 * Golden register settings for Pitcairn: { offset, mask, value } triples.
 * Hardware-validated defaults — do not edit values.
 */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
391 
/* Golden RLC register settings for Verde: { offset, mask, value } triples. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
400 
/*
 * Golden register settings for Verde: { offset, mask, value } triples.
 * NOTE(review): several triples appear verbatim two or three times
 * (0xd030, 0x2ae4, 0x240c, ...); this duplication is carried over from the
 * vendor-supplied table and repeated identical writes are harmless.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
456 
/* Golden RLC register settings for Oland: { offset, mask, value } triples. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
465 
/*
 * Golden register settings for Oland: { offset, mask, value } triples.
 * Hardware-validated defaults — do not edit values.
 */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
496 
/*
 * Golden register settings for Hainan: { offset, mask, value } triples.
 * Hardware-validated defaults — do not edit values.
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
525 
/* Additional Hainan golden setting: single { offset, mask, value } triple. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
530 
/*
 * Clock-gating init sequence for Tahiti: { offset, mask, value } triples.
 * NOTE(review): "mgcg_cgcg" in the name suggests medium-grain /
 * coarse-grain clock gating setup — inferred from naming; the consumer
 * is not visible in this chunk.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
660 
/*
 * Clock-gating init sequence for Pitcairn: { offset, mask, value } triples.
 * NOTE(review): "mgcg_cgcg" suggests medium-/coarse-grain clock gating
 * setup — inferred from naming; consumer not visible in this chunk.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
758 
/*
 * Clock-gating init sequence for Verde: { offset, mask, value } triples.
 * NOTE(review): "mgcg_cgcg" suggests medium-/coarse-grain clock gating
 * setup — inferred from naming; consumer not visible in this chunk.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
858 
/*
 * Clock-gating init sequence for Oland: { offset, mask, value } triples.
 * NOTE(review): "mgcg_cgcg" suggests medium-/coarse-grain clock gating
 * setup — inferred from naming; consumer not visible in this chunk.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
938 
/* Hainan medium/coarse grain clockgating init sequence.  Each table entry
 * is three words — presumably {register offset, mask, value}; the table is
 * consumed by radeon_program_register_sequence() in si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1015 
1016 static u32 verde_pg_init[] =
1017 {
1018 	0x353c, 0xffffffff, 0x40000,
1019 	0x3538, 0xffffffff, 0x200010ff,
1020 	0x353c, 0xffffffff, 0x0,
1021 	0x353c, 0xffffffff, 0x0,
1022 	0x353c, 0xffffffff, 0x0,
1023 	0x353c, 0xffffffff, 0x0,
1024 	0x353c, 0xffffffff, 0x0,
1025 	0x353c, 0xffffffff, 0x7007,
1026 	0x3538, 0xffffffff, 0x300010ff,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x0,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x400000,
1033 	0x3538, 0xffffffff, 0x100010ff,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x120200,
1040 	0x3538, 0xffffffff, 0x500010ff,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x1e1e16,
1047 	0x3538, 0xffffffff, 0x600010ff,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x171f1e,
1054 	0x3538, 0xffffffff, 0x700010ff,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x3538, 0xffffffff, 0x9ff,
1062 	0x3500, 0xffffffff, 0x0,
1063 	0x3504, 0xffffffff, 0x10000800,
1064 	0x3504, 0xffffffff, 0xf,
1065 	0x3504, 0xffffffff, 0xf,
1066 	0x3500, 0xffffffff, 0x4,
1067 	0x3504, 0xffffffff, 0x1000051e,
1068 	0x3504, 0xffffffff, 0xffff,
1069 	0x3504, 0xffffffff, 0xffff,
1070 	0x3500, 0xffffffff, 0x8,
1071 	0x3504, 0xffffffff, 0x80500,
1072 	0x3500, 0xffffffff, 0x12,
1073 	0x3504, 0xffffffff, 0x9050c,
1074 	0x3500, 0xffffffff, 0x1d,
1075 	0x3504, 0xffffffff, 0xb052c,
1076 	0x3500, 0xffffffff, 0x2a,
1077 	0x3504, 0xffffffff, 0x1053e,
1078 	0x3500, 0xffffffff, 0x2d,
1079 	0x3504, 0xffffffff, 0x10546,
1080 	0x3500, 0xffffffff, 0x30,
1081 	0x3504, 0xffffffff, 0xa054e,
1082 	0x3500, 0xffffffff, 0x3c,
1083 	0x3504, 0xffffffff, 0x1055f,
1084 	0x3500, 0xffffffff, 0x3f,
1085 	0x3504, 0xffffffff, 0x10567,
1086 	0x3500, 0xffffffff, 0x42,
1087 	0x3504, 0xffffffff, 0x1056f,
1088 	0x3500, 0xffffffff, 0x45,
1089 	0x3504, 0xffffffff, 0x10572,
1090 	0x3500, 0xffffffff, 0x48,
1091 	0x3504, 0xffffffff, 0x20575,
1092 	0x3500, 0xffffffff, 0x4c,
1093 	0x3504, 0xffffffff, 0x190801,
1094 	0x3500, 0xffffffff, 0x67,
1095 	0x3504, 0xffffffff, 0x1082a,
1096 	0x3500, 0xffffffff, 0x6a,
1097 	0x3504, 0xffffffff, 0x1b082d,
1098 	0x3500, 0xffffffff, 0x87,
1099 	0x3504, 0xffffffff, 0x310851,
1100 	0x3500, 0xffffffff, 0xba,
1101 	0x3504, 0xffffffff, 0x891,
1102 	0x3500, 0xffffffff, 0xbc,
1103 	0x3504, 0xffffffff, 0x893,
1104 	0x3500, 0xffffffff, 0xbe,
1105 	0x3504, 0xffffffff, 0x20895,
1106 	0x3500, 0xffffffff, 0xc2,
1107 	0x3504, 0xffffffff, 0x20899,
1108 	0x3500, 0xffffffff, 0xc6,
1109 	0x3504, 0xffffffff, 0x2089d,
1110 	0x3500, 0xffffffff, 0xca,
1111 	0x3504, 0xffffffff, 0x8a1,
1112 	0x3500, 0xffffffff, 0xcc,
1113 	0x3504, 0xffffffff, 0x8a3,
1114 	0x3500, 0xffffffff, 0xce,
1115 	0x3504, 0xffffffff, 0x308a5,
1116 	0x3500, 0xffffffff, 0xd3,
1117 	0x3504, 0xffffffff, 0x6d08cd,
1118 	0x3500, 0xffffffff, 0x142,
1119 	0x3504, 0xffffffff, 0x2000095a,
1120 	0x3504, 0xffffffff, 0x1,
1121 	0x3500, 0xffffffff, 0x144,
1122 	0x3504, 0xffffffff, 0x301f095b,
1123 	0x3500, 0xffffffff, 0x165,
1124 	0x3504, 0xffffffff, 0xc094d,
1125 	0x3500, 0xffffffff, 0x173,
1126 	0x3504, 0xffffffff, 0xf096d,
1127 	0x3500, 0xffffffff, 0x184,
1128 	0x3504, 0xffffffff, 0x15097f,
1129 	0x3500, 0xffffffff, 0x19b,
1130 	0x3504, 0xffffffff, 0xc0998,
1131 	0x3500, 0xffffffff, 0x1a9,
1132 	0x3504, 0xffffffff, 0x409a7,
1133 	0x3500, 0xffffffff, 0x1af,
1134 	0x3504, 0xffffffff, 0xcdc,
1135 	0x3500, 0xffffffff, 0x1b1,
1136 	0x3504, 0xffffffff, 0x800,
1137 	0x3508, 0xffffffff, 0x6c9b2000,
1138 	0x3510, 0xfc00, 0x2000,
1139 	0x3544, 0xffffffff, 0xfc0,
1140 	0x28d4, 0x00000100, 0x100
1141 };
1142 
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145 	switch (rdev->family) {
1146 	case CHIP_TAHITI:
1147 		radeon_program_register_sequence(rdev,
1148 						 tahiti_golden_registers,
1149 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150 		radeon_program_register_sequence(rdev,
1151 						 tahiti_golden_rlc_registers,
1152 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153 		radeon_program_register_sequence(rdev,
1154 						 tahiti_mgcg_cgcg_init,
1155 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156 		radeon_program_register_sequence(rdev,
1157 						 tahiti_golden_registers2,
1158 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159 		break;
1160 	case CHIP_PITCAIRN:
1161 		radeon_program_register_sequence(rdev,
1162 						 pitcairn_golden_registers,
1163 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164 		radeon_program_register_sequence(rdev,
1165 						 pitcairn_golden_rlc_registers,
1166 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167 		radeon_program_register_sequence(rdev,
1168 						 pitcairn_mgcg_cgcg_init,
1169 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170 		break;
1171 	case CHIP_VERDE:
1172 		radeon_program_register_sequence(rdev,
1173 						 verde_golden_registers,
1174 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1175 		radeon_program_register_sequence(rdev,
1176 						 verde_golden_rlc_registers,
1177 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178 		radeon_program_register_sequence(rdev,
1179 						 verde_mgcg_cgcg_init,
1180 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181 		radeon_program_register_sequence(rdev,
1182 						 verde_pg_init,
1183 						 (const u32)ARRAY_SIZE(verde_pg_init));
1184 		break;
1185 	case CHIP_OLAND:
1186 		radeon_program_register_sequence(rdev,
1187 						 oland_golden_registers,
1188 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1189 		radeon_program_register_sequence(rdev,
1190 						 oland_golden_rlc_registers,
1191 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192 		radeon_program_register_sequence(rdev,
1193 						 oland_mgcg_cgcg_init,
1194 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195 		break;
1196 	case CHIP_HAINAN:
1197 		radeon_program_register_sequence(rdev,
1198 						 hainan_golden_registers,
1199 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1200 		radeon_program_register_sequence(rdev,
1201 						 hainan_golden_registers2,
1202 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203 		radeon_program_register_sequence(rdev,
1204 						 hainan_mgcg_cgcg_init,
1205 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206 		break;
1207 	default:
1208 		break;
1209 	}
1210 }
1211 
1212 #define PCIE_BUS_CLK                10000
1213 #define TCLK                        (PCIE_BUS_CLK / 10)
1214 
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226 	u32 tmp;
1227 
1228 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1229 	if (tmp & MUX_TCLK_TO_XCLK)
1230 		return TCLK;
1231 
1232 	tmp = RREG32(CG_CLKPIN_CNTL);
1233 	if (tmp & XTALIN_DIVIDE)
1234 		return reference_clock / 4;
1235 
1236 	return reference_clock;
1237 }
1238 
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242 	u32 temp;
1243 	int actual_temp = 0;
1244 
1245 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246 		CTF_TEMP_SHIFT;
1247 
1248 	if (temp & 0x200)
1249 		actual_temp = 255;
1250 	else
1251 		actual_temp = temp & 0x1ff;
1252 
1253 	actual_temp = (actual_temp * 1000);
1254 
1255 	return actual_temp;
1256 }
1257 
1258 #define TAHITI_IO_MC_REGS_SIZE 36
1259 
/* Tahiti MC sequencer {IO debug index, data} pairs; written to
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the
 * MC ucode upload.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1298 
/* Pitcairn MC sequencer {IO debug index, data} pairs; identical to the
 * Tahiti table except the final (0x9f) entry.  Used by si_mc_load_microcode().
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1337 
/* Verde MC sequencer {IO debug index, data} pairs; identical to the
 * Tahiti table except the final (0x9f) entry.  Used by si_mc_load_microcode().
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1376 
/* Oland MC sequencer {IO debug index, data} pairs; identical to the
 * Tahiti table except the final (0x9f) entry.  Used by si_mc_load_microcode().
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1415 
/* Hainan MC sequencer {IO debug index, data} pairs; identical to the
 * Tahiti table except the final (0x9f) entry.  Used by si_mc_load_microcode().
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1454 
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458 	const __be32 *fw_data;
1459 	u32 running, blackout = 0;
1460 	u32 *io_mc_regs;
1461 	int i, ucode_size, regs_size;
1462 
1463 	if (!rdev->mc_fw)
1464 		return -EINVAL;
1465 
1466 	switch (rdev->family) {
1467 	case CHIP_TAHITI:
1468 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469 		ucode_size = SI_MC_UCODE_SIZE;
1470 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1471 		break;
1472 	case CHIP_PITCAIRN:
1473 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474 		ucode_size = SI_MC_UCODE_SIZE;
1475 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1476 		break;
1477 	case CHIP_VERDE:
1478 	default:
1479 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1480 		ucode_size = SI_MC_UCODE_SIZE;
1481 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1482 		break;
1483 	case CHIP_OLAND:
1484 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1485 		ucode_size = OLAND_MC_UCODE_SIZE;
1486 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1487 		break;
1488 	case CHIP_HAINAN:
1489 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490 		ucode_size = OLAND_MC_UCODE_SIZE;
1491 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1492 		break;
1493 	}
1494 
1495 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496 
1497 	if (running == 0) {
1498 		if (running) {
1499 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501 		}
1502 
1503 		/* reset the engine and set to writable */
1504 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506 
1507 		/* load mc io regs */
1508 		for (i = 0; i < regs_size; i++) {
1509 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511 		}
1512 		/* load the MC ucode */
1513 		fw_data = (const __be32 *)rdev->mc_fw->data;
1514 		for (i = 0; i < ucode_size; i++)
1515 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516 
1517 		/* put the engine back into the active state */
1518 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521 
1522 		/* wait for training to complete */
1523 		for (i = 0; i < rdev->usec_timeout; i++) {
1524 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525 				break;
1526 			udelay(1);
1527 		}
1528 		for (i = 0; i < rdev->usec_timeout; i++) {
1529 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530 				break;
1531 			udelay(1);
1532 		}
1533 
1534 		if (running)
1535 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536 	}
1537 
1538 	return 0;
1539 }
1540 
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543 	const char *chip_name;
1544 	const char *rlc_chip_name;
1545 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546 	size_t smc_req_size;
1547 	char fw_name[30];
1548 	int err;
1549 
1550 	DRM_DEBUG("\n");
1551 
1552 	switch (rdev->family) {
1553 	case CHIP_TAHITI:
1554 		chip_name = "TAHITI";
1555 		rlc_chip_name = "TAHITI";
1556 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1558 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1559 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1561 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562 		break;
1563 	case CHIP_PITCAIRN:
1564 		chip_name = "PITCAIRN";
1565 		rlc_chip_name = "PITCAIRN";
1566 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1568 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1569 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1571 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572 		break;
1573 	case CHIP_VERDE:
1574 		chip_name = "VERDE";
1575 		rlc_chip_name = "VERDE";
1576 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1578 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1579 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1581 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582 		break;
1583 	case CHIP_OLAND:
1584 		chip_name = "OLAND";
1585 		rlc_chip_name = "OLAND";
1586 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1588 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1589 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592 		break;
1593 	case CHIP_HAINAN:
1594 		chip_name = "HAINAN";
1595 		rlc_chip_name = "HAINAN";
1596 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1598 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1599 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602 		break;
1603 	default: BUG();
1604 	}
1605 
1606 	DRM_INFO("Loading %s Microcode\n", chip_name);
1607 
1608 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1609 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610 	if (err)
1611 		goto out;
1612 	if (rdev->pfp_fw->size != pfp_req_size) {
1613 		printk(KERN_ERR
1614 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615 		       rdev->pfp_fw->size, fw_name);
1616 		err = -EINVAL;
1617 		goto out;
1618 	}
1619 
1620 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1621 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622 	if (err)
1623 		goto out;
1624 	if (rdev->me_fw->size != me_req_size) {
1625 		printk(KERN_ERR
1626 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627 		       rdev->me_fw->size, fw_name);
1628 		err = -EINVAL;
1629 	}
1630 
1631 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1632 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633 	if (err)
1634 		goto out;
1635 	if (rdev->ce_fw->size != ce_req_size) {
1636 		printk(KERN_ERR
1637 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638 		       rdev->ce_fw->size, fw_name);
1639 		err = -EINVAL;
1640 	}
1641 
1642 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1643 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644 	if (err)
1645 		goto out;
1646 	if (rdev->rlc_fw->size != rlc_req_size) {
1647 		printk(KERN_ERR
1648 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649 		       rdev->rlc_fw->size, fw_name);
1650 		err = -EINVAL;
1651 	}
1652 
1653 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1654 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655 	if (err)
1656 		goto out;
1657 	if (rdev->mc_fw->size != mc_req_size) {
1658 		printk(KERN_ERR
1659 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660 		       rdev->mc_fw->size, fw_name);
1661 		err = -EINVAL;
1662 	}
1663 
1664 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1665 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666 	if (err) {
1667 		printk(KERN_ERR
1668 		       "smc: error loading firmware \"%s\"\n",
1669 		       fw_name);
1670 		release_firmware(rdev->smc_fw);
1671 		rdev->smc_fw = NULL;
1672 	} else if (rdev->smc_fw->size != smc_req_size) {
1673 		printk(KERN_ERR
1674 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1675 		       rdev->smc_fw->size, fw_name);
1676 		err = -EINVAL;
1677 	}
1678 
1679 out:
1680 	if (err) {
1681 		if (err != -EINVAL)
1682 			printk(KERN_ERR
1683 			       "si_cp: Failed to load firmware \"%s\"\n",
1684 			       fw_name);
1685 		release_firmware(rdev->pfp_fw);
1686 		rdev->pfp_fw = NULL;
1687 		release_firmware(rdev->me_fw);
1688 		rdev->me_fw = NULL;
1689 		release_firmware(rdev->ce_fw);
1690 		rdev->ce_fw = NULL;
1691 		release_firmware(rdev->rlc_fw);
1692 		rdev->rlc_fw = NULL;
1693 		release_firmware(rdev->mc_fw);
1694 		rdev->mc_fw = NULL;
1695 		release_firmware(rdev->smc_fw);
1696 		rdev->smc_fw = NULL;
1697 	}
1698 	return err;
1699 }
1700 
1701 /* watermark setup */
1702 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1703 				   struct radeon_crtc *radeon_crtc,
1704 				   struct drm_display_mode *mode,
1705 				   struct drm_display_mode *other_mode)
1706 {
1707 	u32 tmp;
1708 	/*
1709 	 * Line Buffer Setup
1710 	 * There are 3 line buffers, each one shared by 2 display controllers.
1711 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1712 	 * the display controllers.  The paritioning is done via one of four
1713 	 * preset allocations specified in bits 21:20:
1714 	 *  0 - half lb
1715 	 *  2 - whole lb, other crtc must be disabled
1716 	 */
1717 	/* this can get tricky if we have two large displays on a paired group
1718 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1719 	 * non-linked crtcs for maximum line buffer allocation.
1720 	 */
1721 	if (radeon_crtc->base.enabled && mode) {
1722 		if (other_mode)
1723 			tmp = 0; /* 1/2 */
1724 		else
1725 			tmp = 2; /* whole */
1726 	} else
1727 		tmp = 0;
1728 
1729 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1730 	       DC_LB_MEMORY_CONFIG(tmp));
1731 
1732 	if (radeon_crtc->base.enabled && mode) {
1733 		switch (tmp) {
1734 		case 0:
1735 		default:
1736 			return 4096 * 2;
1737 		case 2:
1738 			return 8192 * 2;
1739 		}
1740 	}
1741 
1742 	/* controller not enabled, so no lb used */
1743 	return 0;
1744 }
1745 
1746 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1747 {
1748 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1749 
1750 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1751 	case 0:
1752 	default:
1753 		return 1;
1754 	case 1:
1755 		return 2;
1756 	case 2:
1757 		return 4;
1758 	case 3:
1759 		return 8;
1760 	case 4:
1761 		return 3;
1762 	case 5:
1763 		return 6;
1764 	case 6:
1765 		return 10;
1766 	case 7:
1767 		return 12;
1768 	case 8:
1769 		return 16;
1770 	}
1771 }
1772 
/* inputs to the DCE6 display watermark helpers below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1788 
1789 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1790 {
1791 	/* Calculate raw DRAM Bandwidth */
1792 	fixed20_12 dram_efficiency; /* 0.7 */
1793 	fixed20_12 yclk, dram_channels, bandwidth;
1794 	fixed20_12 a;
1795 
1796 	a.full = dfixed_const(1000);
1797 	yclk.full = dfixed_const(wm->yclk);
1798 	yclk.full = dfixed_div(yclk, a);
1799 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1800 	a.full = dfixed_const(10);
1801 	dram_efficiency.full = dfixed_const(7);
1802 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1803 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1804 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1805 
1806 	return dfixed_trunc(bandwidth);
1807 }
1808 
1809 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1810 {
1811 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1812 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1813 	fixed20_12 yclk, dram_channels, bandwidth;
1814 	fixed20_12 a;
1815 
1816 	a.full = dfixed_const(1000);
1817 	yclk.full = dfixed_const(wm->yclk);
1818 	yclk.full = dfixed_div(yclk, a);
1819 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1820 	a.full = dfixed_const(10);
1821 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1822 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1823 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1824 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1825 
1826 	return dfixed_trunc(bandwidth);
1827 }
1828 
1829 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1830 {
1831 	/* Calculate the display Data return Bandwidth */
1832 	fixed20_12 return_efficiency; /* 0.8 */
1833 	fixed20_12 sclk, bandwidth;
1834 	fixed20_12 a;
1835 
1836 	a.full = dfixed_const(1000);
1837 	sclk.full = dfixed_const(wm->sclk);
1838 	sclk.full = dfixed_div(sclk, a);
1839 	a.full = dfixed_const(10);
1840 	return_efficiency.full = dfixed_const(8);
1841 	return_efficiency.full = dfixed_div(return_efficiency, a);
1842 	a.full = dfixed_const(32);
1843 	bandwidth.full = dfixed_mul(a, sclk);
1844 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1845 
1846 	return dfixed_trunc(bandwidth);
1847 }
1848 
1849 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1850 {
1851 	return 32;
1852 }
1853 
1854 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1855 {
1856 	/* Calculate the DMIF Request Bandwidth */
1857 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1858 	fixed20_12 disp_clk, sclk, bandwidth;
1859 	fixed20_12 a, b1, b2;
1860 	u32 min_bandwidth;
1861 
1862 	a.full = dfixed_const(1000);
1863 	disp_clk.full = dfixed_const(wm->disp_clk);
1864 	disp_clk.full = dfixed_div(disp_clk, a);
1865 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1866 	b1.full = dfixed_mul(a, disp_clk);
1867 
1868 	a.full = dfixed_const(1000);
1869 	sclk.full = dfixed_const(wm->sclk);
1870 	sclk.full = dfixed_div(sclk, a);
1871 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1872 	b2.full = dfixed_mul(a, sclk);
1873 
1874 	a.full = dfixed_const(10);
1875 	disp_clk_request_efficiency.full = dfixed_const(8);
1876 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1877 
1878 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1879 
1880 	a.full = dfixed_const(min_bandwidth);
1881 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1882 
1883 	return dfixed_trunc(bandwidth);
1884 }
1885 
1886 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1887 {
1888 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1889 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1890 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1891 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1892 
1893 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1894 }
1895 
1896 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1897 {
1898 	/* Calculate the display mode Average Bandwidth
1899 	 * DisplayMode should contain the source and destination dimensions,
1900 	 * timing, etc.
1901 	 */
1902 	fixed20_12 bpp;
1903 	fixed20_12 line_time;
1904 	fixed20_12 src_width;
1905 	fixed20_12 bandwidth;
1906 	fixed20_12 a;
1907 
1908 	a.full = dfixed_const(1000);
1909 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1910 	line_time.full = dfixed_div(line_time, a);
1911 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1912 	src_width.full = dfixed_const(wm->src_width);
1913 	bandwidth.full = dfixed_mul(src_width, bpp);
1914 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1915 	bandwidth.full = dfixed_div(bandwidth, line_time);
1916 
1917 	return dfixed_trunc(bandwidth);
1918 }
1919 
1920 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1921 {
1922 	/* First calcualte the latency in ns */
1923 	u32 mc_latency = 2000; /* 2000 ns. */
1924 	u32 available_bandwidth = dce6_available_bandwidth(wm);
1925 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1926 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1927 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1928 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1929 		(wm->num_heads * cursor_line_pair_return_time);
1930 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1931 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1932 	u32 tmp, dmif_size = 12288;
1933 	fixed20_12 a, b, c;
1934 
1935 	if (wm->num_heads == 0)
1936 		return 0;
1937 
1938 	a.full = dfixed_const(2);
1939 	b.full = dfixed_const(1);
1940 	if ((wm->vsc.full > a.full) ||
1941 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1942 	    (wm->vtaps >= 5) ||
1943 	    ((wm->vsc.full >= a.full) && wm->interlaced))
1944 		max_src_lines_per_dst_line = 4;
1945 	else
1946 		max_src_lines_per_dst_line = 2;
1947 
1948 	a.full = dfixed_const(available_bandwidth);
1949 	b.full = dfixed_const(wm->num_heads);
1950 	a.full = dfixed_div(a, b);
1951 
1952 	b.full = dfixed_const(mc_latency + 512);
1953 	c.full = dfixed_const(wm->disp_clk);
1954 	b.full = dfixed_div(b, c);
1955 
1956 	c.full = dfixed_const(dmif_size);
1957 	b.full = dfixed_div(c, b);
1958 
1959 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1960 
1961 	b.full = dfixed_const(1000);
1962 	c.full = dfixed_const(wm->disp_clk);
1963 	b.full = dfixed_div(c, b);
1964 	c.full = dfixed_const(wm->bytes_per_pixel);
1965 	b.full = dfixed_mul(b, c);
1966 
1967 	lb_fill_bw = min(tmp, dfixed_trunc(b));
1968 
1969 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1970 	b.full = dfixed_const(1000);
1971 	c.full = dfixed_const(lb_fill_bw);
1972 	b.full = dfixed_div(c, b);
1973 	a.full = dfixed_div(a, b);
1974 	line_fill_time = dfixed_trunc(a);
1975 
1976 	if (line_fill_time < wm->active_time)
1977 		return latency;
1978 	else
1979 		return latency + (line_fill_time - wm->active_time);
1980 
1981 }
1982 
1983 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1984 {
1985 	if (dce6_average_bandwidth(wm) <=
1986 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1987 		return true;
1988 	else
1989 		return false;
1990 };
1991 
1992 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1993 {
1994 	if (dce6_average_bandwidth(wm) <=
1995 	    (dce6_available_bandwidth(wm) / wm->num_heads))
1996 		return true;
1997 	else
1998 		return false;
1999 };
2000 
2001 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2002 {
2003 	u32 lb_partitions = wm->lb_size / wm->src_width;
2004 	u32 line_time = wm->active_time + wm->blank_time;
2005 	u32 latency_tolerant_lines;
2006 	u32 latency_hiding;
2007 	fixed20_12 a;
2008 
2009 	a.full = dfixed_const(1);
2010 	if (wm->vsc.full > a.full)
2011 		latency_tolerant_lines = 1;
2012 	else {
2013 		if (lb_partitions <= (wm->vtaps + 1))
2014 			latency_tolerant_lines = 1;
2015 		else
2016 			latency_tolerant_lines = 2;
2017 	}
2018 
2019 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2020 
2021 	if (dce6_latency_watermark(wm) <= latency_hiding)
2022 		return true;
2023 	else
2024 		return false;
2025 }
2026 
/* dce6_program_watermarks - compute and program display watermarks
 *
 * @rdev: radeon device
 * @radeon_crtc: crtc whose DPG watermark/priority registers are written
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of active display heads
 *
 * Builds wm params for the high- and low-clock cases, derives latency
 * watermarks and priority marks from them, then programs watermark sets
 * A (high clocks) and B (low clocks) into the DPG registers and the
 * priority counts.  The computed line_time and watermarks are cached on
 * the crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line_time capped at the 16-bit register max */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* ARUBA (TN) uses the evergreen-style memory controller */
		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel clock * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula using watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2186 
2187 void dce6_bandwidth_update(struct radeon_device *rdev)
2188 {
2189 	struct drm_display_mode *mode0 = NULL;
2190 	struct drm_display_mode *mode1 = NULL;
2191 	u32 num_heads = 0, lb_size;
2192 	int i;
2193 
2194 	radeon_update_display_priority(rdev);
2195 
2196 	for (i = 0; i < rdev->num_crtc; i++) {
2197 		if (rdev->mode_info.crtcs[i]->base.enabled)
2198 			num_heads++;
2199 	}
2200 	for (i = 0; i < rdev->num_crtc; i += 2) {
2201 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2202 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2203 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2204 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2205 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2206 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2207 	}
2208 }
2209 
2210 /*
2211  * Core functions
2212  */
2213 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2214 {
2215 	const u32 num_tile_mode_states = 32;
2216 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2217 
2218 	switch (rdev->config.si.mem_row_size_in_kb) {
2219 	case 1:
2220 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2221 		break;
2222 	case 2:
2223 	default:
2224 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2225 		break;
2226 	case 4:
2227 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2228 		break;
2229 	}
2230 
2231 	if ((rdev->family == CHIP_TAHITI) ||
2232 	    (rdev->family == CHIP_PITCAIRN)) {
2233 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2234 			switch (reg_offset) {
2235 			case 0:  /* non-AA compressed depth or any compressed stencil */
2236 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2238 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2239 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2240 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2241 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2244 				break;
2245 			case 1:  /* 2xAA/4xAA compressed depth only */
2246 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2248 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2249 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2250 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2251 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2254 				break;
2255 			case 2:  /* 8xAA compressed depth only */
2256 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2258 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2259 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2260 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2261 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2264 				break;
2265 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2266 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2268 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2269 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2270 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2271 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2274 				break;
2275 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2276 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2278 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2279 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2280 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2281 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2284 				break;
2285 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2286 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2288 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2289 						 TILE_SPLIT(split_equal_to_row_size) |
2290 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2291 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2294 				break;
2295 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2296 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2298 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2299 						 TILE_SPLIT(split_equal_to_row_size) |
2300 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2301 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2304 				break;
2305 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2309 						 TILE_SPLIT(split_equal_to_row_size) |
2310 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2311 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2314 				break;
2315 			case 8:  /* 1D and 1D Array Surfaces */
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2317 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2319 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2320 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2321 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2323 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2324 				break;
2325 			case 9:  /* Displayable maps. */
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2329 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2330 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2331 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2334 				break;
2335 			case 10:  /* Display 8bpp. */
2336 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2338 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2339 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2340 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2341 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2344 				break;
2345 			case 11:  /* Display 16bpp. */
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2349 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2350 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2351 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2353 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2354 				break;
2355 			case 12:  /* Display 32bpp. */
2356 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2359 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2360 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2361 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2364 				break;
2365 			case 13:  /* Thin. */
2366 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2368 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2369 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2370 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2371 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2374 				break;
2375 			case 14:  /* Thin 8 bpp. */
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2377 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2378 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2379 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2380 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2381 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2384 				break;
2385 			case 15:  /* Thin 16 bpp. */
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2390 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2391 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2394 				break;
2395 			case 16:  /* Thin 32 bpp. */
2396 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2398 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2399 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2400 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2401 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2403 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2404 				break;
2405 			case 17:  /* Thin 64 bpp. */
2406 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2408 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2409 						 TILE_SPLIT(split_equal_to_row_size) |
2410 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2411 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2413 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2414 				break;
2415 			case 21:  /* 8 bpp PRT. */
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2419 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2420 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2421 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2424 				break;
2425 			case 22:  /* 16 bpp PRT */
2426 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2428 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2429 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2430 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2431 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2434 				break;
2435 			case 23:  /* 32 bpp PRT */
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2439 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2440 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2441 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2444 				break;
2445 			case 24:  /* 64 bpp PRT */
2446 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2448 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2449 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2450 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2451 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2454 				break;
2455 			case 25:  /* 128 bpp PRT */
2456 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2458 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2459 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2460 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2461 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2464 				break;
2465 			default:
2466 				gb_tile_moden = 0;
2467 				break;
2468 			}
2469 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2470 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2471 		}
2472 	} else if ((rdev->family == CHIP_VERDE) ||
2473 		   (rdev->family == CHIP_OLAND) ||
2474 		   (rdev->family == CHIP_HAINAN)) {
2475 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2476 			switch (reg_offset) {
2477 			case 0:  /* non-AA compressed depth or any compressed stencil */
2478 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2479 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2480 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2481 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2483 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2485 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2486 				break;
2487 			case 1:  /* 2xAA/4xAA compressed depth only */
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2491 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2492 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2493 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2496 				break;
2497 			case 2:  /* 8xAA compressed depth only */
2498 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2500 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2503 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2506 				break;
2507 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2508 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2510 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2511 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2513 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2516 				break;
2517 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2518 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2519 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2520 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2521 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2522 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2523 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2526 				break;
2527 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2528 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2531 						 TILE_SPLIT(split_equal_to_row_size) |
2532 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2533 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2536 				break;
2537 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2538 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2541 						 TILE_SPLIT(split_equal_to_row_size) |
2542 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2543 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2545 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546 				break;
2547 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2548 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2551 						 TILE_SPLIT(split_equal_to_row_size) |
2552 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2553 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2556 				break;
2557 			case 8:  /* 1D and 1D Array Surfaces */
2558 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2559 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2560 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2561 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2562 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2563 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2566 				break;
2567 			case 9:  /* Displayable maps. */
2568 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2571 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2572 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2573 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576 				break;
2577 			case 10:  /* Display 8bpp. */
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2581 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2582 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2583 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2586 				break;
2587 			case 11:  /* Display 16bpp. */
2588 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2590 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2592 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2593 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2595 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2596 				break;
2597 			case 12:  /* Display 32bpp. */
2598 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2599 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2600 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2602 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2603 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2606 				break;
2607 			case 13:  /* Thin. */
2608 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2610 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2611 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2612 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2613 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2615 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2616 				break;
2617 			case 14:  /* Thin 8 bpp. */
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2622 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2623 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2626 				break;
2627 			case 15:  /* Thin 16 bpp. */
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2631 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2632 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2633 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2635 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2636 				break;
2637 			case 16:  /* Thin 32 bpp. */
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2641 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2642 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2643 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2646 				break;
2647 			case 17:  /* Thin 64 bpp. */
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2650 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2651 						 TILE_SPLIT(split_equal_to_row_size) |
2652 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2653 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2656 				break;
2657 			case 21:  /* 8 bpp PRT. */
2658 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2662 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2663 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2664 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2665 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2666 				break;
2667 			case 22:  /* 16 bpp PRT */
2668 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2673 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2676 				break;
2677 			case 23:  /* 32 bpp PRT */
2678 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2680 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2681 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2682 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2683 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2685 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2686 				break;
2687 			case 24:  /* 64 bpp PRT */
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2691 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2692 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2693 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2694 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2695 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2696 				break;
2697 			case 25:  /* 128 bpp PRT */
2698 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2700 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2701 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2702 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2703 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2706 				break;
2707 			default:
2708 				gb_tile_moden = 0;
2709 				break;
2710 			}
2711 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2712 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2713 		}
2714 	} else
2715 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2716 }
2717 
2718 static void si_select_se_sh(struct radeon_device *rdev,
2719 			    u32 se_num, u32 sh_num)
2720 {
2721 	u32 data = INSTANCE_BROADCAST_WRITES;
2722 
2723 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2724 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2725 	else if (se_num == 0xffffffff)
2726 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2727 	else if (sh_num == 0xffffffff)
2728 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2729 	else
2730 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2731 	WREG32(GRBM_GFX_INDEX, data);
2732 }
2733 
static u32 si_create_bitmask(u32 bit_width)
{
	/* Return a mask with the low bit_width bits set.  Returns 0 for
	 * a width of 0 and saturates to all ones for widths >= 32,
	 * matching the behavior of building the mask bit by bit.
	 */
	if (bit_width >= 32)
		return 0xffffffff;
	return (1u << bit_width) - 1;
}
2744 
2745 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2746 {
2747 	u32 data, mask;
2748 
2749 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2750 	if (data & 1)
2751 		data &= INACTIVE_CUS_MASK;
2752 	else
2753 		data = 0;
2754 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2755 
2756 	data >>= INACTIVE_CUS_SHIFT;
2757 
2758 	mask = si_create_bitmask(cu_per_sh);
2759 
2760 	return ~data & mask;
2761 }
2762 
2763 static void si_setup_spi(struct radeon_device *rdev,
2764 			 u32 se_num, u32 sh_per_se,
2765 			 u32 cu_per_sh)
2766 {
2767 	int i, j, k;
2768 	u32 data, mask, active_cu;
2769 
2770 	for (i = 0; i < se_num; i++) {
2771 		for (j = 0; j < sh_per_se; j++) {
2772 			si_select_se_sh(rdev, i, j);
2773 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2774 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2775 
2776 			mask = 1;
2777 			for (k = 0; k < 16; k++) {
2778 				mask <<= k;
2779 				if (active_cu & mask) {
2780 					data &= ~mask;
2781 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2782 					break;
2783 				}
2784 			}
2785 		}
2786 	}
2787 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2788 }
2789 
2790 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2791 			      u32 max_rb_num, u32 se_num,
2792 			      u32 sh_per_se)
2793 {
2794 	u32 data, mask;
2795 
2796 	data = RREG32(CC_RB_BACKEND_DISABLE);
2797 	if (data & 1)
2798 		data &= BACKEND_DISABLE_MASK;
2799 	else
2800 		data = 0;
2801 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2802 
2803 	data >>= BACKEND_DISABLE_SHIFT;
2804 
2805 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2806 
2807 	return data & mask;
2808 }
2809 
/* Gather the disabled-RB state from every SH, derive the global
 * enabled-RB bitmap, and program PA_SC_RASTER_CONFIG per SE so the
 * rasterizer only maps to working render backends.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect per-SH disabled-RB bits into one global bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into the enabled-RB bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program the raster config per SE: consume two enabled-RB bits
	 * per SH and pick the matching RB_MAP encoding */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2857 
/* One-time GPU bring-up: record the per-ASIC shader topology, derive
 * the tiling configuration from the memory controller settings,
 * program the address-config registers, and apply the HW defaults for
 * the 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC topology limits plus the "golden" GB_ADDR_CONFIG.
	 * Note CHIP_VERDE doubles as the default case. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but not consumed in
	 * this function — confirm whether the read can be dropped. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, clamped to 4) from the memory
	 * controller's column count */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every client that decodes
	 * tiled addresses (display, HDP, both DMA engines, UVD) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: keeps the reset value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the register writes settle before continuing */
	udelay(50);
}
3114 
/*
 * GPU scratch register helper functions.
 */
3118 static void si_scratch_init(struct radeon_device *rdev)
3119 {
3120 	int i;
3121 
3122 	rdev->scratch.num_reg = 7;
3123 	rdev->scratch.reg_base = SCRATCH_REG0;
3124 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3125 		rdev->scratch.free[i] = true;
3126 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3127 	}
3128 }
3129 
/* Emit a fence on the given ring: flush the read caches over GART,
 * then write the fence sequence number to the fence address and raise
 * an interrupt via EVENT_WRITE_EOP.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3156 
/*
 * Indirect buffer (IB) handling.
 */
/* Schedule an indirect buffer on the given gfx/compute ring and, for
 * normal (non-const) IBs, flush the read caches for the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this packet, 4 for the IB packet,
			 * 8 for the cache-flush packets below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3216 
3217 /*
3218  * CP.
3219  */
3220 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3221 {
3222 	if (enable)
3223 		WREG32(CP_ME_CNTL, 0);
3224 	else {
3225 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3226 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3227 		WREG32(SCRATCH_UMSK, 0);
3228 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3229 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3230 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3231 	}
3232 	udelay(50);
3233 }
3234 
3235 static int si_cp_load_microcode(struct radeon_device *rdev)
3236 {
3237 	const __be32 *fw_data;
3238 	int i;
3239 
3240 	if (!rdev->me_fw || !rdev->pfp_fw)
3241 		return -EINVAL;
3242 
3243 	si_cp_enable(rdev, false);
3244 
3245 	/* PFP */
3246 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3247 	WREG32(CP_PFP_UCODE_ADDR, 0);
3248 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3249 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3250 	WREG32(CP_PFP_UCODE_ADDR, 0);
3251 
3252 	/* CE */
3253 	fw_data = (const __be32 *)rdev->ce_fw->data;
3254 	WREG32(CP_CE_UCODE_ADDR, 0);
3255 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3256 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3257 	WREG32(CP_CE_UCODE_ADDR, 0);
3258 
3259 	/* ME */
3260 	fw_data = (const __be32 *)rdev->me_fw->data;
3261 	WREG32(CP_ME_RAM_WADDR, 0);
3262 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3263 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3264 	WREG32(CP_ME_RAM_WADDR, 0);
3265 
3266 	WREG32(CP_PFP_UCODE_ADDR, 0);
3267 	WREG32(CP_CE_UCODE_ADDR, 0);
3268 	WREG32(CP_ME_RAM_WADDR, 0);
3269 	WREG32(CP_ME_RAM_RADDR, 0);
3270 	return 0;
3271 }
3272 
3273 static int si_cp_start(struct radeon_device *rdev)
3274 {
3275 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3276 	int r, i;
3277 
3278 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3279 	if (r) {
3280 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3281 		return r;
3282 	}
3283 	/* init the CP */
3284 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3285 	radeon_ring_write(ring, 0x1);
3286 	radeon_ring_write(ring, 0x0);
3287 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3288 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3289 	radeon_ring_write(ring, 0);
3290 	radeon_ring_write(ring, 0);
3291 
3292 	/* init the CE partitions */
3293 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3294 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3295 	radeon_ring_write(ring, 0xc000);
3296 	radeon_ring_write(ring, 0xe000);
3297 	radeon_ring_unlock_commit(rdev, ring);
3298 
3299 	si_cp_enable(rdev, true);
3300 
3301 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3302 	if (r) {
3303 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3304 		return r;
3305 	}
3306 
3307 	/* setup clear context state */
3308 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3309 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3310 
3311 	for (i = 0; i < si_default_size; i++)
3312 		radeon_ring_write(ring, si_default_state[i]);
3313 
3314 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3315 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3316 
3317 	/* set clear context state */
3318 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3319 	radeon_ring_write(ring, 0);
3320 
3321 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3322 	radeon_ring_write(ring, 0x00000316);
3323 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3324 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3325 
3326 	radeon_ring_unlock_commit(rdev, ring);
3327 
3328 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3329 		ring = &rdev->ring[i];
3330 		r = radeon_ring_lock(rdev, ring, 2);
3331 
3332 		/* clear the compute context state */
3333 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3334 		radeon_ring_write(ring, 0);
3335 
3336 		radeon_ring_unlock_commit(rdev, ring);
3337 	}
3338 
3339 	return 0;
3340 }
3341 
3342 static void si_cp_fini(struct radeon_device *rdev)
3343 {
3344 	struct radeon_ring *ring;
3345 	si_cp_enable(rdev, false);
3346 
3347 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3348 	radeon_ring_fini(rdev, ring);
3349 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3350 
3351 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3352 	radeon_ring_fini(rdev, ring);
3353 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3354 
3355 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3356 	radeon_ring_fini(rdev, ring);
3357 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3358 }
3359 
/* Reset the CP block, program all three ring buffers (gfx + two
 * compute) and start them, then ring-test each.  Returns 0 on success
 * or the gfx ring-test error; compute ring-test failures only mark
 * that ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: keep rptr updates off for ring 0 */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: nothing works without it */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3495 
/* Inspect the GRBM/SRBM/DMA/VM status registers and return a
 * RADEON_RESET_* bitmask of the blocks that appear hung.  MC busy is
 * reported but filtered out since it is most likely just busy.
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3576 
/**
 * si_gpu_soft_reset - soft reset the blocks named in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask, typically from si_gpu_check_soft_reset()
 *
 * Halts the CP and any DMA engines being reset, stops the MC, pulses the
 * matching GRBM/SRBM soft-reset bits (set, delay, clear), then restores
 * the MC. No-op when reset_mask is 0. Statement ordering here follows the
 * required hardware reset sequencing and must not be rearranged.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the requested RADEON_RESET_* bits onto GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* read back, presumably to post the write before the delay */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		/* read back, presumably to post the write before the delay */
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3701 
3702 int si_asic_reset(struct radeon_device *rdev)
3703 {
3704 	u32 reset_mask;
3705 
3706 	reset_mask = si_gpu_check_soft_reset(rdev);
3707 
3708 	if (reset_mask)
3709 		r600_set_bios_scratch_engine_hung(rdev, true);
3710 
3711 	si_gpu_soft_reset(rdev, reset_mask);
3712 
3713 	reset_mask = si_gpu_check_soft_reset(rdev);
3714 
3715 	if (!reset_mask)
3716 		r600_set_bios_scratch_engine_hung(rdev, false);
3717 
3718 	return 0;
3719 }
3720 
3721 /**
3722  * si_gfx_is_lockup - Check if the GFX engine is locked up
3723  *
3724  * @rdev: radeon_device pointer
3725  * @ring: radeon_ring structure holding ring information
3726  *
3727  * Check if the GFX engine is locked up.
3728  * Returns true if the engine appears to be locked up, false if not.
3729  */
3730 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3731 {
3732 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3733 
3734 	if (!(reset_mask & (RADEON_RESET_GFX |
3735 			    RADEON_RESET_COMPUTE |
3736 			    RADEON_RESET_CP))) {
3737 		radeon_ring_lockup_update(ring);
3738 		return false;
3739 	}
3740 	/* force CP activities */
3741 	radeon_ring_force_activity(rdev, ring);
3742 	return radeon_ring_test_lockup(rdev, ring);
3743 }
3744 
3745 /**
3746  * si_dma_is_lockup - Check if the DMA engine is locked up
3747  *
3748  * @rdev: radeon_device pointer
3749  * @ring: radeon_ring structure holding ring information
3750  *
3751  * Check if the async DMA engine is locked up.
3752  * Returns true if the engine appears to be locked up, false if not.
3753  */
3754 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3755 {
3756 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3757 	u32 mask;
3758 
3759 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3760 		mask = RADEON_RESET_DMA;
3761 	else
3762 		mask = RADEON_RESET_DMA1;
3763 
3764 	if (!(reset_mask & mask)) {
3765 		radeon_ring_lockup_update(ring);
3766 		return false;
3767 	}
3768 	/* force ring activities */
3769 	radeon_ring_force_activity(rdev, ring);
3770 	return radeon_ring_test_lockup(rdev, ring);
3771 }
3772 
3773 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP surface registers, stops the MC, programs the system
 * aperture and FB location from rdev->mc, then resumes the MC and
 * disables the VGA renderer. Statement ordering follows the required
 * MC programming sequence.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: high 16 bits hold vram_end >> 24, low 16 bits vram_start >> 24 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on SI: program an empty aperture (top < bottom) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3824 
3825 void si_vram_gtt_location(struct radeon_device *rdev,
3826 			  struct radeon_mc *mc)
3827 {
3828 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3829 		/* leave room for at least 1024M GTT */
3830 		dev_warn(rdev->dev, "limiting VRAM\n");
3831 		mc->real_vram_size = 0xFFC0000000ULL;
3832 		mc->mc_vram_size = 0xFFC0000000ULL;
3833 	}
3834 	radeon_vram_location(rdev, &rdev->mc, 0);
3835 	rdev->mc.gtt_base_align = 0;
3836 	radeon_gtt_location(rdev, mc);
3837 }
3838 
3839 static int si_mc_init(struct radeon_device *rdev)
3840 {
3841 	u32 tmp;
3842 	int chansize, numchan;
3843 
3844 	/* Get VRAM informations */
3845 	rdev->mc.vram_is_ddr = true;
3846 	tmp = RREG32(MC_ARB_RAMCFG);
3847 	if (tmp & CHANSIZE_OVERRIDE) {
3848 		chansize = 16;
3849 	} else if (tmp & CHANSIZE_MASK) {
3850 		chansize = 64;
3851 	} else {
3852 		chansize = 32;
3853 	}
3854 	tmp = RREG32(MC_SHARED_CHMAP);
3855 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3856 	case 0:
3857 	default:
3858 		numchan = 1;
3859 		break;
3860 	case 1:
3861 		numchan = 2;
3862 		break;
3863 	case 2:
3864 		numchan = 4;
3865 		break;
3866 	case 3:
3867 		numchan = 8;
3868 		break;
3869 	case 4:
3870 		numchan = 3;
3871 		break;
3872 	case 5:
3873 		numchan = 6;
3874 		break;
3875 	case 6:
3876 		numchan = 10;
3877 		break;
3878 	case 7:
3879 		numchan = 12;
3880 		break;
3881 	case 8:
3882 		numchan = 16;
3883 		break;
3884 	}
3885 	rdev->mc.vram_width = numchan * chansize;
3886 	/* Could aper size report 0 ? */
3887 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3888 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3889 	/* size in MB on si */
3890 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3891 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3892 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3893 	si_vram_gtt_location(rdev, &rdev->mc);
3894 	radeon_update_bandwidth_info(rdev);
3895 
3896 	return 0;
3897 }
3898 
3899 /*
3900  * GART
3901  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and invalidate the VM TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then requests a TLB invalidate so GART page
 * table updates become visible to the GPU.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	/* NOTE(review): only bit 0 (context 0, the GART context) is
	 * actually requested here — confirm that is intentional */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3910 
/**
 * si_pcie_gart_enable - set up the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the TLB and L2 cache, points
 * VM context 0 at the GART table (contexts 1-15 get a placeholder page
 * table base and are enabled with fault reporting), then flushes the
 * TLB. Returns 0 on success or a negative error code.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context 0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, cleared here — purpose unknown */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3995 
/**
 * si_pcie_gart_disable - disable the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Disables VM contexts 0 and 1, turns off the L1 TLB and the L2 cache,
 * then unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4014 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the GART table and releases the
 * GART bookkeeping. Order matters: hardware must be quiesced before
 * the table memory is freed.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4021 
4022 /* vm parser */
4023 static bool si_vm_reg_valid(u32 reg)
4024 {
4025 	/* context regs are fine */
4026 	if (reg >= 0x28000)
4027 		return true;
4028 
4029 	/* check config regs */
4030 	switch (reg) {
4031 	case GRBM_GFX_INDEX:
4032 	case CP_STRMOUT_CNTL:
4033 	case VGT_VTX_VECT_EJECT_REG:
4034 	case VGT_CACHE_INVALIDATION:
4035 	case VGT_ESGS_RING_SIZE:
4036 	case VGT_GSVS_RING_SIZE:
4037 	case VGT_GS_VERTEX_REUSE:
4038 	case VGT_PRIMITIVE_TYPE:
4039 	case VGT_INDEX_TYPE:
4040 	case VGT_NUM_INDICES:
4041 	case VGT_NUM_INSTANCES:
4042 	case VGT_TF_RING_SIZE:
4043 	case VGT_HS_OFFCHIP_PARAM:
4044 	case VGT_TF_MEMORY_BASE:
4045 	case PA_CL_ENHANCE:
4046 	case PA_SU_LINE_STIPPLE_VALUE:
4047 	case PA_SC_LINE_STIPPLE_STATE:
4048 	case PA_SC_ENHANCE:
4049 	case SQC_CACHES:
4050 	case SPI_STATIC_THREAD_MGMT_1:
4051 	case SPI_STATIC_THREAD_MGMT_2:
4052 	case SPI_STATIC_THREAD_MGMT_3:
4053 	case SPI_PS_MAX_WAVE_ID:
4054 	case SPI_CONFIG_CNTL:
4055 	case SPI_CONFIG_CNTL_1:
4056 	case TA_CNTL_AUX:
4057 		return true;
4058 	default:
4059 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4060 		return false;
4061 	}
4062 }
4063 
4064 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4065 				  u32 *ib, struct radeon_cs_packet *pkt)
4066 {
4067 	switch (pkt->opcode) {
4068 	case PACKET3_NOP:
4069 	case PACKET3_SET_BASE:
4070 	case PACKET3_SET_CE_DE_COUNTERS:
4071 	case PACKET3_LOAD_CONST_RAM:
4072 	case PACKET3_WRITE_CONST_RAM:
4073 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4074 	case PACKET3_DUMP_CONST_RAM:
4075 	case PACKET3_INCREMENT_CE_COUNTER:
4076 	case PACKET3_WAIT_ON_DE_COUNTER:
4077 	case PACKET3_CE_WRITE:
4078 		break;
4079 	default:
4080 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4081 		return -EINVAL;
4082 	}
4083 	return 0;
4084 }
4085 
/**
 * si_vm_packet3_gfx_check - validate a packet3 for the GFX ring
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Most opcodes are allowed outright; packets that can write registers
 * (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA)
 * have their destination registers checked with si_vm_reg_valid().
 * Security-sensitive: this is the only gate between userspace VM IBs
 * and config register writes. Returns 0 if OK, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;        /* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* presumably bits 8-11 select the destination type; 0 means
		 * a register write — validate the target (TODO confirm) */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16 set: single register written repeatedly;
			 * clear: consecutive register range */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		/* whole range must lie inside the config register window */
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				/* SAIC: source address stays constant,
				 * otherwise the copy walks a register range */
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4244 
/**
 * si_vm_packet3_compute_check - validate a packet3 for the compute rings
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Same idea as si_vm_packet3_gfx_check() but with the smaller opcode
 * whitelist allowed on the compute (CP1/CP2) rings: draw and CP_DMA
 * packets are rejected here. Register-writing packets have their
 * destinations checked with si_vm_reg_valid(). Returns 0 if OK,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;        /* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* presumably bits 8-11 select the destination type; 0 means
		 * a register write — validate the target (TODO confirm) */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16 set: single register; clear: register range */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4326 
/**
 * si_ib_parse - validate the PM4 packets of a VM indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet. Type-0 packets are rejected outright;
 * type-2 packets are skipped; type-3 packets are dispatched to the CE,
 * GFX or compute checker depending on ib->is_const_ib and ib->ring.
 * Returns 0 when the whole IB validates, -EINVAL on the first bad packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are one-dword padding/NOPs */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* advance past the header dword plus count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4378 
4379 /*
4380  * vm
4381  */
/**
 * si_vm_init - initialize the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * Sets the number of VM contexts and the VRAM base offset used by the
 * generic VM code. Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4391 
/**
 * si_vm_fini - VM manager teardown
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; present to satisfy the asic interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4395 
4396 /**
4397  * si_vm_decode_fault - print human readable fault info
4398  *
4399  * @rdev: radeon_device pointer
4400  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4401  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4402  *
4403  * Print human readable fault information (SI).
4404  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* decode the fault status fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* Tahiti uses a different MC client-id to block mapping than the
	 * other SI parts, hence the two near-duplicate tables below */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* non-Tahiti SI parts */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4659 
4660 /**
4661  * si_vm_set_page - update the page tables using the CP
4662  *
4663  * @rdev: radeon_device pointer
4664  * @ib: indirect buffer to fill with commands
4665  * @pe: addr of the page entry
4666  * @addr: dst addr to write into pe
4667  * @count: number of page entries to update
4668  * @incr: increase next addr by incr bytes
4669  * @flags: access flags
4670  *
4671  * Update the page tables using the CP (SI).
4672  */
4673 void si_vm_set_page(struct radeon_device *rdev,
4674 		    struct radeon_ib *ib,
4675 		    uint64_t pe,
4676 		    uint64_t addr, unsigned count,
4677 		    uint32_t incr, uint32_t flags)
4678 {
4679 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4680 	uint64_t value;
4681 	unsigned ndw;
4682 
4683 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4684 		while (count) {
4685 			ndw = 2 + count * 2;
4686 			if (ndw > 0x3FFE)
4687 				ndw = 0x3FFE;
4688 
4689 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4690 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4691 					WRITE_DATA_DST_SEL(1));
4692 			ib->ptr[ib->length_dw++] = pe;
4693 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4694 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4695 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4696 					value = radeon_vm_map_gart(rdev, addr);
4697 					value &= 0xFFFFFFFFFFFFF000ULL;
4698 				} else if (flags & RADEON_VM_PAGE_VALID) {
4699 					value = addr;
4700 				} else {
4701 					value = 0;
4702 				}
4703 				addr += incr;
4704 				value |= r600_flags;
4705 				ib->ptr[ib->length_dw++] = value;
4706 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4707 			}
4708 		}
4709 	} else {
4710 		/* DMA */
4711 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4712 			while (count) {
4713 				ndw = count * 2;
4714 				if (ndw > 0xFFFFE)
4715 					ndw = 0xFFFFE;
4716 
4717 				/* for non-physically contiguous pages (system) */
4718 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4719 				ib->ptr[ib->length_dw++] = pe;
4720 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4721 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4722 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4723 						value = radeon_vm_map_gart(rdev, addr);
4724 						value &= 0xFFFFFFFFFFFFF000ULL;
4725 					} else if (flags & RADEON_VM_PAGE_VALID) {
4726 						value = addr;
4727 					} else {
4728 						value = 0;
4729 					}
4730 					addr += incr;
4731 					value |= r600_flags;
4732 					ib->ptr[ib->length_dw++] = value;
4733 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4734 				}
4735 			}
4736 		} else {
4737 			while (count) {
4738 				ndw = count * 2;
4739 				if (ndw > 0xFFFFE)
4740 					ndw = 0xFFFFE;
4741 
4742 				if (flags & RADEON_VM_PAGE_VALID)
4743 					value = addr;
4744 				else
4745 					value = 0;
4746 				/* for physically contiguous pages (vram) */
4747 				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4748 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4749 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4750 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4751 				ib->ptr[ib->length_dw++] = 0;
4752 				ib->ptr[ib->length_dw++] = value; /* value */
4753 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4754 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4755 				ib->ptr[ib->length_dw++] = 0;
4756 				pe += ndw * 4;
4757 				addr += (ndw / 2) * incr;
4758 				count -= ndw / 2;
4759 			}
4760 		}
4761 		while (ib->length_dw & 0x7)
4762 			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4763 	}
4764 }
4765 
/**
 * si_vm_flush - flush the TLB for a VM via the gfx ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: vm to flush; NULL is a no-op
 *
 * Writes the new page directory base for the VM's context, flushes the
 * HDP cache and requests a TLB invalidate for that context id.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* VM contexts 0-7 and 8-15 live in two separate register ranges */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4808 
/**
 * si_dma_vm_flush - flush the TLB for a VM via the DMA ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: vm to flush; NULL is a no-op
 *
 * Same sequence as si_vm_flush(), but emitted as SRBM register writes
 * on the async DMA ring.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update the page directory base for this VM's context */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4834 
4835 /*
4836  *  Power and clock gating
4837  */
4838 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4839 {
4840 	int i;
4841 
4842 	for (i = 0; i < rdev->usec_timeout; i++) {
4843 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4844 			break;
4845 		udelay(1);
4846 	}
4847 
4848 	for (i = 0; i < rdev->usec_timeout; i++) {
4849 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4850 			break;
4851 		udelay(1);
4852 	}
4853 }
4854 
/* Enable/disable the gfx context busy/empty ("gui idle") interrupts on
 * CP ring 0.  When disabling, additionally poll RLC_STAT until only the
 * clock/power status bits remain set (or the usec timeout expires).
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		/* wait for the masked status bits to settle to
		 * GFX_CLOCK_STATUS | GFX_POWER_STATUS */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4880 
4881 static void si_set_uvd_dcm(struct radeon_device *rdev,
4882 			   bool sw_mode)
4883 {
4884 	u32 tmp, tmp2;
4885 
4886 	tmp = RREG32(UVD_CGC_CTRL);
4887 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4888 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4889 
4890 	if (sw_mode) {
4891 		tmp &= ~0x7ffff800;
4892 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4893 	} else {
4894 		tmp |= 0x7ffff800;
4895 		tmp2 = 0;
4896 	}
4897 
4898 	WREG32(UVD_CGC_CTRL, tmp);
4899 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4900 }
4901 
/* Select the UVD internal clock gating mode.  hw_mode is hard-wired to
 * true, so this always takes the si_set_uvd_dcm(rdev, false) path; the
 * else branch (clearing DCM entirely) is currently dead code kept for
 * the alternative configuration.
 */
static void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4914 
4915 static u32 si_halt_rlc(struct radeon_device *rdev)
4916 {
4917 	u32 data, orig;
4918 
4919 	orig = data = RREG32(RLC_CNTL);
4920 
4921 	if (data & RLC_ENABLE) {
4922 		data &= ~RLC_ENABLE;
4923 		WREG32(RLC_CNTL, data);
4924 
4925 		si_wait_for_rlc_serdes(rdev);
4926 	}
4927 
4928 	return orig;
4929 }
4930 
4931 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4932 {
4933 	u32 tmp;
4934 
4935 	tmp = RREG32(RLC_CNTL);
4936 	if (tmp != rlc)
4937 		WREG32(RLC_CNTL, rlc);
4938 }
4939 
4940 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4941 {
4942 	u32 data, orig;
4943 
4944 	orig = data = RREG32(DMA_PG);
4945 	if (enable)
4946 		data |= PG_CNTL_ENABLE;
4947 	else
4948 		data &= ~PG_CNTL_ENABLE;
4949 	if (orig != data)
4950 		WREG32(DMA_PG, data);
4951 }
4952 
4953 static void si_init_dma_pg(struct radeon_device *rdev)
4954 {
4955 	u32 tmp;
4956 
4957 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4958 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4959 
4960 	for (tmp = 0; tmp < 5; tmp++)
4961 		WREG32(DMA_PGFSM_WRITE, 0);
4962 }
4963 
/* Enable/disable gfx power gating via the RLC.  On enable: program the
 * RLC_TTOP_D threshold fields, set GFX_PG_ENABLE and turn on automatic
 * power gating.  On disable: clear AUTO_PG_EN and perform a read of
 * DB_RENDER_CONTROL (result intentionally unused).
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable) {
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* gfx register read; value discarded */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4988 
/* One-time gfx power gating setup: point the RLC at the save/restore
 * and clear-state buffers and program the auto power gating control
 * register (GRBM_REG_SGIT field set to 0x700, PG_AFTER_GRBM_REG_ST
 * cleared).
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5008 
/* Return a bitmap of the CUs in the given shader engine / shader array
 * that are NOT flagged in the hw/user shader array config registers,
 * limited to max_cu_per_sh bits.
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	si_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* restore broadcast so later accesses hit all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* only the upper 16 bits of the hw config are relevant; the user
	 * config's low bits get shifted out below */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
5031 
5032 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5033 {
5034 	u32 i, j, k, active_cu_number = 0;
5035 	u32 mask, counter, cu_bitmap;
5036 	u32 tmp = 0;
5037 
5038 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5039 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5040 			mask = 1;
5041 			cu_bitmap = 0;
5042 			counter  = 0;
5043 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5044 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5045 					if (counter < 2)
5046 						cu_bitmap |= mask;
5047 					counter++;
5048 				}
5049 				mask <<= 1;
5050 			}
5051 
5052 			active_cu_number += counter;
5053 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5054 		}
5055 	}
5056 
5057 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5058 
5059 	tmp = RREG32(RLC_MAX_PG_CU);
5060 	tmp &= ~MAX_PU_CU_MASK;
5061 	tmp |= MAX_PU_CU(active_cu_number);
5062 	WREG32(RLC_MAX_PG_CU, tmp);
5063 }
5064 
/* Enable/disable coarse grain clock gating (CGCG/CGLS) for gfx. */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	si_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* reprogram the serdes with the RLC halted, then restore it */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* four back-to-back reads; NOTE(review): presumably to let the
		 * CB sclk gating state settle before clearing CGCG — confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant write when the control value is unchanged */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5102 
/* Enable/disable medium grain clock gating, including CP memory light
 * sleep and the RLC serdes configuration.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep on */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear the low six override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* reprogram the serdes with the RLC halted, then restore it */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the low override bits to force gating off */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5156 
/* Enable/disable UVD medium grain clock gating: memory gating bits in
 * UVD_CGC_MEM_CTRL, the DCM bit in UVD_CGC_CTRL, and the SMC CGTT local
 * overrides (0 when gating on, all ones when off).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5188 
/* MC/VM clock gating registers whose MC_LS_ENABLE bit is toggled as a
 * group by si_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5201 
5202 static void si_enable_mc_ls(struct radeon_device *rdev,
5203 			    bool enable)
5204 {
5205 	int i;
5206 	u32 orig, data;
5207 
5208 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5209 		orig = data = RREG32(mc_cg_registers[i]);
5210 		if (enable)
5211 			data |= MC_LS_ENABLE;
5212 		else
5213 			data &= ~MC_LS_ENABLE;
5214 		if (data != orig)
5215 			WREG32(mc_cg_registers[i], data);
5216 	}
5217 }
5218 
5219 
/* Top level clock gating init: medium grain on, coarse grain off, plus
 * UVD gating when a UVD block is present.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_enable_mgcg(rdev, true);
	si_enable_cgcg(rdev, false);
	/* disable MC LS on Tahiti */
	if (rdev->family == CHIP_TAHITI)
		si_enable_mc_ls(rdev, false);
	if (rdev->has_uvd) {
		si_enable_uvd_mgcg(rdev, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5232 
/* Tear down clock gating: UVD first, then coarse and medium grain. */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd)
		si_enable_uvd_mgcg(rdev, false);
	si_enable_cgcg(rdev, false);
	si_enable_mgcg(rdev, false);
}
5240 
/* Power gating init.  PG is currently compiled out (see the #if 0
 * block — only cape verde would enable it); the fallback path just
 * programs the RLC save/restore and clear-state buffer addresses.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;
#if 0
	/* only cape verde supports PG */
	if (rdev->family == CHIP_VERDE)
		has_pg = true;
#endif
	if (has_pg) {
		si_init_ao_cu_mask(rdev);
		si_init_dma_pg(rdev);
		si_enable_dma_pg(rdev, true);
		si_init_gfx_cgpg(rdev);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5260 
5261 static void si_fini_pg(struct radeon_device *rdev)
5262 {
5263 	bool has_pg = false;
5264 
5265 	/* only cape verde supports PG */
5266 	if (rdev->family == CHIP_VERDE)
5267 		has_pg = true;
5268 
5269 	if (has_pg) {
5270 		si_enable_dma_pg(rdev, false);
5271 		si_enable_gfx_cgpg(rdev, false);
5272 	}
5273 }
5274 
5275 /*
5276  * RLC
5277  */
5278 void si_rlc_fini(struct radeon_device *rdev)
5279 {
5280 	int r;
5281 
5282 	/* save restore block */
5283 	if (rdev->rlc.save_restore_obj) {
5284 		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5285 		if (unlikely(r != 0))
5286 			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
5287 		radeon_bo_unpin(rdev->rlc.save_restore_obj);
5288 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5289 
5290 		radeon_bo_unref(&rdev->rlc.save_restore_obj);
5291 		rdev->rlc.save_restore_obj = NULL;
5292 	}
5293 
5294 	/* clear state block */
5295 	if (rdev->rlc.clear_state_obj) {
5296 		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5297 		if (unlikely(r != 0))
5298 			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
5299 		radeon_bo_unpin(rdev->rlc.clear_state_obj);
5300 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5301 
5302 		radeon_bo_unref(&rdev->rlc.clear_state_obj);
5303 		rdev->rlc.clear_state_obj = NULL;
5304 	}
5305 }
5306 
5307 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5308 
5309 int si_rlc_init(struct radeon_device *rdev)
5310 {
5311 	volatile u32 *dst_ptr;
5312 	u32 dws, data, i, j, k, reg_num;
5313 	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
5314 	u64 reg_list_mc_addr;
5315 	const struct cs_section_def *cs_data = si_cs_data;
5316 	int r;
5317 
5318 	/* save restore block */
5319 	if (rdev->rlc.save_restore_obj == NULL) {
5320 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
5321 				     RADEON_GEM_DOMAIN_VRAM, NULL,
5322 				     &rdev->rlc.save_restore_obj);
5323 		if (r) {
5324 			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
5325 			return r;
5326 		}
5327 	}
5328 
5329 	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5330 	if (unlikely(r != 0)) {
5331 		si_rlc_fini(rdev);
5332 		return r;
5333 	}
5334 	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
5335 			  &rdev->rlc.save_restore_gpu_addr);
5336 	if (r) {
5337 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5338 		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
5339 		si_rlc_fini(rdev);
5340 		return r;
5341 	}
5342 
5343 	if (rdev->family == CHIP_VERDE) {
5344 		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
5345 		if (r) {
5346 			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
5347 			si_rlc_fini(rdev);
5348 		return r;
5349 		}
5350 		/* write the sr buffer */
5351 		dst_ptr = rdev->rlc.sr_ptr;
5352 		for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
5353 			dst_ptr[i] = verde_rlc_save_restore_register_list[i];
5354 		}
5355 		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
5356 	}
5357 	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5358 
5359 	/* clear state block */
5360 	reg_list_num = 0;
5361 	dws = 0;
5362 	for (i = 0; cs_data[i].section != NULL; i++) {
5363 		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5364 			reg_list_num++;
5365 			dws += cs_data[i].section[j].reg_count;
5366 		}
5367 	}
5368 	reg_list_blk_index = (3 * reg_list_num + 2);
5369 	dws += reg_list_blk_index;
5370 
5371 	if (rdev->rlc.clear_state_obj == NULL) {
5372 		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
5373 				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
5374 		if (r) {
5375 			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
5376 			si_rlc_fini(rdev);
5377 			return r;
5378 		}
5379 	}
5380 	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5381 	if (unlikely(r != 0)) {
5382 		si_rlc_fini(rdev);
5383 		return r;
5384 	}
5385 	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
5386 			  &rdev->rlc.clear_state_gpu_addr);
5387 	if (r) {
5388 
5389 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5390 		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
5391 		si_rlc_fini(rdev);
5392 		return r;
5393 	}
5394 	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
5395 	if (r) {
5396 		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
5397 		si_rlc_fini(rdev);
5398 		return r;
5399 	}
5400 	/* set up the cs buffer */
5401 	dst_ptr = rdev->rlc.cs_ptr;
5402 	reg_list_hdr_blk_index = 0;
5403 	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
5404 	data = upper_32_bits(reg_list_mc_addr);
5405 	dst_ptr[reg_list_hdr_blk_index] = data;
5406 	reg_list_hdr_blk_index++;
5407 	for (i = 0; cs_data[i].section != NULL; i++) {
5408 		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5409 			reg_num = cs_data[i].section[j].reg_count;
5410 			data = reg_list_mc_addr & 0xffffffff;
5411 			dst_ptr[reg_list_hdr_blk_index] = data;
5412 			reg_list_hdr_blk_index++;
5413 
5414 			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
5415 			dst_ptr[reg_list_hdr_blk_index] = data;
5416 			reg_list_hdr_blk_index++;
5417 
5418 			data = 0x08000000 | (reg_num * 4);
5419 			dst_ptr[reg_list_hdr_blk_index] = data;
5420 			reg_list_hdr_blk_index++;
5421 
5422 			for (k = 0; k < reg_num; k++) {
5423 				data = cs_data[i].section[j].extent[k];
5424 				dst_ptr[reg_list_blk_index + k] = data;
5425 			}
5426 			reg_list_mc_addr += reg_num * 4;
5427 			reg_list_blk_index += reg_num;
5428 		}
5429 	}
5430 	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
5431 
5432 	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
5433 	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5434 
5435 	return 0;
5436 }
5437 
5438 static void si_rlc_reset(struct radeon_device *rdev)
5439 {
5440 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5441 
5442 	tmp |= SOFT_RESET_RLC;
5443 	WREG32(GRBM_SOFT_RESET, tmp);
5444 	udelay(50);
5445 	tmp &= ~SOFT_RESET_RLC;
5446 	WREG32(GRBM_SOFT_RESET, tmp);
5447 	udelay(50);
5448 }
5449 
/* Stop the RLC: clear RLC_CNTL, mask the gui idle interrupts and wait
 * for the serdes to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5458 
/* Start the RLC, re-enable the gui idle interrupts and give the RLC
 * 50us to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5467 
5468 static bool si_lbpw_supported(struct radeon_device *rdev)
5469 {
5470 	u32 tmp;
5471 
5472 	/* Enable LBPW only for DDR3 */
5473 	tmp = RREG32(MC_SEQ_MISC0);
5474 	if ((tmp & 0xF0000000) == 0xB0000000)
5475 		return true;
5476 	return false;
5477 }
5478 
5479 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5480 {
5481 	u32 tmp;
5482 
5483 	tmp = RREG32(RLC_LB_CNTL);
5484 	if (enable)
5485 		tmp |= LOAD_BALANCE_ENABLE;
5486 	else
5487 		tmp &= ~LOAD_BALANCE_ENABLE;
5488 	WREG32(RLC_LB_CNTL, tmp);
5489 
5490 	if (!enable) {
5491 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5492 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5493 	}
5494 }
5495 
/* Bring up the RLC: stop and soft-reset it, run power/clock gating
 * init, program its load-balancing and MC registers, upload the RLC
 * microcode from rdev->rlc_fw (big-endian words) and start it.
 * Returns 0 on success or -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload SI_RLC_UCODE_SIZE words of microcode, one address/data
	 * register pair per word */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	/* load balancing only where the memory type supports it */
	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5535 
5536 static void si_enable_interrupts(struct radeon_device *rdev)
5537 {
5538 	u32 ih_cntl = RREG32(IH_CNTL);
5539 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5540 
5541 	ih_cntl |= ENABLE_INTR;
5542 	ih_rb_cntl |= IH_RB_ENABLE;
5543 	WREG32(IH_CNTL, ih_cntl);
5544 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5545 	rdev->ih.enabled = true;
5546 }
5547 
5548 static void si_disable_interrupts(struct radeon_device *rdev)
5549 {
5550 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5551 	u32 ih_cntl = RREG32(IH_CNTL);
5552 
5553 	ih_rb_cntl &= ~IH_RB_ENABLE;
5554 	ih_cntl &= ~ENABLE_INTR;
5555 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5556 	WREG32(IH_CNTL, ih_cntl);
5557 	/* set rptr, wptr to 0 */
5558 	WREG32(IH_RB_RPTR, 0);
5559 	WREG32(IH_RB_WPTR, 0);
5560 	rdev->ih.enabled = false;
5561 	rdev->ih.rptr = 0;
5562 }
5563 
/* Mask every interrupt source (CP rings, DMA engines, GRBM, per-crtc
 * and pageflip interrupts, hpd) without touching the IH itself.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* per-crtc interrupt masks, for however many crtcs this asic has */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip (grph) interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* hpd: keep only the polarity bit, clearing the enables */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5619 
/* One time interrupt handler init: allocate the IH ring, bring up the
 * RLC, program the IH ring buffer and writeback addresses, and enable
 * interrupts.  Returns 0 on success or a negative error code (the IH
 * ring is freed again if the RLC fails to come up).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5690 
5691 int si_irq_set(struct radeon_device *rdev)
5692 {
5693 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5694 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5695 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5696 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5697 	u32 grbm_int_cntl = 0;
5698 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5699 	u32 dma_cntl, dma_cntl1;
5700 	u32 thermal_int = 0;
5701 
5702 	if (!rdev->irq.installed) {
5703 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5704 		return -EINVAL;
5705 	}
5706 	/* don't enable anything if the ih is disabled */
5707 	if (!rdev->ih.enabled) {
5708 		si_disable_interrupts(rdev);
5709 		/* force the active interrupt state to all disabled */
5710 		si_disable_interrupt_state(rdev);
5711 		return 0;
5712 	}
5713 
5714 	if (!ASIC_IS_NODCE(rdev)) {
5715 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5716 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5717 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5718 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5719 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5720 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5721 	}
5722 
5723 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5724 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5725 
5726 	thermal_int = RREG32(CG_THERMAL_INT) &
5727 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5728 
5729 	/* enable CP interrupts on all rings */
5730 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5731 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5732 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5733 	}
5734 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5735 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5736 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5737 	}
5738 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5739 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5740 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5741 	}
5742 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5743 		DRM_DEBUG("si_irq_set: sw int dma\n");
5744 		dma_cntl |= TRAP_ENABLE;
5745 	}
5746 
5747 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5748 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5749 		dma_cntl1 |= TRAP_ENABLE;
5750 	}
5751 	if (rdev->irq.crtc_vblank_int[0] ||
5752 	    atomic_read(&rdev->irq.pflip[0])) {
5753 		DRM_DEBUG("si_irq_set: vblank 0\n");
5754 		crtc1 |= VBLANK_INT_MASK;
5755 	}
5756 	if (rdev->irq.crtc_vblank_int[1] ||
5757 	    atomic_read(&rdev->irq.pflip[1])) {
5758 		DRM_DEBUG("si_irq_set: vblank 1\n");
5759 		crtc2 |= VBLANK_INT_MASK;
5760 	}
5761 	if (rdev->irq.crtc_vblank_int[2] ||
5762 	    atomic_read(&rdev->irq.pflip[2])) {
5763 		DRM_DEBUG("si_irq_set: vblank 2\n");
5764 		crtc3 |= VBLANK_INT_MASK;
5765 	}
5766 	if (rdev->irq.crtc_vblank_int[3] ||
5767 	    atomic_read(&rdev->irq.pflip[3])) {
5768 		DRM_DEBUG("si_irq_set: vblank 3\n");
5769 		crtc4 |= VBLANK_INT_MASK;
5770 	}
5771 	if (rdev->irq.crtc_vblank_int[4] ||
5772 	    atomic_read(&rdev->irq.pflip[4])) {
5773 		DRM_DEBUG("si_irq_set: vblank 4\n");
5774 		crtc5 |= VBLANK_INT_MASK;
5775 	}
5776 	if (rdev->irq.crtc_vblank_int[5] ||
5777 	    atomic_read(&rdev->irq.pflip[5])) {
5778 		DRM_DEBUG("si_irq_set: vblank 5\n");
5779 		crtc6 |= VBLANK_INT_MASK;
5780 	}
5781 	if (rdev->irq.hpd[0]) {
5782 		DRM_DEBUG("si_irq_set: hpd 1\n");
5783 		hpd1 |= DC_HPDx_INT_EN;
5784 	}
5785 	if (rdev->irq.hpd[1]) {
5786 		DRM_DEBUG("si_irq_set: hpd 2\n");
5787 		hpd2 |= DC_HPDx_INT_EN;
5788 	}
5789 	if (rdev->irq.hpd[2]) {
5790 		DRM_DEBUG("si_irq_set: hpd 3\n");
5791 		hpd3 |= DC_HPDx_INT_EN;
5792 	}
5793 	if (rdev->irq.hpd[3]) {
5794 		DRM_DEBUG("si_irq_set: hpd 4\n");
5795 		hpd4 |= DC_HPDx_INT_EN;
5796 	}
5797 	if (rdev->irq.hpd[4]) {
5798 		DRM_DEBUG("si_irq_set: hpd 5\n");
5799 		hpd5 |= DC_HPDx_INT_EN;
5800 	}
5801 	if (rdev->irq.hpd[5]) {
5802 		DRM_DEBUG("si_irq_set: hpd 6\n");
5803 		hpd6 |= DC_HPDx_INT_EN;
5804 	}
5805 
5806 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5807 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5808 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5809 
5810 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5811 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5812 
5813 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5814 
5815 	if (rdev->irq.dpm_thermal) {
5816 		DRM_DEBUG("dpm thermal\n");
5817 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5818 	}
5819 
5820 	if (rdev->num_crtc >= 2) {
5821 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5822 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5823 	}
5824 	if (rdev->num_crtc >= 4) {
5825 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5826 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5827 	}
5828 	if (rdev->num_crtc >= 6) {
5829 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5830 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5831 	}
5832 
5833 	if (rdev->num_crtc >= 2) {
5834 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5835 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5836 	}
5837 	if (rdev->num_crtc >= 4) {
5838 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5839 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5840 	}
5841 	if (rdev->num_crtc >= 6) {
5842 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5843 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5844 	}
5845 
5846 	if (!ASIC_IS_NODCE(rdev)) {
5847 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5848 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5849 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5850 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5851 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5852 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5853 	}
5854 
5855 	WREG32(CG_THERMAL_INT, thermal_int);
5856 
5857 	return 0;
5858 }
5859 
5860 static inline void si_irq_ack(struct radeon_device *rdev)
5861 {
5862 	u32 tmp;
5863 
5864 	if (ASIC_IS_NODCE(rdev))
5865 		return;
5866 
5867 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5868 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5869 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5870 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5871 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5872 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5873 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5874 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5875 	if (rdev->num_crtc >= 4) {
5876 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5877 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5878 	}
5879 	if (rdev->num_crtc >= 6) {
5880 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5881 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5882 	}
5883 
5884 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5885 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5886 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5887 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5888 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5889 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5890 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5891 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5892 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5893 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5894 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5895 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5896 
5897 	if (rdev->num_crtc >= 4) {
5898 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5899 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5900 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5901 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5902 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5903 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5904 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5905 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5906 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5907 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5908 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5909 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5910 	}
5911 
5912 	if (rdev->num_crtc >= 6) {
5913 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5914 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5915 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5916 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5917 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5918 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5919 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5920 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5921 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5922 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5923 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5924 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5925 	}
5926 
5927 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5928 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5929 		tmp |= DC_HPDx_INT_ACK;
5930 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5931 	}
5932 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5933 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5934 		tmp |= DC_HPDx_INT_ACK;
5935 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5936 	}
5937 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5938 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5939 		tmp |= DC_HPDx_INT_ACK;
5940 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5941 	}
5942 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5943 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5944 		tmp |= DC_HPDx_INT_ACK;
5945 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5946 	}
5947 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5948 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5949 		tmp |= DC_HPDx_INT_ACK;
5950 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5951 	}
5952 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5953 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5954 		tmp |= DC_HPDx_INT_ACK;
5955 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5956 	}
5957 }
5958 
/* Fully quiesce interrupt delivery: disable generation, let any
 * in-flight interrupt land, ack it, then force the per-source
 * enable state off.  The ordering of these calls matters.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5967 
/* Suspend-time IRQ teardown: disable/ack all interrupts, then stop
 * the RLC so it no longer generates interrupt traffic.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5973 
/* Final IRQ teardown: quiesce interrupts as for suspend, then free
 * the interrupt handler (IH) ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5979 
5980 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5981 {
5982 	u32 wptr, tmp;
5983 
5984 	if (rdev->wb.enabled)
5985 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5986 	else
5987 		wptr = RREG32(IH_RB_WPTR);
5988 
5989 	if (wptr & RB_OVERFLOW) {
5990 		/* When a ring buffer overflow happen start parsing interrupt
5991 		 * from the last not overwritten vector (wptr + 16). Hopefully
5992 		 * this should allow us to catchup.
5993 		 */
5994 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5995 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5996 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5997 		tmp = RREG32(IH_RB_CNTL);
5998 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5999 		WREG32(IH_RB_CNTL, tmp);
6000 	}
6001 	return (wptr & rdev->ih.ptr_mask);
6002 }
6003 
6004 /*        SI IV Ring
6005  * Each IV ring entry is 128 bits:
6006  * [7:0]    - interrupt source id
6007  * [31:8]   - reserved
6008  * [59:32]  - interrupt source data
6009  * [63:60]  - reserved
6010  * [71:64]  - RINGID
6011  * [79:72]  - VMID
6012  * [127:80] - reserved
6013  */
/**
 * si_irq_process - walk the IH ring and dispatch pending interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Consume 128-bit vectors from the IH ring between rptr and wptr and
 * dispatch by source id: per-crtc vblank/vline (1-6), HPD hotplug (42),
 * VM protection faults (146/147), CP/DMA fence completion, and thermal
 * events.  Re-entrancy is guarded by an atomic lock; if new vectors
 * arrived while processing, the loop restarts.
 * Returns IRQ_HANDLED when the ring was processed, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		/* each IH vector is 16 bytes = 4 dwords */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* heavy work (hotplug probing, thermal handling) goes to workqueues */
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6337 
6338 /**
6339  * si_copy_dma - copy pages using the DMA engine
6340  *
6341  * @rdev: radeon_device pointer
6342  * @src_offset: src GPU address
6343  * @dst_offset: dst GPU address
6344  * @num_gpu_pages: number of GPU pages to xfer
6345  * @fence: radeon fence object
6346  *
6347  * Copy GPU paging using the DMA engine (SI).
6348  * Used by the radeon ttm implementation to move pages if
6349  * registered as the asic copy callback.
6350  */
6351 int si_copy_dma(struct radeon_device *rdev,
6352 		uint64_t src_offset, uint64_t dst_offset,
6353 		unsigned num_gpu_pages,
6354 		struct radeon_fence **fence)
6355 {
6356 	struct radeon_semaphore *sem = NULL;
6357 	int ring_index = rdev->asic->copy.dma_ring_index;
6358 	struct radeon_ring *ring = &rdev->ring[ring_index];
6359 	u32 size_in_bytes, cur_size_in_bytes;
6360 	int i, num_loops;
6361 	int r = 0;
6362 
6363 	r = radeon_semaphore_create(rdev, &sem);
6364 	if (r) {
6365 		DRM_ERROR("radeon: moving bo (%d).\n", r);
6366 		return r;
6367 	}
6368 
6369 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6370 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6371 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6372 	if (r) {
6373 		DRM_ERROR("radeon: moving bo (%d).\n", r);
6374 		radeon_semaphore_free(rdev, &sem, NULL);
6375 		return r;
6376 	}
6377 
6378 	if (radeon_fence_need_sync(*fence, ring->idx)) {
6379 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6380 					    ring->idx);
6381 		radeon_fence_note_sync(*fence, ring->idx);
6382 	} else {
6383 		radeon_semaphore_free(rdev, &sem, NULL);
6384 	}
6385 
6386 	for (i = 0; i < num_loops; i++) {
6387 		cur_size_in_bytes = size_in_bytes;
6388 		if (cur_size_in_bytes > 0xFFFFF)
6389 			cur_size_in_bytes = 0xFFFFF;
6390 		size_in_bytes -= cur_size_in_bytes;
6391 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6392 		radeon_ring_write(ring, dst_offset & 0xffffffff);
6393 		radeon_ring_write(ring, src_offset & 0xffffffff);
6394 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6395 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6396 		src_offset += cur_size_in_bytes;
6397 		dst_offset += cur_size_in_bytes;
6398 	}
6399 
6400 	r = radeon_fence_emit(rdev, fence, ring->idx);
6401 	if (r) {
6402 		radeon_ring_unlock_undo(rdev, ring);
6403 		return r;
6404 	}
6405 
6406 	radeon_ring_unlock_commit(rdev, ring);
6407 	radeon_semaphore_free(rdev, &sem, *fence);
6408 
6409 	return r;
6410 }
6411 
6412 /*
6413  * startup/shutdown callbacks
6414  */
/**
 * si_startup - bring the asic up to a usable state
 *
 * @rdev: radeon_device pointer
 *
 * Ordered hw init: PCIe link/ASPM, MC, microcode, VRAM scratch, GART,
 * GPU config, RLC, writeback, fence drivers for all rings, optional
 * UVD, IRQs, ring buffers, CP/DMA resume, then IB pool and VM manager.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	si_mc_program(rdev);

	/* lazily load microcode the first time through */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence drivers before the rings that emit fences */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: disable the UVD ring and continue */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
6596 
/**
 * si_resume - resume the asic after suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-post the card via atombios, restore golden register settings
 * and run the full si_startup() bring-up sequence.
 * Returns 0 on success, negative error code on failure.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6622 
/**
 * si_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Tear down in the reverse order of startup: VM manager, CP, DMA,
 * UVD (if present), interrupts, writeback, then GART.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6637 
6638 /* Plan is to move initialization in that function and use
6639  * helper function so that radeon_device_init pretty much
6640  * do nothing more than calling asic specific function. This
6641  * should also allow to remove a bunch of callback function
6642  * like vram_info.
6643  */
6644 int si_init(struct radeon_device *rdev)
6645 {
6646 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6647 	int r;
6648 
6649 	/* Read BIOS */
6650 	if (!radeon_get_bios(rdev)) {
6651 		if (ASIC_IS_AVIVO(rdev))
6652 			return -EINVAL;
6653 	}
6654 	/* Must be an ATOMBIOS */
6655 	if (!rdev->is_atom_bios) {
6656 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6657 		return -EINVAL;
6658 	}
6659 	r = radeon_atombios_init(rdev);
6660 	if (r)
6661 		return r;
6662 
6663 	/* Post card if necessary */
6664 	if (!radeon_card_posted(rdev)) {
6665 		if (!rdev->bios) {
6666 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6667 			return -EINVAL;
6668 		}
6669 		DRM_INFO("GPU not posted. posting now...\n");
6670 		atom_asic_init(rdev->mode_info.atom_context);
6671 	}
6672 	/* init golden registers */
6673 	si_init_golden_registers(rdev);
6674 	/* Initialize scratch registers */
6675 	si_scratch_init(rdev);
6676 	/* Initialize surface registers */
6677 	radeon_surface_init(rdev);
6678 	/* Initialize clocks */
6679 	radeon_get_clock_info(rdev->ddev);
6680 
6681 	/* Fence driver */
6682 	r = radeon_fence_driver_init(rdev);
6683 	if (r)
6684 		return r;
6685 
6686 	/* initialize memory controller */
6687 	r = si_mc_init(rdev);
6688 	if (r)
6689 		return r;
6690 	/* Memory manager */
6691 	r = radeon_bo_init(rdev);
6692 	if (r)
6693 		return r;
6694 
6695 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6696 	ring->ring_obj = NULL;
6697 	r600_ring_init(rdev, ring, 1024 * 1024);
6698 
6699 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6700 	ring->ring_obj = NULL;
6701 	r600_ring_init(rdev, ring, 1024 * 1024);
6702 
6703 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6704 	ring->ring_obj = NULL;
6705 	r600_ring_init(rdev, ring, 1024 * 1024);
6706 
6707 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6708 	ring->ring_obj = NULL;
6709 	r600_ring_init(rdev, ring, 64 * 1024);
6710 
6711 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6712 	ring->ring_obj = NULL;
6713 	r600_ring_init(rdev, ring, 64 * 1024);
6714 
6715 	if (rdev->has_uvd) {
6716 		r = radeon_uvd_init(rdev);
6717 		if (!r) {
6718 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6719 			ring->ring_obj = NULL;
6720 			r600_ring_init(rdev, ring, 4096);
6721 		}
6722 	}
6723 
6724 	rdev->ih.ring_obj = NULL;
6725 	r600_ih_ring_init(rdev, 64 * 1024);
6726 
6727 	r = r600_pcie_gart_init(rdev);
6728 	if (r)
6729 		return r;
6730 
6731 	rdev->accel_working = true;
6732 	r = si_startup(rdev);
6733 	if (r) {
6734 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6735 		si_cp_fini(rdev);
6736 		cayman_dma_fini(rdev);
6737 		si_irq_fini(rdev);
6738 		si_rlc_fini(rdev);
6739 		radeon_wb_fini(rdev);
6740 		radeon_ib_pool_fini(rdev);
6741 		radeon_vm_manager_fini(rdev);
6742 		radeon_irq_kms_fini(rdev);
6743 		si_pcie_gart_fini(rdev);
6744 		rdev->accel_working = false;
6745 	}
6746 
6747 	/* Don't start up if the MC ucode is missing.
6748 	 * The default clocks and voltages before the MC ucode
6749 	 * is loaded are not suffient for advanced operations.
6750 	 */
6751 	if (!rdev->mc_fw) {
6752 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6753 		return -EINVAL;
6754 	}
6755 
6756 	return 0;
6757 }
6758 
/**
 * si_fini - tear down the asic (SI)
 * @rdev: radeon_device pointer
 *
 * Reverse of si_init(): stops the CP/DMA engines and IRQs, shuts down
 * RLC, clockgating and powergating, releases the writeback, VM, IB and
 * IRQ infrastructure, stops and frees UVD, disables the GART, and
 * finally frees the GEM/fence/BO managers, the ATOM context and the
 * BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	si_fini_cg(rdev);
	si_fini_pg(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD block before freeing its resources */
		r600_uvd_stop(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6784 
6785 /**
6786  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6787  *
6788  * @rdev: radeon_device pointer
6789  *
6790  * Fetches a GPU clock counter snapshot (SI).
6791  * Returns the 64 bit clock counter snapshot.
6792  */
6793 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6794 {
6795 	uint64_t clock;
6796 
6797 	mutex_lock(&rdev->gpu_clock_mutex);
6798 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6799 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6800 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6801 	mutex_unlock(&rdev->gpu_clock_mutex);
6802 	return clock;
6803 }
6804 
/**
 * si_set_uvd_clocks - program the UVD PLL (SI)
 * @rdev: radeon_device pointer
 * @vclk: requested UVD vclk (unit as expected by
 *        radeon_uvd_calc_upll_dividers; presumably kHz — TODO confirm)
 * @dclk: requested UVD dclk (same unit)
 *
 * Glitch-free reprogramming of the UPLL feeding UVD: the clocks are
 * first bypassed onto bclk and the PLL put into bypass mode before the
 * dividers are changed.  If both requested clocks are zero the PLL is
 * left in bypass and put to sleep.
 * Returns 0 on success, negative error code from the divider
 * calculation or from the PLL control requests on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6895 
/**
 * si_pcie_gen3_enable - try to raise the PCIE link speed to gen2/gen3
 * @rdev: radeon_device pointer
 *
 * Checks the speeds supported by both the GPU and the upstream bridge
 * and, if a faster rate than the current one is available, requests a
 * link retrain.  For a gen3 target the link equalization procedure is
 * re-run first (up to 10 attempts) unless gen3 is already active.
 * Silently returns when disabled via the radeon.pcie_gen2 module
 * parameter, on IGPs, on non-PCIE parts, or when no faster speed is
 * supported.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) bits
			 * so they can be restored after each retry
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate the link width up to the detected
			 * maximum if the link is running narrower
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the speed-change request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7052 
/**
 * si_program_aspm - program ASPM (active-state power management)
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 link power states, PLL power-down while in L1
 * and the CLKREQ#-based clocking scheme.  The disable_* locals are
 * compile-time policy knobs; with the values below L0s, L1 and
 * PLL-off-in-L1 are all enabled, and CLKREQ is used whenever the
 * upstream bridge advertises clock PM support in its link capabilities.
 * Does nothing when disabled via the radeon.aspm module parameter or on
 * non-PCIE parts.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* set L0s/L1 inactivity timers; if L1 stays disabled the write
	 * happens in the else branch at the bottom of this function
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* power the PIF PHY PLLs down while in L1/off/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything except
			 * Oland and Hainan
			 */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* only use CLKREQ if the bridge supports
				 * clock power management
				 */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the LC_CNTL value computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer again if the N_FTS field
		 * is saturated and the link is reversed in both directions
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7256