xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 455f9726)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
52 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
53 MODULE_FIRMWARE("radeon/VERDE_me.bin");
54 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
55 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
59 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
60 MODULE_FIRMWARE("radeon/OLAND_me.bin");
61 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
62 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
63 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
64 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
65 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
68 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
69 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
70 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
71 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
72 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
73 
74 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
75 static void si_pcie_gen3_enable(struct radeon_device *rdev);
76 static void si_program_aspm(struct radeon_device *rdev);
77 extern void sumo_rlc_fini(struct radeon_device *rdev);
78 extern int sumo_rlc_init(struct radeon_device *rdev);
79 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
80 extern void r600_ih_ring_fini(struct radeon_device *rdev);
81 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
82 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
83 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
84 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
85 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
86 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
87 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
88 					 bool enable);
89 static void si_init_pg(struct radeon_device *rdev);
90 static void si_init_cg(struct radeon_device *rdev);
91 static void si_fini_pg(struct radeon_device *rdev);
92 static void si_fini_cg(struct radeon_device *rdev);
93 static void si_rlc_stop(struct radeon_device *rdev);
94 
95 static const u32 verde_rlc_save_restore_register_list[] =
96 {
97 	(0x8000 << 16) | (0x98f4 >> 2),
98 	0x00000000,
99 	(0x8040 << 16) | (0x98f4 >> 2),
100 	0x00000000,
101 	(0x8000 << 16) | (0xe80 >> 2),
102 	0x00000000,
103 	(0x8040 << 16) | (0xe80 >> 2),
104 	0x00000000,
105 	(0x8000 << 16) | (0x89bc >> 2),
106 	0x00000000,
107 	(0x8040 << 16) | (0x89bc >> 2),
108 	0x00000000,
109 	(0x8000 << 16) | (0x8c1c >> 2),
110 	0x00000000,
111 	(0x8040 << 16) | (0x8c1c >> 2),
112 	0x00000000,
113 	(0x9c00 << 16) | (0x98f0 >> 2),
114 	0x00000000,
115 	(0x9c00 << 16) | (0xe7c >> 2),
116 	0x00000000,
117 	(0x8000 << 16) | (0x9148 >> 2),
118 	0x00000000,
119 	(0x8040 << 16) | (0x9148 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9150 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x897c >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x8d8c >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0xac54 >> 2),
128 	0X00000000,
129 	0x3,
130 	(0x9c00 << 16) | (0x98f8 >> 2),
131 	0x00000000,
132 	(0x9c00 << 16) | (0x9910 >> 2),
133 	0x00000000,
134 	(0x9c00 << 16) | (0x9914 >> 2),
135 	0x00000000,
136 	(0x9c00 << 16) | (0x9918 >> 2),
137 	0x00000000,
138 	(0x9c00 << 16) | (0x991c >> 2),
139 	0x00000000,
140 	(0x9c00 << 16) | (0x9920 >> 2),
141 	0x00000000,
142 	(0x9c00 << 16) | (0x9924 >> 2),
143 	0x00000000,
144 	(0x9c00 << 16) | (0x9928 >> 2),
145 	0x00000000,
146 	(0x9c00 << 16) | (0x992c >> 2),
147 	0x00000000,
148 	(0x9c00 << 16) | (0x9930 >> 2),
149 	0x00000000,
150 	(0x9c00 << 16) | (0x9934 >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x9938 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0x993c >> 2),
155 	0x00000000,
156 	(0x9c00 << 16) | (0x9940 >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x9944 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9948 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x994c >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9950 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9954 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9958 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x995c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9960 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x9964 >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9968 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x996c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9970 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x9974 >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9978 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x997c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x9980 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x9984 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x9988 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x998c >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x8c00 >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0x8c14 >> 2),
199 	0x00000000,
200 	(0x9c00 << 16) | (0x8c04 >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0x8c08 >> 2),
203 	0x00000000,
204 	(0x8000 << 16) | (0x9b7c >> 2),
205 	0x00000000,
206 	(0x8040 << 16) | (0x9b7c >> 2),
207 	0x00000000,
208 	(0x8000 << 16) | (0xe84 >> 2),
209 	0x00000000,
210 	(0x8040 << 16) | (0xe84 >> 2),
211 	0x00000000,
212 	(0x8000 << 16) | (0x89c0 >> 2),
213 	0x00000000,
214 	(0x8040 << 16) | (0x89c0 >> 2),
215 	0x00000000,
216 	(0x8000 << 16) | (0x914c >> 2),
217 	0x00000000,
218 	(0x8040 << 16) | (0x914c >> 2),
219 	0x00000000,
220 	(0x8000 << 16) | (0x8c20 >> 2),
221 	0x00000000,
222 	(0x8040 << 16) | (0x8c20 >> 2),
223 	0x00000000,
224 	(0x8000 << 16) | (0x9354 >> 2),
225 	0x00000000,
226 	(0x8040 << 16) | (0x9354 >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x9060 >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x9364 >> 2),
231 	0x00000000,
232 	(0x9c00 << 16) | (0x9100 >> 2),
233 	0x00000000,
234 	(0x9c00 << 16) | (0x913c >> 2),
235 	0x00000000,
236 	(0x8000 << 16) | (0x90e0 >> 2),
237 	0x00000000,
238 	(0x8000 << 16) | (0x90e4 >> 2),
239 	0x00000000,
240 	(0x8000 << 16) | (0x90e8 >> 2),
241 	0x00000000,
242 	(0x8040 << 16) | (0x90e0 >> 2),
243 	0x00000000,
244 	(0x8040 << 16) | (0x90e4 >> 2),
245 	0x00000000,
246 	(0x8040 << 16) | (0x90e8 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x8bcc >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x8b24 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x88c4 >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x8e50 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x8c0c >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x8e58 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0x8e5c >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0x9508 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0x950c >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0x9494 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0xac0c >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0xac10 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0xac14 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0xae00 >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0xac08 >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x88d4 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x88c8 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x88cc >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x89b0 >> 2),
285 	0x00000000,
286 	(0x9c00 << 16) | (0x8b10 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x8a14 >> 2),
289 	0x00000000,
290 	(0x9c00 << 16) | (0x9830 >> 2),
291 	0x00000000,
292 	(0x9c00 << 16) | (0x9834 >> 2),
293 	0x00000000,
294 	(0x9c00 << 16) | (0x9838 >> 2),
295 	0x00000000,
296 	(0x9c00 << 16) | (0x9a10 >> 2),
297 	0x00000000,
298 	(0x8000 << 16) | (0x9870 >> 2),
299 	0x00000000,
300 	(0x8000 << 16) | (0x9874 >> 2),
301 	0x00000000,
302 	(0x8001 << 16) | (0x9870 >> 2),
303 	0x00000000,
304 	(0x8001 << 16) | (0x9874 >> 2),
305 	0x00000000,
306 	(0x8040 << 16) | (0x9870 >> 2),
307 	0x00000000,
308 	(0x8040 << 16) | (0x9874 >> 2),
309 	0x00000000,
310 	(0x8041 << 16) | (0x9870 >> 2),
311 	0x00000000,
312 	(0x8041 << 16) | (0x9874 >> 2),
313 	0x00000000,
314 	0x00000000
315 };
316 
/*
 * Golden register settings for the TAHITI RLC, stored as flat
 * {register offset, mask, value} triples.  The consumer is not visible
 * in this chunk -- presumably applied by the driver's register-sequence
 * programming helper; confirm the triple layout against that caller.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
326 
/*
 * Golden (recommended power-on) register overrides for TAHITI, stored
 * as flat {register offset, mask, value} triples.  Consumer not visible
 * in this chunk; presumably applied via the driver's register-sequence
 * helper at ASIC init -- confirm triple layout there.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
361 
/*
 * Second golden-register list for TAHITI ({offset, mask, value} triple),
 * kept separate from tahiti_golden_registers -- the split's reason is
 * not visible in this chunk (likely applied at a different init stage).
 */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
366 
/*
 * Golden register settings for the PITCAIRN RLC, stored as flat
 * {register offset, mask, value} triples.  Consumer not visible in this
 * chunk; confirm the triple layout against the register-sequence helper.
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
375 
/*
 * Golden (recommended power-on) register overrides for PITCAIRN,
 * stored as flat {register offset, mask, value} triples.  Consumer not
 * visible in this chunk; presumably applied at ASIC init.
 */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
406 
/*
 * Golden register settings for the VERDE RLC, stored as flat
 * {register offset, mask, value} triples.  Consumer not visible in this
 * chunk; confirm the triple layout against the register-sequence helper.
 */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
415 
416 static const u32 verde_golden_registers[] =
417 {
418 	0x9a10, 0x00010000, 0x00018208,
419 	0x9830, 0xffffffff, 0x00000000,
420 	0x9834, 0xf00fffff, 0x00000400,
421 	0x9838, 0x0002021c, 0x00020200,
422 	0xc78, 0x00000080, 0x00000000,
423 	0xd030, 0x000300c0, 0x00800040,
424 	0xd030, 0x000300c0, 0x00800040,
425 	0xd830, 0x000300c0, 0x00800040,
426 	0xd830, 0x000300c0, 0x00800040,
427 	0x5bb0, 0x000000f0, 0x00000070,
428 	0x5bc0, 0x00200000, 0x50100000,
429 	0x7030, 0x31000311, 0x00000011,
430 	0x2ae4, 0x00073ffe, 0x000022a2,
431 	0x2ae4, 0x00073ffe, 0x000022a2,
432 	0x2ae4, 0x00073ffe, 0x000022a2,
433 	0x240c, 0x000007ff, 0x00000000,
434 	0x240c, 0x000007ff, 0x00000000,
435 	0x240c, 0x000007ff, 0x00000000,
436 	0x8a14, 0xf000001f, 0x00000007,
437 	0x8a14, 0xf000001f, 0x00000007,
438 	0x8a14, 0xf000001f, 0x00000007,
439 	0x8b24, 0xffffffff, 0x00ffffff,
440 	0x8b10, 0x0000ff0f, 0x00000000,
441 	0x28a4c, 0x07ffffff, 0x4e000000,
442 	0x28350, 0x3f3f3fff, 0x0000124a,
443 	0x28350, 0x3f3f3fff, 0x0000124a,
444 	0x28350, 0x3f3f3fff, 0x0000124a,
445 	0x30, 0x000000ff, 0x0040,
446 	0x34, 0x00000040, 0x00004040,
447 	0x9100, 0x07ffffff, 0x03000000,
448 	0x9100, 0x07ffffff, 0x03000000,
449 	0x8e88, 0x01ff1f3f, 0x00000000,
450 	0x8e88, 0x01ff1f3f, 0x00000000,
451 	0x8e88, 0x01ff1f3f, 0x00000000,
452 	0x8e84, 0x01ff1f3f, 0x00000000,
453 	0x8e84, 0x01ff1f3f, 0x00000000,
454 	0x8e84, 0x01ff1f3f, 0x00000000,
455 	0x9060, 0x0000007f, 0x00000020,
456 	0x9508, 0x00010000, 0x00010000,
457 	0xac14, 0x000003ff, 0x00000003,
458 	0xac14, 0x000003ff, 0x00000003,
459 	0xac14, 0x000003ff, 0x00000003,
460 	0xac10, 0xffffffff, 0x00000000,
461 	0xac10, 0xffffffff, 0x00000000,
462 	0xac10, 0xffffffff, 0x00000000,
463 	0xac0c, 0xffffffff, 0x00001032,
464 	0xac0c, 0xffffffff, 0x00001032,
465 	0xac0c, 0xffffffff, 0x00001032,
466 	0x88d4, 0x0000001f, 0x00000010,
467 	0x88d4, 0x0000001f, 0x00000010,
468 	0x88d4, 0x0000001f, 0x00000010,
469 	0x15c0, 0x000c0fc0, 0x000c0400
470 };
471 
/*
 * Golden register settings for the OLAND RLC, stored as flat
 * {register offset, mask, value} triples.  Consumer not visible in this
 * chunk; confirm the triple layout against the register-sequence helper.
 */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
480 
/*
 * Golden (recommended power-on) register overrides for OLAND, stored as
 * flat {register offset, mask, value} triples.  Consumer not visible in
 * this chunk; presumably applied at ASIC init.
 */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
511 
/*
 * Golden (recommended power-on) register overrides for HAINAN, stored
 * as flat {register offset, mask, value} triples.  Consumer not visible
 * in this chunk; presumably applied at ASIC init.  Note HAINAN adds
 * 0xd0c0/0xd8c0 entries and omits the display-related offsets present
 * in the other ASICs' lists (HAINAN has no display hardware -- TODO
 * confirm that is the reason).
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
540 
/*
 * Second golden-register list for HAINAN ({offset, mask, value} triple),
 * kept separate from hainan_golden_registers -- the split's reason is
 * not visible in this chunk (likely applied at a different init stage).
 */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
545 
/*
 * Medium-grain / coarse-grain clockgating (MGCG/CGCG) init sequence for
 * TAHITI, stored as flat {register offset, mask, value} triples.
 * Consumer not visible in this chunk; presumably applied by the
 * register-sequence helper when clockgating is set up.
 * NOTE(review): 0x802c is written near the top and again before the
 * 0x9160.. run -- looks deliberate (re-selecting the same broadcast
 * value); preserved as-is, confirm against the consumer.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
675 
/*
 * Medium-grain / coarse-grain clockgating (MGCG/CGCG) init sequence for
 * PITCAIRN, stored as flat {register offset, mask, value} triples.
 * Consumer not visible in this chunk; presumably applied by the
 * register-sequence helper when clockgating is set up.
 * NOTE(review): 0x802c appears twice (top and before the 0x9160.. run);
 * preserved as-is -- confirm intent against the consumer.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
773 
/*
 * Medium-grain / coarse-grain clockgating (MGCG/CGCG) init sequence for
 * VERDE, stored as flat {register offset, mask, value} triples.
 * Consumer not visible in this chunk; presumably applied by the
 * register-sequence helper when clockgating is set up.
 * NOTE(review): 0x802c appears twice (top and before the 0x9160.. run);
 * preserved as-is -- confirm intent against the consumer.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
873 
/*
 * Medium-grain / coarse-grain clockgating (MGCG/CGCG) init sequence for
 * OLAND, stored as flat {register offset, mask, value} triples.
 * Consumer not visible in this chunk; presumably applied by the
 * register-sequence helper when clockgating is set up.
 * NOTE(review): 0x802c appears twice (top and before the 0x9160.. run);
 * preserved as-is -- confirm intent against the consumer.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
953 
/* {register offset, mask, value} triples enabling the medium and coarse
 * grain clock gating (MGCG/CGCG) defaults on Hainan; consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1030 
1031 static u32 verde_pg_init[] =
1032 {
1033 	0x353c, 0xffffffff, 0x40000,
1034 	0x3538, 0xffffffff, 0x200010ff,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x0,
1040 	0x353c, 0xffffffff, 0x7007,
1041 	0x3538, 0xffffffff, 0x300010ff,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x0,
1047 	0x353c, 0xffffffff, 0x400000,
1048 	0x3538, 0xffffffff, 0x100010ff,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x0,
1054 	0x353c, 0xffffffff, 0x120200,
1055 	0x3538, 0xffffffff, 0x500010ff,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x353c, 0xffffffff, 0x1e1e16,
1062 	0x3538, 0xffffffff, 0x600010ff,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x0,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x353c, 0xffffffff, 0x171f1e,
1069 	0x3538, 0xffffffff, 0x700010ff,
1070 	0x353c, 0xffffffff, 0x0,
1071 	0x353c, 0xffffffff, 0x0,
1072 	0x353c, 0xffffffff, 0x0,
1073 	0x353c, 0xffffffff, 0x0,
1074 	0x353c, 0xffffffff, 0x0,
1075 	0x353c, 0xffffffff, 0x0,
1076 	0x3538, 0xffffffff, 0x9ff,
1077 	0x3500, 0xffffffff, 0x0,
1078 	0x3504, 0xffffffff, 0x10000800,
1079 	0x3504, 0xffffffff, 0xf,
1080 	0x3504, 0xffffffff, 0xf,
1081 	0x3500, 0xffffffff, 0x4,
1082 	0x3504, 0xffffffff, 0x1000051e,
1083 	0x3504, 0xffffffff, 0xffff,
1084 	0x3504, 0xffffffff, 0xffff,
1085 	0x3500, 0xffffffff, 0x8,
1086 	0x3504, 0xffffffff, 0x80500,
1087 	0x3500, 0xffffffff, 0x12,
1088 	0x3504, 0xffffffff, 0x9050c,
1089 	0x3500, 0xffffffff, 0x1d,
1090 	0x3504, 0xffffffff, 0xb052c,
1091 	0x3500, 0xffffffff, 0x2a,
1092 	0x3504, 0xffffffff, 0x1053e,
1093 	0x3500, 0xffffffff, 0x2d,
1094 	0x3504, 0xffffffff, 0x10546,
1095 	0x3500, 0xffffffff, 0x30,
1096 	0x3504, 0xffffffff, 0xa054e,
1097 	0x3500, 0xffffffff, 0x3c,
1098 	0x3504, 0xffffffff, 0x1055f,
1099 	0x3500, 0xffffffff, 0x3f,
1100 	0x3504, 0xffffffff, 0x10567,
1101 	0x3500, 0xffffffff, 0x42,
1102 	0x3504, 0xffffffff, 0x1056f,
1103 	0x3500, 0xffffffff, 0x45,
1104 	0x3504, 0xffffffff, 0x10572,
1105 	0x3500, 0xffffffff, 0x48,
1106 	0x3504, 0xffffffff, 0x20575,
1107 	0x3500, 0xffffffff, 0x4c,
1108 	0x3504, 0xffffffff, 0x190801,
1109 	0x3500, 0xffffffff, 0x67,
1110 	0x3504, 0xffffffff, 0x1082a,
1111 	0x3500, 0xffffffff, 0x6a,
1112 	0x3504, 0xffffffff, 0x1b082d,
1113 	0x3500, 0xffffffff, 0x87,
1114 	0x3504, 0xffffffff, 0x310851,
1115 	0x3500, 0xffffffff, 0xba,
1116 	0x3504, 0xffffffff, 0x891,
1117 	0x3500, 0xffffffff, 0xbc,
1118 	0x3504, 0xffffffff, 0x893,
1119 	0x3500, 0xffffffff, 0xbe,
1120 	0x3504, 0xffffffff, 0x20895,
1121 	0x3500, 0xffffffff, 0xc2,
1122 	0x3504, 0xffffffff, 0x20899,
1123 	0x3500, 0xffffffff, 0xc6,
1124 	0x3504, 0xffffffff, 0x2089d,
1125 	0x3500, 0xffffffff, 0xca,
1126 	0x3504, 0xffffffff, 0x8a1,
1127 	0x3500, 0xffffffff, 0xcc,
1128 	0x3504, 0xffffffff, 0x8a3,
1129 	0x3500, 0xffffffff, 0xce,
1130 	0x3504, 0xffffffff, 0x308a5,
1131 	0x3500, 0xffffffff, 0xd3,
1132 	0x3504, 0xffffffff, 0x6d08cd,
1133 	0x3500, 0xffffffff, 0x142,
1134 	0x3504, 0xffffffff, 0x2000095a,
1135 	0x3504, 0xffffffff, 0x1,
1136 	0x3500, 0xffffffff, 0x144,
1137 	0x3504, 0xffffffff, 0x301f095b,
1138 	0x3500, 0xffffffff, 0x165,
1139 	0x3504, 0xffffffff, 0xc094d,
1140 	0x3500, 0xffffffff, 0x173,
1141 	0x3504, 0xffffffff, 0xf096d,
1142 	0x3500, 0xffffffff, 0x184,
1143 	0x3504, 0xffffffff, 0x15097f,
1144 	0x3500, 0xffffffff, 0x19b,
1145 	0x3504, 0xffffffff, 0xc0998,
1146 	0x3500, 0xffffffff, 0x1a9,
1147 	0x3504, 0xffffffff, 0x409a7,
1148 	0x3500, 0xffffffff, 0x1af,
1149 	0x3504, 0xffffffff, 0xcdc,
1150 	0x3500, 0xffffffff, 0x1b1,
1151 	0x3504, 0xffffffff, 0x800,
1152 	0x3508, 0xffffffff, 0x6c9b2000,
1153 	0x3510, 0xfc00, 0x2000,
1154 	0x3544, 0xffffffff, 0xfc0,
1155 	0x28d4, 0x00000100, 0x100
1156 };
1157 
1158 static void si_init_golden_registers(struct radeon_device *rdev)
1159 {
1160 	switch (rdev->family) {
1161 	case CHIP_TAHITI:
1162 		radeon_program_register_sequence(rdev,
1163 						 tahiti_golden_registers,
1164 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1165 		radeon_program_register_sequence(rdev,
1166 						 tahiti_golden_rlc_registers,
1167 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1168 		radeon_program_register_sequence(rdev,
1169 						 tahiti_mgcg_cgcg_init,
1170 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1171 		radeon_program_register_sequence(rdev,
1172 						 tahiti_golden_registers2,
1173 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1174 		break;
1175 	case CHIP_PITCAIRN:
1176 		radeon_program_register_sequence(rdev,
1177 						 pitcairn_golden_registers,
1178 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1179 		radeon_program_register_sequence(rdev,
1180 						 pitcairn_golden_rlc_registers,
1181 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1182 		radeon_program_register_sequence(rdev,
1183 						 pitcairn_mgcg_cgcg_init,
1184 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1185 		break;
1186 	case CHIP_VERDE:
1187 		radeon_program_register_sequence(rdev,
1188 						 verde_golden_registers,
1189 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1190 		radeon_program_register_sequence(rdev,
1191 						 verde_golden_rlc_registers,
1192 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1193 		radeon_program_register_sequence(rdev,
1194 						 verde_mgcg_cgcg_init,
1195 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1196 		radeon_program_register_sequence(rdev,
1197 						 verde_pg_init,
1198 						 (const u32)ARRAY_SIZE(verde_pg_init));
1199 		break;
1200 	case CHIP_OLAND:
1201 		radeon_program_register_sequence(rdev,
1202 						 oland_golden_registers,
1203 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1204 		radeon_program_register_sequence(rdev,
1205 						 oland_golden_rlc_registers,
1206 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1207 		radeon_program_register_sequence(rdev,
1208 						 oland_mgcg_cgcg_init,
1209 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1210 		break;
1211 	case CHIP_HAINAN:
1212 		radeon_program_register_sequence(rdev,
1213 						 hainan_golden_registers,
1214 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1215 		radeon_program_register_sequence(rdev,
1216 						 hainan_golden_registers2,
1217 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1218 		radeon_program_register_sequence(rdev,
1219 						 hainan_mgcg_cgcg_init,
1220 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1221 		break;
1222 	default:
1223 		break;
1224 	}
1225 }
1226 
1227 #define PCIE_BUS_CLK                10000
1228 #define TCLK                        (PCIE_BUS_CLK / 10)
1229 
1230 /**
1231  * si_get_xclk - get the xclk
1232  *
1233  * @rdev: radeon_device pointer
1234  *
1235  * Returns the reference clock used by the gfx engine
1236  * (SI).
1237  */
1238 u32 si_get_xclk(struct radeon_device *rdev)
1239 {
1240         u32 reference_clock = rdev->clock.spll.reference_freq;
1241 	u32 tmp;
1242 
1243 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1244 	if (tmp & MUX_TCLK_TO_XCLK)
1245 		return TCLK;
1246 
1247 	tmp = RREG32(CG_CLKPIN_CNTL);
1248 	if (tmp & XTALIN_DIVIDE)
1249 		return reference_clock / 4;
1250 
1251 	return reference_clock;
1252 }
1253 
1254 /* get temperature in millidegrees */
1255 int si_get_temp(struct radeon_device *rdev)
1256 {
1257 	u32 temp;
1258 	int actual_temp = 0;
1259 
1260 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1261 		CTF_TEMP_SHIFT;
1262 
1263 	if (temp & 0x200)
1264 		actual_temp = 255;
1265 	else
1266 		actual_temp = temp & 0x1ff;
1267 
1268 	actual_temp = (actual_temp * 1000);
1269 
1270 	return actual_temp;
1271 }
1272 
1273 #define TAHITI_IO_MC_REGS_SIZE 36
1274 
/* MC sequencer IO debug {index, data} pairs for Tahiti, written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode() before the MC ucode
 * is loaded.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1313 
/* MC sequencer IO debug {index, data} pairs for Pitcairn; same layout as
 * tahiti_io_mc_regs, differing only in the last (0x9f) entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1352 
/* MC sequencer IO debug {index, data} pairs for Verde; same layout as
 * tahiti_io_mc_regs, differing only in the last (0x9f) entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1391 
/* MC sequencer IO debug {index, data} pairs for Oland; same layout as
 * tahiti_io_mc_regs, differing only in the last (0x9f) entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1430 
/* MC sequencer IO debug {index, data} pairs for Hainan; same layout as
 * tahiti_io_mc_regs, differing only in the last (0x9f) entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1469 
1470 /* ucode loading */
1471 int si_mc_load_microcode(struct radeon_device *rdev)
1472 {
1473 	const __be32 *fw_data;
1474 	u32 running, blackout = 0;
1475 	u32 *io_mc_regs;
1476 	int i, regs_size, ucode_size;
1477 
1478 	if (!rdev->mc_fw)
1479 		return -EINVAL;
1480 
1481 	ucode_size = rdev->mc_fw->size / 4;
1482 
1483 	switch (rdev->family) {
1484 	case CHIP_TAHITI:
1485 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1486 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1487 		break;
1488 	case CHIP_PITCAIRN:
1489 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1490 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1491 		break;
1492 	case CHIP_VERDE:
1493 	default:
1494 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1495 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1496 		break;
1497 	case CHIP_OLAND:
1498 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1499 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1500 		break;
1501 	case CHIP_HAINAN:
1502 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1503 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1504 		break;
1505 	}
1506 
1507 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1508 
1509 	if (running == 0) {
1510 		if (running) {
1511 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1512 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1513 		}
1514 
1515 		/* reset the engine and set to writable */
1516 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1517 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1518 
1519 		/* load mc io regs */
1520 		for (i = 0; i < regs_size; i++) {
1521 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1522 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1523 		}
1524 		/* load the MC ucode */
1525 		fw_data = (const __be32 *)rdev->mc_fw->data;
1526 		for (i = 0; i < ucode_size; i++)
1527 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1528 
1529 		/* put the engine back into the active state */
1530 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1531 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1532 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1533 
1534 		/* wait for training to complete */
1535 		for (i = 0; i < rdev->usec_timeout; i++) {
1536 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1537 				break;
1538 			udelay(1);
1539 		}
1540 		for (i = 0; i < rdev->usec_timeout; i++) {
1541 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1542 				break;
1543 			udelay(1);
1544 		}
1545 
1546 		if (running)
1547 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1548 	}
1549 
1550 	return 0;
1551 }
1552 
1553 static int si_init_microcode(struct radeon_device *rdev)
1554 {
1555 	const char *chip_name;
1556 	const char *rlc_chip_name;
1557 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1558 	size_t smc_req_size, mc2_req_size;
1559 	char fw_name[30];
1560 	int err;
1561 
1562 	DRM_DEBUG("\n");
1563 
1564 	switch (rdev->family) {
1565 	case CHIP_TAHITI:
1566 		chip_name = "TAHITI";
1567 		rlc_chip_name = "TAHITI";
1568 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1570 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1571 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1573 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1574 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1575 		break;
1576 	case CHIP_PITCAIRN:
1577 		chip_name = "PITCAIRN";
1578 		rlc_chip_name = "PITCAIRN";
1579 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1580 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1581 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1582 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1583 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1584 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1585 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1586 		break;
1587 	case CHIP_VERDE:
1588 		chip_name = "VERDE";
1589 		rlc_chip_name = "VERDE";
1590 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1591 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1592 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1593 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1594 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1595 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1596 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1597 		break;
1598 	case CHIP_OLAND:
1599 		chip_name = "OLAND";
1600 		rlc_chip_name = "OLAND";
1601 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1602 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1603 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1604 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1605 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1606 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1607 		break;
1608 	case CHIP_HAINAN:
1609 		chip_name = "HAINAN";
1610 		rlc_chip_name = "HAINAN";
1611 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1612 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1613 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1614 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1615 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1616 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1617 		break;
1618 	default: BUG();
1619 	}
1620 
1621 	DRM_INFO("Loading %s Microcode\n", chip_name);
1622 
1623 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1624 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1625 	if (err)
1626 		goto out;
1627 	if (rdev->pfp_fw->size != pfp_req_size) {
1628 		printk(KERN_ERR
1629 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1630 		       rdev->pfp_fw->size, fw_name);
1631 		err = -EINVAL;
1632 		goto out;
1633 	}
1634 
1635 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1636 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1637 	if (err)
1638 		goto out;
1639 	if (rdev->me_fw->size != me_req_size) {
1640 		printk(KERN_ERR
1641 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1642 		       rdev->me_fw->size, fw_name);
1643 		err = -EINVAL;
1644 	}
1645 
1646 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1647 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1648 	if (err)
1649 		goto out;
1650 	if (rdev->ce_fw->size != ce_req_size) {
1651 		printk(KERN_ERR
1652 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1653 		       rdev->ce_fw->size, fw_name);
1654 		err = -EINVAL;
1655 	}
1656 
1657 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1658 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1659 	if (err)
1660 		goto out;
1661 	if (rdev->rlc_fw->size != rlc_req_size) {
1662 		printk(KERN_ERR
1663 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1664 		       rdev->rlc_fw->size, fw_name);
1665 		err = -EINVAL;
1666 	}
1667 
1668 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1669 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1670 	if (err) {
1671 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1672 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1673 		if (err)
1674 			goto out;
1675 	}
1676 	if ((rdev->mc_fw->size != mc_req_size) &&
1677 	    (rdev->mc_fw->size != mc2_req_size)) {
1678 		printk(KERN_ERR
1679 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1680 		       rdev->mc_fw->size, fw_name);
1681 		err = -EINVAL;
1682 	}
1683 	DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1684 
1685 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1686 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1687 	if (err) {
1688 		printk(KERN_ERR
1689 		       "smc: error loading firmware \"%s\"\n",
1690 		       fw_name);
1691 		release_firmware(rdev->smc_fw);
1692 		rdev->smc_fw = NULL;
1693 		err = 0;
1694 	} else if (rdev->smc_fw->size != smc_req_size) {
1695 		printk(KERN_ERR
1696 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1697 		       rdev->smc_fw->size, fw_name);
1698 		err = -EINVAL;
1699 	}
1700 
1701 out:
1702 	if (err) {
1703 		if (err != -EINVAL)
1704 			printk(KERN_ERR
1705 			       "si_cp: Failed to load firmware \"%s\"\n",
1706 			       fw_name);
1707 		release_firmware(rdev->pfp_fw);
1708 		rdev->pfp_fw = NULL;
1709 		release_firmware(rdev->me_fw);
1710 		rdev->me_fw = NULL;
1711 		release_firmware(rdev->ce_fw);
1712 		rdev->ce_fw = NULL;
1713 		release_firmware(rdev->rlc_fw);
1714 		rdev->rlc_fw = NULL;
1715 		release_firmware(rdev->mc_fw);
1716 		rdev->mc_fw = NULL;
1717 		release_firmware(rdev->smc_fw);
1718 		rdev->smc_fw = NULL;
1719 	}
1720 	return err;
1721 }
1722 
1723 /* watermark setup */
1724 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1725 				   struct radeon_crtc *radeon_crtc,
1726 				   struct drm_display_mode *mode,
1727 				   struct drm_display_mode *other_mode)
1728 {
1729 	u32 tmp, buffer_alloc, i;
1730 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1731 	/*
1732 	 * Line Buffer Setup
1733 	 * There are 3 line buffers, each one shared by 2 display controllers.
1734 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1735 	 * the display controllers.  The paritioning is done via one of four
1736 	 * preset allocations specified in bits 21:20:
1737 	 *  0 - half lb
1738 	 *  2 - whole lb, other crtc must be disabled
1739 	 */
1740 	/* this can get tricky if we have two large displays on a paired group
1741 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1742 	 * non-linked crtcs for maximum line buffer allocation.
1743 	 */
1744 	if (radeon_crtc->base.enabled && mode) {
1745 		if (other_mode) {
1746 			tmp = 0; /* 1/2 */
1747 			buffer_alloc = 1;
1748 		} else {
1749 			tmp = 2; /* whole */
1750 			buffer_alloc = 2;
1751 		}
1752 	} else {
1753 		tmp = 0;
1754 		buffer_alloc = 0;
1755 	}
1756 
1757 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1758 	       DC_LB_MEMORY_CONFIG(tmp));
1759 
1760 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1761 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1762 	for (i = 0; i < rdev->usec_timeout; i++) {
1763 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1764 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1765 			break;
1766 		udelay(1);
1767 	}
1768 
1769 	if (radeon_crtc->base.enabled && mode) {
1770 		switch (tmp) {
1771 		case 0:
1772 		default:
1773 			return 4096 * 2;
1774 		case 2:
1775 			return 8192 * 2;
1776 		}
1777 	}
1778 
1779 	/* controller not enabled, so no lb used */
1780 	return 0;
1781 }
1782 
1783 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1784 {
1785 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1786 
1787 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1788 	case 0:
1789 	default:
1790 		return 1;
1791 	case 1:
1792 		return 2;
1793 	case 2:
1794 		return 4;
1795 	case 3:
1796 		return 8;
1797 	case 4:
1798 		return 3;
1799 	case 5:
1800 		return 6;
1801 	case 6:
1802 		return 10;
1803 	case 7:
1804 		return 12;
1805 	case 8:
1806 		return 16;
1807 	}
1808 }
1809 
/* Inputs to the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1825 
1826 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1827 {
1828 	/* Calculate raw DRAM Bandwidth */
1829 	fixed20_12 dram_efficiency; /* 0.7 */
1830 	fixed20_12 yclk, dram_channels, bandwidth;
1831 	fixed20_12 a;
1832 
1833 	a.full = dfixed_const(1000);
1834 	yclk.full = dfixed_const(wm->yclk);
1835 	yclk.full = dfixed_div(yclk, a);
1836 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1837 	a.full = dfixed_const(10);
1838 	dram_efficiency.full = dfixed_const(7);
1839 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1840 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1841 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1842 
1843 	return dfixed_trunc(bandwidth);
1844 }
1845 
1846 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1847 {
1848 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1849 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1850 	fixed20_12 yclk, dram_channels, bandwidth;
1851 	fixed20_12 a;
1852 
1853 	a.full = dfixed_const(1000);
1854 	yclk.full = dfixed_const(wm->yclk);
1855 	yclk.full = dfixed_div(yclk, a);
1856 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1857 	a.full = dfixed_const(10);
1858 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1859 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1860 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1861 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1862 
1863 	return dfixed_trunc(bandwidth);
1864 }
1865 
1866 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1867 {
1868 	/* Calculate the display Data return Bandwidth */
1869 	fixed20_12 return_efficiency; /* 0.8 */
1870 	fixed20_12 sclk, bandwidth;
1871 	fixed20_12 a;
1872 
1873 	a.full = dfixed_const(1000);
1874 	sclk.full = dfixed_const(wm->sclk);
1875 	sclk.full = dfixed_div(sclk, a);
1876 	a.full = dfixed_const(10);
1877 	return_efficiency.full = dfixed_const(8);
1878 	return_efficiency.full = dfixed_div(return_efficiency, a);
1879 	a.full = dfixed_const(32);
1880 	bandwidth.full = dfixed_mul(a, sclk);
1881 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1882 
1883 	return dfixed_trunc(bandwidth);
1884 }
1885 
/* DMIF transfers a fixed 32 bytes per request on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1890 
1891 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1892 {
1893 	/* Calculate the DMIF Request Bandwidth */
1894 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1895 	fixed20_12 disp_clk, sclk, bandwidth;
1896 	fixed20_12 a, b1, b2;
1897 	u32 min_bandwidth;
1898 
1899 	a.full = dfixed_const(1000);
1900 	disp_clk.full = dfixed_const(wm->disp_clk);
1901 	disp_clk.full = dfixed_div(disp_clk, a);
1902 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1903 	b1.full = dfixed_mul(a, disp_clk);
1904 
1905 	a.full = dfixed_const(1000);
1906 	sclk.full = dfixed_const(wm->sclk);
1907 	sclk.full = dfixed_div(sclk, a);
1908 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1909 	b2.full = dfixed_mul(a, sclk);
1910 
1911 	a.full = dfixed_const(10);
1912 	disp_clk_request_efficiency.full = dfixed_const(8);
1913 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1914 
1915 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1916 
1917 	a.full = dfixed_const(min_bandwidth);
1918 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1919 
1920 	return dfixed_trunc(bandwidth);
1921 }
1922 
1923 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1924 {
1925 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1926 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1927 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1928 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1929 
1930 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1931 }
1932 
1933 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1934 {
1935 	/* Calculate the display mode Average Bandwidth
1936 	 * DisplayMode should contain the source and destination dimensions,
1937 	 * timing, etc.
1938 	 */
1939 	fixed20_12 bpp;
1940 	fixed20_12 line_time;
1941 	fixed20_12 src_width;
1942 	fixed20_12 bandwidth;
1943 	fixed20_12 a;
1944 
1945 	a.full = dfixed_const(1000);
1946 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1947 	line_time.full = dfixed_div(line_time, a);
1948 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1949 	src_width.full = dfixed_const(wm->src_width);
1950 	bandwidth.full = dfixed_mul(src_width, bpp);
1951 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1952 	bandwidth.full = dfixed_div(bandwidth, line_time);
1953 
1954 	return dfixed_trunc(bandwidth);
1955 }
1956 
/* Compute the latency watermark (in ns) for one head: the worst-case time
 * the display may have to wait for its data, plus any extra time needed to
 * fill a line when the line buffer cannot keep up with active display time.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* ns to return the worst-case chunk (512 bytes * 8) at available bw */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* ns to return a cursor line pair (128 bytes * 4) */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* delay contributed by the other heads' outstanding requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps, or interlaced scanout means up to
	 * 4 source lines feed one destination line; otherwise 2.
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk): rate at which
	 * the DMIF buffer can be refilled over the latency window
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk (MHz) * bytes_per_pixel: demand-side fill rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the lesser of supply and demand */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can be filled within the active period, latency alone
	 * is the watermark; otherwise add the shortfall
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2019 
2020 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2021 {
2022 	if (dce6_average_bandwidth(wm) <=
2023 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2024 		return true;
2025 	else
2026 		return false;
2027 };
2028 
2029 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2030 {
2031 	if (dce6_average_bandwidth(wm) <=
2032 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2033 		return true;
2034 	else
2035 		return false;
2036 };
2037 
2038 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2039 {
2040 	u32 lb_partitions = wm->lb_size / wm->src_width;
2041 	u32 line_time = wm->active_time + wm->blank_time;
2042 	u32 latency_tolerant_lines;
2043 	u32 latency_hiding;
2044 	fixed20_12 a;
2045 
2046 	a.full = dfixed_const(1);
2047 	if (wm->vsc.full > a.full)
2048 		latency_tolerant_lines = 1;
2049 	else {
2050 		if (lb_partitions <= (wm->vtaps + 1))
2051 			latency_tolerant_lines = 1;
2052 		else
2053 			latency_tolerant_lines = 2;
2054 	}
2055 
2056 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2057 
2058 	if (dce6_latency_watermark(wm) <= latency_hiding)
2059 		return true;
2060 	else
2061 		return false;
2062 }
2063 
/* Compute and program the display watermarks and arbitration priority marks
 * for one CRTC.  Watermark A is derived from the high (current/boost) clocks
 * and watermark B from the low (DPM-min) clocks; both are written to the
 * pipe's DPG registers, and the line time / watermarks are cached on the
 * radeon_crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line_time capped to the 16-bit HW field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a (ns) * pixel clock * hsc,
		 * converted to units of 16 pixels
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2223 
2224 void dce6_bandwidth_update(struct radeon_device *rdev)
2225 {
2226 	struct drm_display_mode *mode0 = NULL;
2227 	struct drm_display_mode *mode1 = NULL;
2228 	u32 num_heads = 0, lb_size;
2229 	int i;
2230 
2231 	radeon_update_display_priority(rdev);
2232 
2233 	for (i = 0; i < rdev->num_crtc; i++) {
2234 		if (rdev->mode_info.crtcs[i]->base.enabled)
2235 			num_heads++;
2236 	}
2237 	for (i = 0; i < rdev->num_crtc; i += 2) {
2238 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2239 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2240 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2241 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2242 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2243 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2244 	}
2245 }
2246 
2247 /*
2248  * Core functions
2249  */
2250 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2251 {
2252 	const u32 num_tile_mode_states = 32;
2253 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2254 
2255 	switch (rdev->config.si.mem_row_size_in_kb) {
2256 	case 1:
2257 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2258 		break;
2259 	case 2:
2260 	default:
2261 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2262 		break;
2263 	case 4:
2264 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2265 		break;
2266 	}
2267 
2268 	if ((rdev->family == CHIP_TAHITI) ||
2269 	    (rdev->family == CHIP_PITCAIRN)) {
2270 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2271 			switch (reg_offset) {
2272 			case 0:  /* non-AA compressed depth or any compressed stencil */
2273 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2275 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2276 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2277 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2278 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2281 				break;
2282 			case 1:  /* 2xAA/4xAA compressed depth only */
2283 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2285 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2286 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2287 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2288 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2291 				break;
2292 			case 2:  /* 8xAA compressed depth only */
2293 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2295 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2296 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2297 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2298 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2301 				break;
2302 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2303 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2305 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2306 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2307 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2308 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2310 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2311 				break;
2312 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2313 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2315 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2316 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2317 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2318 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2321 				break;
2322 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2323 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2325 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326 						 TILE_SPLIT(split_equal_to_row_size) |
2327 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2328 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2331 				break;
2332 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2336 						 TILE_SPLIT(split_equal_to_row_size) |
2337 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2338 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2340 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2341 				break;
2342 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2343 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2345 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2346 						 TILE_SPLIT(split_equal_to_row_size) |
2347 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2348 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2350 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2351 				break;
2352 			case 8:  /* 1D and 1D Array Surfaces */
2353 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2354 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2356 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2357 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2358 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2361 				break;
2362 			case 9:  /* Displayable maps. */
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2364 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2366 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2367 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2368 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2371 				break;
2372 			case 10:  /* Display 8bpp. */
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2376 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2377 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2378 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2380 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2381 				break;
2382 			case 11:  /* Display 16bpp. */
2383 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2386 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2387 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2388 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2390 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2391 				break;
2392 			case 12:  /* Display 32bpp. */
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2396 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2397 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2398 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2400 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2401 				break;
2402 			case 13:  /* Thin. */
2403 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2405 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2406 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2408 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2410 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2411 				break;
2412 			case 14:  /* Thin 8 bpp. */
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2415 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2416 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2417 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2418 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2420 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2421 				break;
2422 			case 15:  /* Thin 16 bpp. */
2423 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2425 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2426 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2428 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2430 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2431 				break;
2432 			case 16:  /* Thin 32 bpp. */
2433 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2435 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2437 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2438 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2441 				break;
2442 			case 17:  /* Thin 64 bpp. */
2443 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2445 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446 						 TILE_SPLIT(split_equal_to_row_size) |
2447 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2448 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2451 				break;
2452 			case 21:  /* 8 bpp PRT. */
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2458 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461 				break;
2462 			case 22:  /* 16 bpp PRT */
2463 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2465 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2467 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2468 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2471 				break;
2472 			case 23:  /* 32 bpp PRT */
2473 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2474 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2475 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2477 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2478 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481 				break;
2482 			case 24:  /* 64 bpp PRT */
2483 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2485 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2488 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491 				break;
2492 			case 25:  /* 128 bpp PRT */
2493 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2495 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2497 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2498 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2501 				break;
2502 			default:
2503 				gb_tile_moden = 0;
2504 				break;
2505 			}
2506 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2507 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2508 		}
2509 	} else if ((rdev->family == CHIP_VERDE) ||
2510 		   (rdev->family == CHIP_OLAND) ||
2511 		   (rdev->family == CHIP_HAINAN)) {
2512 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2513 			switch (reg_offset) {
2514 			case 0:  /* non-AA compressed depth or any compressed stencil */
2515 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2518 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2520 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2523 				break;
2524 			case 1:  /* 2xAA/4xAA compressed depth only */
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2530 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2533 				break;
2534 			case 2:  /* 8xAA compressed depth only */
2535 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2538 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2539 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2540 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2542 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2543 				break;
2544 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2549 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2550 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2553 				break;
2554 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2559 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2560 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563 				break;
2564 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2565 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568 						 TILE_SPLIT(split_equal_to_row_size) |
2569 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2570 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573 				break;
2574 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2575 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2577 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578 						 TILE_SPLIT(split_equal_to_row_size) |
2579 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2580 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2582 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583 				break;
2584 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2585 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2586 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2587 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588 						 TILE_SPLIT(split_equal_to_row_size) |
2589 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2590 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2593 				break;
2594 			case 8:  /* 1D and 1D Array Surfaces */
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2596 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2599 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2600 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603 				break;
2604 			case 9:  /* Displayable maps. */
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2609 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2610 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2612 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613 				break;
2614 			case 10:  /* Display 8bpp. */
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2620 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2623 				break;
2624 			case 11:  /* Display 16bpp. */
2625 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2629 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2630 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2632 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633 				break;
2634 			case 12:  /* Display 32bpp. */
2635 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2638 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2639 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2640 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643 				break;
2644 			case 13:  /* Thin. */
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2646 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2649 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2650 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2652 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653 				break;
2654 			case 14:  /* Thin 8 bpp. */
2655 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2659 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2660 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663 				break;
2664 			case 15:  /* Thin 16 bpp. */
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2670 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2672 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2673 				break;
2674 			case 16:  /* Thin 32 bpp. */
2675 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2677 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2679 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2680 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2683 				break;
2684 			case 17:  /* Thin 64 bpp. */
2685 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688 						 TILE_SPLIT(split_equal_to_row_size) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2690 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693 				break;
2694 			case 21:  /* 8 bpp PRT. */
2695 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2700 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2701 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703 				break;
2704 			case 22:  /* 16 bpp PRT */
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2710 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713 				break;
2714 			case 23:  /* 32 bpp PRT */
2715 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2717 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2718 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2720 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723 				break;
2724 			case 24:  /* 64 bpp PRT */
2725 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2727 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2728 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2730 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2733 				break;
2734 			case 25:  /* 128 bpp PRT */
2735 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2737 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2738 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2739 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2740 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2743 				break;
2744 			default:
2745 				gb_tile_moden = 0;
2746 				break;
2747 			}
2748 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2749 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2750 		}
2751 	} else
2752 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2753 }
2754 
2755 static void si_select_se_sh(struct radeon_device *rdev,
2756 			    u32 se_num, u32 sh_num)
2757 {
2758 	u32 data = INSTANCE_BROADCAST_WRITES;
2759 
2760 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2761 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2762 	else if (se_num == 0xffffffff)
2763 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2764 	else if (sh_num == 0xffffffff)
2765 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2766 	else
2767 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2768 	WREG32(GRBM_GFX_INDEX, data);
2769 }
2770 
/* Build a mask with the low @bit_width bits set (all 32 bits for any
 * width of 32 or more, matching the behavior of the original loop).
 */
static u32 si_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffffu;
	return (1u << bit_width) - 1u;
}
2781 
/* Return a bitmask of the compute units that are enabled in the
 * currently selected shader array (see si_select_se_sh()), bounded to
 * @cu_per_sh bits.  Combines the hw fuse register with the user
 * override register; both hold *inactive* CUs, so the result is the
 * inverted union.
 */
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	if (data & 1)
		/* bit 0 presumably marks the fuse data as valid -- TODO confirm */
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	/* fold in user-requested CU disables */
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	/* registers hold inactive CUs; invert to get active ones */
	return ~data & mask;
}
2799 
2800 static void si_setup_spi(struct radeon_device *rdev,
2801 			 u32 se_num, u32 sh_per_se,
2802 			 u32 cu_per_sh)
2803 {
2804 	int i, j, k;
2805 	u32 data, mask, active_cu;
2806 
2807 	for (i = 0; i < se_num; i++) {
2808 		for (j = 0; j < sh_per_se; j++) {
2809 			si_select_se_sh(rdev, i, j);
2810 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2811 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2812 
2813 			mask = 1;
2814 			for (k = 0; k < 16; k++) {
2815 				mask <<= k;
2816 				if (active_cu & mask) {
2817 					data &= ~mask;
2818 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2819 					break;
2820 				}
2821 			}
2822 		}
2823 	}
2824 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2825 }
2826 
/* Return a bitmask of the render backends that are disabled in the
 * currently selected shader array, bounded to
 * max_rb_num_per_se / sh_per_se bits.  Combines the hw fuse register
 * with the user override register.
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		/* bit 0 presumably marks the fuse data as valid -- TODO confirm */
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* fold in user-requested backend disables */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
2846 
/* Gather the per-SH render-backend disable masks into one ASIC-wide
 * bitmap, cache the enabled-RB mask in
 * rdev->config.si.backend_enable_mask, and program PA_SC_RASTER_CONFIG
 * for each shader engine from the 2-bit enable pattern of its RBs.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect per-SH disable bits into one bitmap, one field of
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per (se, sh) pair */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a RB is enabled iff it is not disabled */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			/* NOTE(review): the mapping of the 2-bit enable
			 * pattern to RB_MAP values (1 -> MAP_0,
			 * 2 -> MAP_3, 3 -> MAP_2) looks odd but is the
			 * hw raster-config encoding -- confirm against
			 * the SI register docs before changing. */
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2896 
2897 static void si_gpu_init(struct radeon_device *rdev)
2898 {
2899 	u32 gb_addr_config = 0;
2900 	u32 mc_shared_chmap, mc_arb_ramcfg;
2901 	u32 sx_debug_1;
2902 	u32 hdp_host_path_cntl;
2903 	u32 tmp;
2904 	int i, j, k;
2905 
2906 	switch (rdev->family) {
2907 	case CHIP_TAHITI:
2908 		rdev->config.si.max_shader_engines = 2;
2909 		rdev->config.si.max_tile_pipes = 12;
2910 		rdev->config.si.max_cu_per_sh = 8;
2911 		rdev->config.si.max_sh_per_se = 2;
2912 		rdev->config.si.max_backends_per_se = 4;
2913 		rdev->config.si.max_texture_channel_caches = 12;
2914 		rdev->config.si.max_gprs = 256;
2915 		rdev->config.si.max_gs_threads = 32;
2916 		rdev->config.si.max_hw_contexts = 8;
2917 
2918 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2919 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2920 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2921 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2922 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2923 		break;
2924 	case CHIP_PITCAIRN:
2925 		rdev->config.si.max_shader_engines = 2;
2926 		rdev->config.si.max_tile_pipes = 8;
2927 		rdev->config.si.max_cu_per_sh = 5;
2928 		rdev->config.si.max_sh_per_se = 2;
2929 		rdev->config.si.max_backends_per_se = 4;
2930 		rdev->config.si.max_texture_channel_caches = 8;
2931 		rdev->config.si.max_gprs = 256;
2932 		rdev->config.si.max_gs_threads = 32;
2933 		rdev->config.si.max_hw_contexts = 8;
2934 
2935 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2936 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2937 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2938 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2939 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2940 		break;
2941 	case CHIP_VERDE:
2942 	default:
2943 		rdev->config.si.max_shader_engines = 1;
2944 		rdev->config.si.max_tile_pipes = 4;
2945 		rdev->config.si.max_cu_per_sh = 5;
2946 		rdev->config.si.max_sh_per_se = 2;
2947 		rdev->config.si.max_backends_per_se = 4;
2948 		rdev->config.si.max_texture_channel_caches = 4;
2949 		rdev->config.si.max_gprs = 256;
2950 		rdev->config.si.max_gs_threads = 32;
2951 		rdev->config.si.max_hw_contexts = 8;
2952 
2953 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2954 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2955 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2956 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2957 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2958 		break;
2959 	case CHIP_OLAND:
2960 		rdev->config.si.max_shader_engines = 1;
2961 		rdev->config.si.max_tile_pipes = 4;
2962 		rdev->config.si.max_cu_per_sh = 6;
2963 		rdev->config.si.max_sh_per_se = 1;
2964 		rdev->config.si.max_backends_per_se = 2;
2965 		rdev->config.si.max_texture_channel_caches = 4;
2966 		rdev->config.si.max_gprs = 256;
2967 		rdev->config.si.max_gs_threads = 16;
2968 		rdev->config.si.max_hw_contexts = 8;
2969 
2970 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2971 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2972 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2973 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2974 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2975 		break;
2976 	case CHIP_HAINAN:
2977 		rdev->config.si.max_shader_engines = 1;
2978 		rdev->config.si.max_tile_pipes = 4;
2979 		rdev->config.si.max_cu_per_sh = 5;
2980 		rdev->config.si.max_sh_per_se = 1;
2981 		rdev->config.si.max_backends_per_se = 1;
2982 		rdev->config.si.max_texture_channel_caches = 2;
2983 		rdev->config.si.max_gprs = 256;
2984 		rdev->config.si.max_gs_threads = 16;
2985 		rdev->config.si.max_hw_contexts = 8;
2986 
2987 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2988 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2989 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2990 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2991 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2992 		break;
2993 	}
2994 
2995 	/* Initialize HDP */
2996 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2997 		WREG32((0x2c14 + j), 0x00000000);
2998 		WREG32((0x2c18 + j), 0x00000000);
2999 		WREG32((0x2c1c + j), 0x00000000);
3000 		WREG32((0x2c20 + j), 0x00000000);
3001 		WREG32((0x2c24 + j), 0x00000000);
3002 	}
3003 
3004 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3005 
3006 	evergreen_fix_pci_max_read_req_size(rdev);
3007 
3008 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3009 
3010 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3011 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3012 
3013 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3014 	rdev->config.si.mem_max_burst_length_bytes = 256;
3015 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3016 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3017 	if (rdev->config.si.mem_row_size_in_kb > 4)
3018 		rdev->config.si.mem_row_size_in_kb = 4;
3019 	/* XXX use MC settings? */
3020 	rdev->config.si.shader_engine_tile_size = 32;
3021 	rdev->config.si.num_gpus = 1;
3022 	rdev->config.si.multi_gpu_tile_size = 64;
3023 
3024 	/* fix up row size */
3025 	gb_addr_config &= ~ROW_SIZE_MASK;
3026 	switch (rdev->config.si.mem_row_size_in_kb) {
3027 	case 1:
3028 	default:
3029 		gb_addr_config |= ROW_SIZE(0);
3030 		break;
3031 	case 2:
3032 		gb_addr_config |= ROW_SIZE(1);
3033 		break;
3034 	case 4:
3035 		gb_addr_config |= ROW_SIZE(2);
3036 		break;
3037 	}
3038 
3039 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3040 	 * not have bank info, so create a custom tiling dword.
3041 	 * bits 3:0   num_pipes
3042 	 * bits 7:4   num_banks
3043 	 * bits 11:8  group_size
3044 	 * bits 15:12 row_size
3045 	 */
3046 	rdev->config.si.tile_config = 0;
3047 	switch (rdev->config.si.num_tile_pipes) {
3048 	case 1:
3049 		rdev->config.si.tile_config |= (0 << 0);
3050 		break;
3051 	case 2:
3052 		rdev->config.si.tile_config |= (1 << 0);
3053 		break;
3054 	case 4:
3055 		rdev->config.si.tile_config |= (2 << 0);
3056 		break;
3057 	case 8:
3058 	default:
3059 		/* XXX what about 12? */
3060 		rdev->config.si.tile_config |= (3 << 0);
3061 		break;
3062 	}
3063 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3064 	case 0: /* four banks */
3065 		rdev->config.si.tile_config |= 0 << 4;
3066 		break;
3067 	case 1: /* eight banks */
3068 		rdev->config.si.tile_config |= 1 << 4;
3069 		break;
3070 	case 2: /* sixteen banks */
3071 	default:
3072 		rdev->config.si.tile_config |= 2 << 4;
3073 		break;
3074 	}
3075 	rdev->config.si.tile_config |=
3076 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3077 	rdev->config.si.tile_config |=
3078 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3079 
3080 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3081 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3082 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3083 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3084 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3085 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3086 	if (rdev->has_uvd) {
3087 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3088 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3089 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3090 	}
3091 
3092 	si_tiling_mode_table_init(rdev);
3093 
3094 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3095 		    rdev->config.si.max_sh_per_se,
3096 		    rdev->config.si.max_backends_per_se);
3097 
3098 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3099 		     rdev->config.si.max_sh_per_se,
3100 		     rdev->config.si.max_cu_per_sh);
3101 
3102 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3103 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3104 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
3105 				rdev->config.si.active_cus +=
3106 					hweight32(si_get_cu_active_bitmap(rdev, i, j));
3107 			}
3108 		}
3109 	}
3110 
3111 	/* set HW defaults for 3D engine */
3112 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3113 				     ROQ_IB2_START(0x2b)));
3114 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3115 
3116 	sx_debug_1 = RREG32(SX_DEBUG_1);
3117 	WREG32(SX_DEBUG_1, sx_debug_1);
3118 
3119 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3120 
3121 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3122 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3123 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3124 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3125 
3126 	WREG32(VGT_NUM_INSTANCES, 1);
3127 
3128 	WREG32(CP_PERFMON_CNTL, 0);
3129 
3130 	WREG32(SQ_CONFIG, 0);
3131 
3132 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3133 					  FORCE_EOV_MAX_REZ_CNT(255)));
3134 
3135 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3136 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3137 
3138 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3139 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3140 
3141 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3142 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3143 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3144 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3145 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3146 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3147 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3148 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3149 
3150 	tmp = RREG32(HDP_MISC_CNTL);
3151 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3152 	WREG32(HDP_MISC_CNTL, tmp);
3153 
3154 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3155 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3156 
3157 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3158 
3159 	udelay(50);
3160 }
3161 
3162 /*
3163  * GPU scratch registers helpers function.
3164  */
3165 static void si_scratch_init(struct radeon_device *rdev)
3166 {
3167 	int i;
3168 
3169 	rdev->scratch.num_reg = 7;
3170 	rdev->scratch.reg_base = SCRATCH_REG0;
3171 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3172 		rdev->scratch.free[i] = true;
3173 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3174 	}
3175 }
3176 
/**
 * si_fence_ring_emit - emit a fence on a CP ring (SI)
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the read caches over the GART and then emits an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to the
 * fence GPU address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* presumably CP_COHER_SIZE (full range) -- verify against PM4 spec */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3203 
3204 /*
3205  * IB stuff
3206  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring (SI)
 * @rdev: radeon_device pointer
 * @ib: the IB to execute
 *
 * Emits an optional next_rptr update, the INDIRECT_BUFFER packet
 * itself, and (for non-const IBs) a read-cache flush over the GART for
 * the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write, 4 for the IB packet,
			 * 8 for the trailing cache flush below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write, 4 for the IB packet,
			 * 8 for the trailing cache flush below */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* presumably the write-confirm bit -- verify against PM4 spec */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3263 
3264 /*
3265  * CP.
3266  */
3267 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3268 {
3269 	if (enable)
3270 		WREG32(CP_ME_CNTL, 0);
3271 	else {
3272 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3273 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3274 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3275 		WREG32(SCRATCH_UMSK, 0);
3276 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3277 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3278 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3279 	}
3280 	udelay(50);
3281 }
3282 
3283 static int si_cp_load_microcode(struct radeon_device *rdev)
3284 {
3285 	const __be32 *fw_data;
3286 	int i;
3287 
3288 	if (!rdev->me_fw || !rdev->pfp_fw)
3289 		return -EINVAL;
3290 
3291 	si_cp_enable(rdev, false);
3292 
3293 	/* PFP */
3294 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3295 	WREG32(CP_PFP_UCODE_ADDR, 0);
3296 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3297 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3298 	WREG32(CP_PFP_UCODE_ADDR, 0);
3299 
3300 	/* CE */
3301 	fw_data = (const __be32 *)rdev->ce_fw->data;
3302 	WREG32(CP_CE_UCODE_ADDR, 0);
3303 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3304 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3305 	WREG32(CP_CE_UCODE_ADDR, 0);
3306 
3307 	/* ME */
3308 	fw_data = (const __be32 *)rdev->me_fw->data;
3309 	WREG32(CP_ME_RAM_WADDR, 0);
3310 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3311 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3312 	WREG32(CP_ME_RAM_WADDR, 0);
3313 
3314 	WREG32(CP_PFP_UCODE_ADDR, 0);
3315 	WREG32(CP_CE_UCODE_ADDR, 0);
3316 	WREG32(CP_ME_RAM_WADDR, 0);
3317 	WREG32(CP_ME_RAM_RADDR, 0);
3318 	return 0;
3319 }
3320 
3321 static int si_cp_start(struct radeon_device *rdev)
3322 {
3323 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3324 	int r, i;
3325 
3326 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3327 	if (r) {
3328 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3329 		return r;
3330 	}
3331 	/* init the CP */
3332 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3333 	radeon_ring_write(ring, 0x1);
3334 	radeon_ring_write(ring, 0x0);
3335 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3336 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3337 	radeon_ring_write(ring, 0);
3338 	radeon_ring_write(ring, 0);
3339 
3340 	/* init the CE partitions */
3341 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3342 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3343 	radeon_ring_write(ring, 0xc000);
3344 	radeon_ring_write(ring, 0xe000);
3345 	radeon_ring_unlock_commit(rdev, ring);
3346 
3347 	si_cp_enable(rdev, true);
3348 
3349 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3350 	if (r) {
3351 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3352 		return r;
3353 	}
3354 
3355 	/* setup clear context state */
3356 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3357 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3358 
3359 	for (i = 0; i < si_default_size; i++)
3360 		radeon_ring_write(ring, si_default_state[i]);
3361 
3362 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3363 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3364 
3365 	/* set clear context state */
3366 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3367 	radeon_ring_write(ring, 0);
3368 
3369 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3370 	radeon_ring_write(ring, 0x00000316);
3371 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3372 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3373 
3374 	radeon_ring_unlock_commit(rdev, ring);
3375 
3376 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3377 		ring = &rdev->ring[i];
3378 		r = radeon_ring_lock(rdev, ring, 2);
3379 
3380 		/* clear the compute context state */
3381 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3382 		radeon_ring_write(ring, 0);
3383 
3384 		radeon_ring_unlock_commit(rdev, ring);
3385 	}
3386 
3387 	return 0;
3388 }
3389 
3390 static void si_cp_fini(struct radeon_device *rdev)
3391 {
3392 	struct radeon_ring *ring;
3393 	si_cp_enable(rdev, false);
3394 
3395 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3396 	radeon_ring_fini(rdev, ring);
3397 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3398 
3399 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3400 	radeon_ring_fini(rdev, ring);
3401 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3402 
3403 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3404 	radeon_ring_fini(rdev, ring);
3405 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3406 }
3407 
/**
 * si_cp_resume - program and start the three CP ring buffers
 * @rdev: radeon_device pointer
 *
 * Configures the gfx ring (ring 0) and the two compute rings
 * (rings 1/2): buffer size, read/write pointers and writeback
 * addresses, then starts the CP and ring-tests each ring.
 *
 * Returns 0 on success; a gfx ring test failure is fatal, a compute
 * ring test failure only leaves that ring marked not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* NOTE(review): RB_NO_UPDATE is only OR'd into ring 0's
		 * CNTL; tmp is recomputed for rings 1/2 below, so they
		 * never get it -- presumably intentional, verify. */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3533 
/**
 * si_gpu_check_soft_reset - check which GPU blocks are busy or hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and builds a
 * mask of RADEON_RESET_* flags for every engine that still reports
 * busy.  A zero return means the whole GPU looks idle.  An MC-busy
 * indication is intentionally cleared from the mask before returning,
 * since a busy memory controller is most likely just working, not hung.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3614 
/**
 * si_gpu_soft_reset - soft-reset the engines flagged in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (as returned by
 * si_gpu_check_soft_reset()) selecting which blocks to reset
 *
 * Dumps the current status and VM fault registers, quiesces the chip
 * (PG/CG off, RLC halted, CP halted, selected DMA ring buffers
 * disabled, MC stopped), then pulses the per-block soft-reset bits in
 * GRBM_SOFT_RESET/SRBM_SOFT_RESET and resumes the MC.  No-op when
 * @reset_mask is zero.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM reset mask: any gfx/compute/CP trouble resets
	 * the full 3D pipeline */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert the reset bits, hold for 50us, then deassert */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3746 
/**
 * si_set_clk_bypass_mode - switch the engine/memory clocks to bypass
 *
 * @rdev: radeon_device pointer
 *
 * Enables SPLL bypass, requests a clock-control change and polls
 * SPLL_STATUS (up to rdev->usec_timeout microseconds) for the change
 * to complete, then clears the request/mux-update bits and takes
 * MCLK off the MPLL.  Called on the way into a PCI config reset
 * (see si_gpu_pci_config_reset()).
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the change request to be acknowledged */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3773 
/**
 * si_spll_powerdown - put the SPLL into reset and sleep
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts its RESET and SLEEP
 * bits, then hands control back to the hardware.  Called after the
 * clocks have been switched to bypass (see si_set_clk_bypass_mode()).
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3794 
/**
 * si_gpu_pci_config_reset - reset the asic via the PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavy-hammer fallback used when a soft reset was not enough:
 * quiesces the chip (CP halted, both DMA ring buffers disabled, RLC
 * stopped, MC stopped), switches the clocks to bypass, powers down
 * the SPLL, disables bus mastering, triggers a PCI config reset and
 * then polls CONFIG_MEMSIZE until the asic responds again (or the
 * usec timeout expires).
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
3846 
/**
 * si_asic_reset - try to reset a hung GPU
 *
 * @rdev: radeon_device pointer
 *
 * First attempts a soft reset of whichever blocks report busy; if
 * some blocks are still stuck afterwards and the radeon_hard_reset
 * module parameter is set, falls back to a full PCI config reset.
 * The BIOS scratch "engine hung" flag is set while the GPU is known
 * bad and cleared once the status registers read idle again.
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
3872 
3873 /**
3874  * si_gfx_is_lockup - Check if the GFX engine is locked up
3875  *
3876  * @rdev: radeon_device pointer
3877  * @ring: radeon_ring structure holding ring information
3878  *
3879  * Check if the GFX engine is locked up.
3880  * Returns true if the engine appears to be locked up, false if not.
3881  */
3882 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3883 {
3884 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3885 
3886 	if (!(reset_mask & (RADEON_RESET_GFX |
3887 			    RADEON_RESET_COMPUTE |
3888 			    RADEON_RESET_CP))) {
3889 		radeon_ring_lockup_update(rdev, ring);
3890 		return false;
3891 	}
3892 	return radeon_ring_test_lockup(rdev, ring);
3893 }
3894 
3895 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 *
 * @rdev: radeon_device pointer
 *
 * Zeroes a bank of HDP registers, stops the MC, locks out VGA
 * aperture access (on asics with display), programs the system
 * aperture and framebuffer location from rdev->mc, disables the AGP
 * aperture and resumes the MC.  Finally turns off the VGA renderer
 * so it cannot overwrite driver-owned VRAM objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high half-word, start in the low */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3946 
/**
 * si_vram_gtt_location - pick VRAM and GTT placement in the MC space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to place
 *
 * Caps VRAM at just under 1TB so at least 1GB of address space
 * remains for the GTT, then lets the common helpers place VRAM
 * (at MC address 0) followed by the GTT.
 *
 * NOTE(review): the VRAM placement call uses &rdev->mc while the cap
 * and GTT placement use @mc - presumably callers always pass
 * &rdev->mc so they are the same object; verify against callers.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3960 
3961 static int si_mc_init(struct radeon_device *rdev)
3962 {
3963 	u32 tmp;
3964 	int chansize, numchan;
3965 
3966 	/* Get VRAM informations */
3967 	rdev->mc.vram_is_ddr = true;
3968 	tmp = RREG32(MC_ARB_RAMCFG);
3969 	if (tmp & CHANSIZE_OVERRIDE) {
3970 		chansize = 16;
3971 	} else if (tmp & CHANSIZE_MASK) {
3972 		chansize = 64;
3973 	} else {
3974 		chansize = 32;
3975 	}
3976 	tmp = RREG32(MC_SHARED_CHMAP);
3977 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3978 	case 0:
3979 	default:
3980 		numchan = 1;
3981 		break;
3982 	case 1:
3983 		numchan = 2;
3984 		break;
3985 	case 2:
3986 		numchan = 4;
3987 		break;
3988 	case 3:
3989 		numchan = 8;
3990 		break;
3991 	case 4:
3992 		numchan = 3;
3993 		break;
3994 	case 5:
3995 		numchan = 6;
3996 		break;
3997 	case 6:
3998 		numchan = 10;
3999 		break;
4000 	case 7:
4001 		numchan = 12;
4002 		break;
4003 	case 8:
4004 		numchan = 16;
4005 		break;
4006 	}
4007 	rdev->mc.vram_width = numchan * chansize;
4008 	/* Could aper size report 0 ? */
4009 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4010 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4011 	/* size in MB on si */
4012 	tmp = RREG32(CONFIG_MEMSIZE);
4013 	/* some boards may have garbage in the upper 16 bits */
4014 	if (tmp & 0xffff0000) {
4015 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4016 		if (tmp & 0xffff)
4017 			tmp &= 0xffff;
4018 	}
4019 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4020 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4021 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4022 	si_vram_gtt_location(rdev, &rdev->mc);
4023 	radeon_update_bandwidth_info(rdev);
4024 
4025 	return 0;
4026 }
4027 
4028 /*
4029  * GART
4030  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then invalidates the VM context 0 TLB
 * (VM_INVALIDATE_REQUEST bits 0-15 select contexts 0-15).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4039 
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, sets up VM context 0 to cover the GTT range with the
 * dummy page as fault backing, points contexts 1-15 at the same page
 * table for now (the real per-VM tables are assigned on the fly in
 * radeon_gart.c) and enables them with full protection-fault
 * handling.  Returns 0 on success or a negative error code.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: covers the GTT, faults go to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4128 
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the TLB/L2 controls to their
 * pass-through settings and unpins the GART page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4147 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the page table and releases the common
 * GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4154 
4155 /* vm parser */
4156 static bool si_vm_reg_valid(u32 reg)
4157 {
4158 	/* context regs are fine */
4159 	if (reg >= 0x28000)
4160 		return true;
4161 
4162 	/* check config regs */
4163 	switch (reg) {
4164 	case GRBM_GFX_INDEX:
4165 	case CP_STRMOUT_CNTL:
4166 	case VGT_VTX_VECT_EJECT_REG:
4167 	case VGT_CACHE_INVALIDATION:
4168 	case VGT_ESGS_RING_SIZE:
4169 	case VGT_GSVS_RING_SIZE:
4170 	case VGT_GS_VERTEX_REUSE:
4171 	case VGT_PRIMITIVE_TYPE:
4172 	case VGT_INDEX_TYPE:
4173 	case VGT_NUM_INDICES:
4174 	case VGT_NUM_INSTANCES:
4175 	case VGT_TF_RING_SIZE:
4176 	case VGT_HS_OFFCHIP_PARAM:
4177 	case VGT_TF_MEMORY_BASE:
4178 	case PA_CL_ENHANCE:
4179 	case PA_SU_LINE_STIPPLE_VALUE:
4180 	case PA_SC_LINE_STIPPLE_STATE:
4181 	case PA_SC_ENHANCE:
4182 	case SQC_CACHES:
4183 	case SPI_STATIC_THREAD_MGMT_1:
4184 	case SPI_STATIC_THREAD_MGMT_2:
4185 	case SPI_STATIC_THREAD_MGMT_3:
4186 	case SPI_PS_MAX_WAVE_ID:
4187 	case SPI_CONFIG_CNTL:
4188 	case SPI_CONFIG_CNTL_1:
4189 	case TA_CNTL_AUX:
4190 		return true;
4191 	default:
4192 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4193 		return false;
4194 	}
4195 }
4196 
4197 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4198 				  u32 *ib, struct radeon_cs_packet *pkt)
4199 {
4200 	switch (pkt->opcode) {
4201 	case PACKET3_NOP:
4202 	case PACKET3_SET_BASE:
4203 	case PACKET3_SET_CE_DE_COUNTERS:
4204 	case PACKET3_LOAD_CONST_RAM:
4205 	case PACKET3_WRITE_CONST_RAM:
4206 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4207 	case PACKET3_DUMP_CONST_RAM:
4208 	case PACKET3_INCREMENT_CE_COUNTER:
4209 	case PACKET3_WAIT_ON_DE_COUNTER:
4210 	case PACKET3_CE_WRITE:
4211 		break;
4212 	default:
4213 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4214 		return -EINVAL;
4215 	}
4216 	return 0;
4217 }
4218 
4219 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4220 {
4221 	u32 start_reg, reg, i;
4222 	u32 command = ib[idx + 4];
4223 	u32 info = ib[idx + 1];
4224 	u32 idx_value = ib[idx];
4225 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4226 		/* src address space is register */
4227 		if (((info & 0x60000000) >> 29) == 0) {
4228 			start_reg = idx_value << 2;
4229 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4230 				reg = start_reg;
4231 				if (!si_vm_reg_valid(reg)) {
4232 					DRM_ERROR("CP DMA Bad SRC register\n");
4233 					return -EINVAL;
4234 				}
4235 			} else {
4236 				for (i = 0; i < (command & 0x1fffff); i++) {
4237 					reg = start_reg + (4 * i);
4238 					if (!si_vm_reg_valid(reg)) {
4239 						DRM_ERROR("CP DMA Bad SRC register\n");
4240 						return -EINVAL;
4241 					}
4242 				}
4243 			}
4244 		}
4245 	}
4246 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4247 		/* dst address space is register */
4248 		if (((info & 0x00300000) >> 20) == 0) {
4249 			start_reg = ib[idx + 2];
4250 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4251 				reg = start_reg;
4252 				if (!si_vm_reg_valid(reg)) {
4253 					DRM_ERROR("CP DMA Bad DST register\n");
4254 					return -EINVAL;
4255 				}
4256 			} else {
4257 				for (i = 0; i < (command & 0x1fffff); i++) {
4258 					reg = start_reg + (4 * i);
4259 				if (!si_vm_reg_valid(reg)) {
4260 						DRM_ERROR("CP DMA Bad DST register\n");
4261 						return -EINVAL;
4262 					}
4263 				}
4264 			}
4265 		}
4266 	}
4267 	return 0;
4268 }
4269 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer contents
 * @pkt: decoded packet header
 *
 * Checks a gfx-ring PACKET3 issued from a VM IB: plain draw/state
 * opcodes are allowed outright, while packets that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) have their target registers validated via
 * si_vm_reg_valid().  Returns 0 if the packet is acceptable,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* select field 0: destination is a register - validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* register destination: single reg or a contiguous range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-to-register mode: check the target */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* the whole range must stay inside the config reg window */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4387 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer contents
 * @pkt: decoded packet header
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller
 * set of opcodes is allowed outright (no draw packets), and packets
 * that can write registers have their targets validated via
 * si_vm_reg_valid().  Returns 0 if the packet is acceptable,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* select field 0: destination is a register - validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* register destination: single reg or a contiguous range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-to-register mode: check the target */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4475 
/**
 * si_ib_parse - validate a VM indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to check
 *
 * Walks the IB packet by packet.  Type-0 packets are rejected
 * outright, type-2 packets are skipped, and type-3 packets are
 * dispatched to the CE, gfx or compute checker depending on whether
 * the IB is a const IB and on which ring it targets.  Stops at the
 * first invalid packet and returns -EINVAL; returns 0 when the
 * whole IB passes.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* advance past the packet header and payload */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4527 
4528 /*
4529  * vm
4530  */
4531 int si_vm_init(struct radeon_device *rdev)
4532 {
4533 	/* number of VMs */
4534 	rdev->vm_manager.nvm = 16;
4535 	/* base offset of vram pages */
4536 	rdev->vm_manager.vram_base_offset = 0;
4537 
4538 	return 0;
4539 }
4540 
/* Nothing to tear down: si_vm_init() only sets a couple of fields. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4544 
4545 /**
4546  * si_vm_decode_fault - print human readable fault info
4547  *
4548  * @rdev: radeon_device pointer
4549  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4550  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4551  *
4552  * Print human readable fault information (SI).
4553  */
4554 static void si_vm_decode_fault(struct radeon_device *rdev,
4555 			       u32 status, u32 addr)
4556 {
4557 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4558 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4559 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4560 	char *block;
4561 
4562 	if (rdev->family == CHIP_TAHITI) {
4563 		switch (mc_id) {
4564 		case 160:
4565 		case 144:
4566 		case 96:
4567 		case 80:
4568 		case 224:
4569 		case 208:
4570 		case 32:
4571 		case 16:
4572 			block = "CB";
4573 			break;
4574 		case 161:
4575 		case 145:
4576 		case 97:
4577 		case 81:
4578 		case 225:
4579 		case 209:
4580 		case 33:
4581 		case 17:
4582 			block = "CB_FMASK";
4583 			break;
4584 		case 162:
4585 		case 146:
4586 		case 98:
4587 		case 82:
4588 		case 226:
4589 		case 210:
4590 		case 34:
4591 		case 18:
4592 			block = "CB_CMASK";
4593 			break;
4594 		case 163:
4595 		case 147:
4596 		case 99:
4597 		case 83:
4598 		case 227:
4599 		case 211:
4600 		case 35:
4601 		case 19:
4602 			block = "CB_IMMED";
4603 			break;
4604 		case 164:
4605 		case 148:
4606 		case 100:
4607 		case 84:
4608 		case 228:
4609 		case 212:
4610 		case 36:
4611 		case 20:
4612 			block = "DB";
4613 			break;
4614 		case 165:
4615 		case 149:
4616 		case 101:
4617 		case 85:
4618 		case 229:
4619 		case 213:
4620 		case 37:
4621 		case 21:
4622 			block = "DB_HTILE";
4623 			break;
4624 		case 167:
4625 		case 151:
4626 		case 103:
4627 		case 87:
4628 		case 231:
4629 		case 215:
4630 		case 39:
4631 		case 23:
4632 			block = "DB_STEN";
4633 			break;
4634 		case 72:
4635 		case 68:
4636 		case 64:
4637 		case 8:
4638 		case 4:
4639 		case 0:
4640 		case 136:
4641 		case 132:
4642 		case 128:
4643 		case 200:
4644 		case 196:
4645 		case 192:
4646 			block = "TC";
4647 			break;
4648 		case 112:
4649 		case 48:
4650 			block = "CP";
4651 			break;
4652 		case 49:
4653 		case 177:
4654 		case 50:
4655 		case 178:
4656 			block = "SH";
4657 			break;
4658 		case 53:
4659 		case 190:
4660 			block = "VGT";
4661 			break;
4662 		case 117:
4663 			block = "IH";
4664 			break;
4665 		case 51:
4666 		case 115:
4667 			block = "RLC";
4668 			break;
4669 		case 119:
4670 		case 183:
4671 			block = "DMA0";
4672 			break;
4673 		case 61:
4674 			block = "DMA1";
4675 			break;
4676 		case 248:
4677 		case 120:
4678 			block = "HDP";
4679 			break;
4680 		default:
4681 			block = "unknown";
4682 			break;
4683 		}
4684 	} else {
4685 		switch (mc_id) {
4686 		case 32:
4687 		case 16:
4688 		case 96:
4689 		case 80:
4690 		case 160:
4691 		case 144:
4692 		case 224:
4693 		case 208:
4694 			block = "CB";
4695 			break;
4696 		case 33:
4697 		case 17:
4698 		case 97:
4699 		case 81:
4700 		case 161:
4701 		case 145:
4702 		case 225:
4703 		case 209:
4704 			block = "CB_FMASK";
4705 			break;
4706 		case 34:
4707 		case 18:
4708 		case 98:
4709 		case 82:
4710 		case 162:
4711 		case 146:
4712 		case 226:
4713 		case 210:
4714 			block = "CB_CMASK";
4715 			break;
4716 		case 35:
4717 		case 19:
4718 		case 99:
4719 		case 83:
4720 		case 163:
4721 		case 147:
4722 		case 227:
4723 		case 211:
4724 			block = "CB_IMMED";
4725 			break;
4726 		case 36:
4727 		case 20:
4728 		case 100:
4729 		case 84:
4730 		case 164:
4731 		case 148:
4732 		case 228:
4733 		case 212:
4734 			block = "DB";
4735 			break;
4736 		case 37:
4737 		case 21:
4738 		case 101:
4739 		case 85:
4740 		case 165:
4741 		case 149:
4742 		case 229:
4743 		case 213:
4744 			block = "DB_HTILE";
4745 			break;
4746 		case 39:
4747 		case 23:
4748 		case 103:
4749 		case 87:
4750 		case 167:
4751 		case 151:
4752 		case 231:
4753 		case 215:
4754 			block = "DB_STEN";
4755 			break;
4756 		case 72:
4757 		case 68:
4758 		case 8:
4759 		case 4:
4760 		case 136:
4761 		case 132:
4762 		case 200:
4763 		case 196:
4764 			block = "TC";
4765 			break;
4766 		case 112:
4767 		case 48:
4768 			block = "CP";
4769 			break;
4770 		case 49:
4771 		case 177:
4772 		case 50:
4773 		case 178:
4774 			block = "SH";
4775 			break;
4776 		case 53:
4777 			block = "VGT";
4778 			break;
4779 		case 117:
4780 			block = "IH";
4781 			break;
4782 		case 51:
4783 		case 115:
4784 			block = "RLC";
4785 			break;
4786 		case 119:
4787 		case 183:
4788 			block = "DMA0";
4789 			break;
4790 		case 61:
4791 			block = "DMA1";
4792 			break;
4793 		case 248:
4794 		case 120:
4795 			block = "HDP";
4796 			break;
4797 		default:
4798 			block = "unknown";
4799 			break;
4800 		}
4801 	}
4802 
4803 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4804 	       protections, vmid, addr,
4805 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4806 	       block, mc_id);
4807 }
4808 
/* si_vm_flush - emit ring packets that update a VM context's page table
 * base and invalidate its TLB entries.  Packet order is significant:
 * base update, then HDP flush, then VM invalidate, then PFP/ME sync.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4851 
4852 /*
4853  *  Power and clock gating
4854  */
/* si_wait_for_rlc_serdes - busy-wait until both RLC serdes master-busy
 * registers read idle, up to rdev->usec_timeout microseconds each.
 * A timeout is silently ignored; callers proceed regardless.
 */
static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	int i;

	/* wait for the first serdes master to go idle */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
			break;
		udelay(1);
	}

	/* then the second */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
			break;
		udelay(1);
	}
}
4871 
/* si_enable_gui_idle_interrupt - toggle the CP context busy/empty
 * interrupts.  On disable, also wait for the RLC to report the GFX
 * block clocked and powered before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		/* wait (up to usec_timeout) until only GFX_CLOCK_STATUS and
		 * GFX_POWER_STATUS remain set in RLC_STAT; timeout is ignored
		 */
		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4897 
/* si_set_uvd_dcm - configure UVD dynamic clock mode.
 * @sw_mode: true selects software-controlled gating (clears the
 * divider override bits and programs UVD_CGC_CTRL2); false selects
 * hardware mode (sets the override bits, clears UVD_CGC_CTRL2).
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
			   bool sw_mode)
{
	u32 tmp, tmp2;

	tmp = RREG32(UVD_CGC_CTRL);
	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
	tmp |= DCM | CG_DT(1) | CLK_OD(4);

	if (sw_mode) {
		/* bits 11-30: clock gating override bits — TODO confirm exact field */
		tmp &= ~0x7ffff800;
		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
	} else {
		tmp |= 0x7ffff800;
		tmp2 = 0;
	}

	WREG32(UVD_CGC_CTRL, tmp);
	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
4918 
/* si_init_uvd_internal_cg - initialize UVD internal clock gating.
 * hw_mode is hard-coded true, so the else branch (force DCM off) is
 * currently dead; it is kept as the documented alternative policy.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4931 
4932 static u32 si_halt_rlc(struct radeon_device *rdev)
4933 {
4934 	u32 data, orig;
4935 
4936 	orig = data = RREG32(RLC_CNTL);
4937 
4938 	if (data & RLC_ENABLE) {
4939 		data &= ~RLC_ENABLE;
4940 		WREG32(RLC_CNTL, data);
4941 
4942 		si_wait_for_rlc_serdes(rdev);
4943 	}
4944 
4945 	return orig;
4946 }
4947 
4948 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4949 {
4950 	u32 tmp;
4951 
4952 	tmp = RREG32(RLC_CNTL);
4953 	if (tmp != rlc)
4954 		WREG32(RLC_CNTL, rlc);
4955 }
4956 
4957 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4958 {
4959 	u32 data, orig;
4960 
4961 	orig = data = RREG32(DMA_PG);
4962 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4963 		data |= PG_CNTL_ENABLE;
4964 	else
4965 		data &= ~PG_CNTL_ENABLE;
4966 	if (orig != data)
4967 		WREG32(DMA_PG, data);
4968 }
4969 
/* si_init_dma_pg - program the DMA power-gating state machine.
 * NOTE(review): the values written here are opaque hardware programming
 * constants; presumably from AMD's SI programming guide — not derivable
 * from this file.
 */
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* five zero writes complete the FSM programming sequence */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
4980 
/* si_enable_gfx_cgpg - toggle graphics coarse-grain power gating.
 * Enable programs the power-up/down delay thresholds and turns on
 * automatic power gating; disable turns the automation off.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down/TT/MS delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* result deliberately discarded; presumably the read itself
		 * is needed to wake/flush the gfx block — TODO confirm
		 */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5005 
/* si_init_gfx_cgpg - set up graphics power gating: point the RLC at the
 * save/restore and clear-state buffers and program the grbm-register
 * save-gate idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* take the SRM base from the register programmed above */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5025 
/* si_get_cu_active_bitmap - return a bitmap of the active compute units
 * for shader engine @se / shader array @sh.  Combines the fuse-level and
 * user-level CU disable bits and inverts them, masked to the number of
 * CUs per SH.
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* select the target SE/SH, read both disable registers, then
	 * restore broadcast mode
	 */
	si_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* disable bits live in the upper halves of both registers */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* active CUs = not-disabled CUs */
	return (~tmp) & mask;
}
5048 
5049 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5050 {
5051 	u32 i, j, k, active_cu_number = 0;
5052 	u32 mask, counter, cu_bitmap;
5053 	u32 tmp = 0;
5054 
5055 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5056 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5057 			mask = 1;
5058 			cu_bitmap = 0;
5059 			counter  = 0;
5060 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5061 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5062 					if (counter < 2)
5063 						cu_bitmap |= mask;
5064 					counter++;
5065 				}
5066 				mask <<= 1;
5067 			}
5068 
5069 			active_cu_number += counter;
5070 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5071 		}
5072 	}
5073 
5074 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5075 
5076 	tmp = RREG32(RLC_MAX_PG_CU);
5077 	tmp &= ~MAX_PU_CU_MASK;
5078 	tmp |= MAX_PU_CU(active_cu_number);
5079 	WREG32(RLC_MAX_PG_CU, tmp);
5080 }
5081 
/* si_enable_cgcg - toggle coarse-grain clock gating (CGCG/CGLS).
 * Enable programs the serdes write masks while the RLC is halted;
 * disable performs dummy CB_CGTT_SCLK_CTRL reads before clearing the
 * enable bits.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while broadcasting the serdes write */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC_CNTL value */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads; presumably needed to settle the CB clock
		 * gating logic before disabling — TODO confirm
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5121 
/* si_enable_mgcg - toggle medium-grain clock gating (and CP memory
 * light sleep when supported).  Both paths broadcast a serdes write
 * with the RLC halted; only the RLC_SERDES_WR_CTRL value differs.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		/* opaque hardware programming value for MGCG-on */
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally let CP memories go to light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits so gating takes effect */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take CP memories out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5177 
/* si_enable_uvd_mgcg - toggle UVD medium-grain clock gating via the
 * UVD context registers, UVD_CGC_CTRL, and the SMC CGTT local controls.
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* zero = gating enabled for all local clocks */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* all-ones = gating overridden (clocks forced on) */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5209 
/* Memory-controller/VM clock-gating control registers; iterated by
 * si_enable_mc_ls() and si_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5222 
5223 static void si_enable_mc_ls(struct radeon_device *rdev,
5224 			    bool enable)
5225 {
5226 	int i;
5227 	u32 orig, data;
5228 
5229 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5230 		orig = data = RREG32(mc_cg_registers[i]);
5231 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5232 			data |= MC_LS_ENABLE;
5233 		else
5234 			data &= ~MC_LS_ENABLE;
5235 		if (data != orig)
5236 			WREG32(mc_cg_registers[i], data);
5237 	}
5238 }
5239 
5240 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5241 			       bool enable)
5242 {
5243 	int i;
5244 	u32 orig, data;
5245 
5246 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5247 		orig = data = RREG32(mc_cg_registers[i]);
5248 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5249 			data |= MC_CG_ENABLE;
5250 		else
5251 			data &= ~MC_CG_ENABLE;
5252 		if (data != orig)
5253 			WREG32(mc_cg_registers[i], data);
5254 	}
5255 }
5256 
/* si_enable_dma_mgcg - toggle medium-grain clock gating for both DMA
 * engines (DMA0 and DMA1 share the same register layout at different
 * offsets).
 */
static void si_enable_dma_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, offset;
	int i;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < 2; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			/* let the memories power down */
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < 2; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			/* force memories on */
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}
}
5293 
5294 static void si_enable_bif_mgls(struct radeon_device *rdev,
5295 			       bool enable)
5296 {
5297 	u32 orig, data;
5298 
5299 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5300 
5301 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5302 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5303 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5304 	else
5305 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5306 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5307 
5308 	if (orig != data)
5309 		WREG32_PCIE(PCIE_CNTL2, data);
5310 }
5311 
5312 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5313 			       bool enable)
5314 {
5315 	u32 orig, data;
5316 
5317 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5318 
5319 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5320 		data &= ~CLOCK_GATING_DIS;
5321 	else
5322 		data |= CLOCK_GATING_DIS;
5323 
5324 	if (orig != data)
5325 		WREG32(HDP_HOST_PATH_CNTL, data);
5326 }
5327 
5328 static void si_enable_hdp_ls(struct radeon_device *rdev,
5329 			     bool enable)
5330 {
5331 	u32 orig, data;
5332 
5333 	orig = data = RREG32(HDP_MEM_POWER_LS);
5334 
5335 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5336 		data |= HDP_LS_ENABLE;
5337 	else
5338 		data &= ~HDP_LS_ENABLE;
5339 
5340 	if (orig != data)
5341 		WREG32(HDP_MEM_POWER_LS, data);
5342 }
5343 
/* si_update_cg - enable/disable clock gating for the blocks selected in
 * @block (a RADEON_CG_BLOCK_* bitmask).
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable, reversed on disable */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5384 
/* si_init_cg - enable clock gating on all supported blocks; UVD gating
 * and its internal CG setup are only done when UVD is present.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5397 
/* si_fini_cg - disable clock gating; UVD first, then the remaining
 * blocks (reverse of si_init_cg()).
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5409 
5410 u32 si_get_csb_size(struct radeon_device *rdev)
5411 {
5412 	u32 count = 0;
5413 	const struct cs_section_def *sect = NULL;
5414 	const struct cs_extent_def *ext = NULL;
5415 
5416 	if (rdev->rlc.cs_data == NULL)
5417 		return 0;
5418 
5419 	/* begin clear state */
5420 	count += 2;
5421 	/* context control state */
5422 	count += 3;
5423 
5424 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5425 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5426 			if (sect->id == SECT_CONTEXT)
5427 				count += 2 + ext->reg_count;
5428 			else
5429 				return 0;
5430 		}
5431 	}
5432 	/* pa_sc_raster_config */
5433 	count += 3;
5434 	/* end clear state */
5435 	count += 2;
5436 	/* clear state */
5437 	count += 2;
5438 
5439 	return count;
5440 }
5441 
/* si_get_csb_buffer - fill @buffer with the clear-state packet stream
 * described by rdev->rlc.cs_data.  The layout must match the size
 * computed by si_get_csb_size().  All dwords are written little-endian.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit a SET_CONTEXT_REG packet per extent; bail out on any
	 * non-context section (buffer would then be short)
	 */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config: per-family raster configuration value */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5501 
/* si_init_pg - initialize power gating.  When no PG flags are set, the
 * RLC save/restore and clear-state base registers are still programmed
 * because the RLC needs them regardless of power gating.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5522 
/* si_fini_pg - disable DMA and GFX power gating if it was enabled */
static void si_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		si_enable_dma_pg(rdev, false);
		si_enable_gfx_cgpg(rdev, false);
	}
}
5530 
5531 /*
5532  * RLC
5533  */
/* si_rlc_reset - pulse the RLC soft-reset bit in GRBM_SOFT_RESET,
 * with a 50 us settle delay after assert and after deassert.
 */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5545 
/* si_rlc_stop - disable the RLC, mask the gui-idle interrupt and wait
 * for the serdes to drain.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5554 
/* si_rlc_start - enable the RLC and its gui-idle interrupt, then allow
 * 50 us for it to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5563 
5564 static bool si_lbpw_supported(struct radeon_device *rdev)
5565 {
5566 	u32 tmp;
5567 
5568 	/* Enable LBPW only for DDR3 */
5569 	tmp = RREG32(MC_SEQ_MISC0);
5570 	if ((tmp & 0xF0000000) == 0xB0000000)
5571 		return true;
5572 	return false;
5573 }
5574 
/* si_enable_lbpw - toggle RLC load balancing per watt.  When disabling,
 * broadcast to all SE/SH and enable all CUs via SPI_LB_CU_MASK.
 */
static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 tmp;

	tmp = RREG32(RLC_LB_CNTL);
	if (enable)
		tmp |= LOAD_BALANCE_ENABLE;
	else
		tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);

	if (!enable) {
		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(SPI_LB_CU_MASK, 0x00ff);
	}
}
5591 
/* si_rlc_resume - bring up the RLC: stop/reset it, initialize power and
 * clock gating, program its control registers, upload the RLC microcode
 * (big-endian words in the firmware blob) and restart it.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was not loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the microcode word by word, converting from BE */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5631 
/* si_enable_interrupts - turn on the interrupt handler (IH) controller
 * and its ring buffer, and record the enabled state.
 */
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
5643 
/* si_disable_interrupts - turn off the IH controller and its ring
 * buffer and reset the ring pointers to zero.
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
5659 
/* si_disable_interrupt_state - mask every interrupt source: CP rings,
 * both DMA engines, GRBM, all present CRTCs (vblank + pageflip), and -
 * on parts with a display engine - the DAC and HPD pins.  HPD polarity
 * bits are preserved while the enable bits are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline interrupts per CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts per CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD interrupt enables but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5717 
/* si_irq_init - allocate and program the interrupt handler ring, bring
 * up the RLC, configure the IH ring buffer (with optional writeback and
 * MSI rptr-rearm), mask all sources and finally enable interrupts.
 *
 * Returns 0 on success or a negative error code from ring allocation or
 * RLC bringup (the IH ring is freed again in the latter case).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5788 
/**
 * si_irq_set - program the asic's interrupt enable registers
 * @rdev: radeon_device pointer
 *
 * Translates the software interrupt state in rdev->irq (ring fences,
 * per-crtc vblank/pageflip, hotplug, thermal) into the corresponding
 * hardware interrupt enable registers.  Enable bits not requested are
 * explicitly written back as disabled.
 *
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* keep only the context busy/empty bits; everything else starts disabled */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		/* start from current HPD state with the enable bit masked off */
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is enabled when either the drm vblank path or a pending
	 * pageflip needs it */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are left enabled unconditionally on all
	 * populated crtcs; they are acked in si_irq_ack() */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5965 
5966 static inline void si_irq_ack(struct radeon_device *rdev)
5967 {
5968 	u32 tmp;
5969 
5970 	if (ASIC_IS_NODCE(rdev))
5971 		return;
5972 
5973 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5974 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5975 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5976 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5977 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5978 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5979 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5980 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5981 	if (rdev->num_crtc >= 4) {
5982 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5983 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5984 	}
5985 	if (rdev->num_crtc >= 6) {
5986 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5987 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5988 	}
5989 
5990 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5991 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5992 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5993 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5994 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5995 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5996 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5997 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5998 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5999 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6000 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6001 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6002 
6003 	if (rdev->num_crtc >= 4) {
6004 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6005 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6006 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6007 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6008 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6009 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6010 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6011 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6012 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6013 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6014 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6015 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6016 	}
6017 
6018 	if (rdev->num_crtc >= 6) {
6019 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6020 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6021 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6022 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6023 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6024 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6025 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6026 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6027 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6028 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6029 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6030 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6031 	}
6032 
6033 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6034 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6035 		tmp |= DC_HPDx_INT_ACK;
6036 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6037 	}
6038 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6039 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6040 		tmp |= DC_HPDx_INT_ACK;
6041 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6042 	}
6043 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6044 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6045 		tmp |= DC_HPDx_INT_ACK;
6046 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6047 	}
6048 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6049 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6050 		tmp |= DC_HPDx_INT_ACK;
6051 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6052 	}
6053 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6054 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6055 		tmp |= DC_HPDx_INT_ACK;
6056 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6057 	}
6058 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6059 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6060 		tmp |= DC_HPDx_INT_ACK;
6061 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6062 	}
6063 }
6064 
/**
 * si_irq_disable - disable interrupt delivery and quiesce pending irqs
 * @rdev: radeon_device pointer
 *
 * Disables the IH ring, waits briefly for in-flight interrupts to land,
 * acks anything still pending, then forces all source enables off.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6073 
/**
 * si_irq_suspend - disable interrupts and stop the RLC for suspend
 * @rdev: radeon_device pointer
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6079 
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon_device pointer
 *
 * Suspends irq processing and frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6085 
6086 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6087 {
6088 	u32 wptr, tmp;
6089 
6090 	if (rdev->wb.enabled)
6091 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6092 	else
6093 		wptr = RREG32(IH_RB_WPTR);
6094 
6095 	if (wptr & RB_OVERFLOW) {
6096 		/* When a ring buffer overflow happen start parsing interrupt
6097 		 * from the last not overwritten vector (wptr + 16). Hopefully
6098 		 * this should allow us to catchup.
6099 		 */
6100 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6101 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6102 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6103 		tmp = RREG32(IH_RB_CNTL);
6104 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6105 		WREG32(IH_RB_CNTL, tmp);
6106 		wptr &= ~RB_OVERFLOW;
6107 	}
6108 	return (wptr & rdev->ih.ptr_mask);
6109 }
6110 
6111 /*        SI IV Ring
6112  * Each IV ring entry is 128 bits:
6113  * [7:0]    - interrupt source id
6114  * [31:8]   - reserved
6115  * [59:32]  - interrupt source data
6116  * [63:60]  - reserved
6117  * [71:64]  - RINGID
6118  * [79:72]  - VMID
6119  * [127:80] - reserved
6120  */
/**
 * si_irq_process - drain and dispatch the IH (interrupt handler) ring
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, decoding each 16-byte vector
 * (see the IV ring layout comment above) and dispatching on the source
 * id: crtc vblank/vline, pageflips, hotplug, UVD, VM protection faults,
 * CP/DMA fences and thermal events.  Hotplug and thermal handling is
 * deferred to work queues.  An atomic lock serializes concurrent
 * callers; after releasing it the wptr is re-read and processing
 * restarts if new vectors arrived in the meantime.
 *
 * Returns IRQ_HANDLED if work was done, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* decode the 128-bit IV ring entry (fields per layout above) */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_vblank(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_vblank(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_vblank(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_vblank(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_vblank(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_vblank(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* flip src_ids are 8,10,..18 -> crtc index (src_id - 8) / 2 */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM protection fault: latch then clear fault addr/status */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6459 
6460 /*
6461  * startup/shutdown callbacks
6462  */
/**
 * si_startup - bring up the asic for operation
 * @rdev: radeon_device pointer
 *
 * Performs the ordered hardware init sequence: PCIe link/ASPM setup,
 * MC programming and firmware load, GART enable, GPU/RLC/writeback
 * init, fence driver start on all rings, UVD resume, IRQ init, ring
 * init for GFX/CP1/CP2/DMA0/DMA1 (+UVD), CP/DMA resume, then IB pool,
 * VM manager and audio init.  The order is significant.
 *
 * Returns 0 on success, negative error code on the first failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* with dpm enabled the MC ucode was already loaded by the dpm code */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional; on failure just disable its ring and continue */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6639 
/**
 * si_resume - resume the asic after suspend
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via atombios, restores golden registers, resumes
 * dpm power management if in use, then runs the full startup sequence.
 * On startup failure accel_working is cleared so further acceleration
 * is refused.
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6668 
/**
 * si_suspend - quiesce the asic for suspend
 * @rdev: radeon_device pointer
 *
 * Tears down in the reverse order of startup: power management, audio,
 * VM manager, CP and DMA engines, UVD, powergating/clockgating, irqs,
 * writeback and finally the GART.
 *
 * Returns 0 (cannot fail).
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6687 
6688 /* Plan is to move initialization in that function and use
6689  * helper function so that radeon_device_init pretty much
6690  * do nothing more than calling asic specific function. This
6691  * should also allow to remove a bunch of callback function
6692  * like vram_info.
6693  */
/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * One-time setup for SI parts: fetches and validates the vBIOS,
 * posts the card if necessary, initializes clocks, the memory
 * controller, the ring descriptors and the GART, then starts the hw
 * via si_startup().  If si_startup() fails the acceleration state is
 * torn down again but init still returns 0 so the driver can run in
 * non-accelerated (modesetting only) mode.  Returns a negative error
 * code only for fatal failures (no/invalid BIOS, missing firmware,
 * MC/BO/fence/GART init errors, missing MC ucode).
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" but this is the SI path -
		 * looks like a copy/paste leftover from ni.c. */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* Load the microcode only when some image is missing, so a
	 * previously loaded set is reused. */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* Pre-size the rings (1MB for the three CP rings, 64KB for the
	 * two DMA rings); the backing objects are allocated later. */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD init failure is not fatal - we simply don't set up its ring. */
	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* Not fatal: fall back to non-accelerated mode after
		 * tearing the acceleration state down again. */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
6820 
/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Undoes everything set up by si_init()/si_startup(): engines,
 * gating, interrupts, writeback, memory managers, UVD, GART, fences
 * and finally the atombios state and the BIOS copy.  Ordering
 * mirrors the error-unwind path in si_init().
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);	/* kfree(NULL) is a no-op */
	rdev->bios = NULL;
}
6847 
6848 /**
6849  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6850  *
6851  * @rdev: radeon_device pointer
6852  *
6853  * Fetches a GPU clock counter snapshot (SI).
6854  * Returns the 64 bit clock counter snapshot.
6855  */
6856 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6857 {
6858 	uint64_t clock;
6859 
6860 	mutex_lock(&rdev->gpu_clock_mutex);
6861 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6862 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6863 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6864 	mutex_unlock(&rdev->gpu_clock_mutex);
6865 	return clock;
6866 }
6867 
/**
 * si_set_uvd_clocks - reprogram the UVD PLL (UPLL)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 puts the PLL to sleep)
 * @dclk: requested UVD decode clock (0 puts the PLL to sleep)
 *
 * Switches VCLK/DCLK onto the bypass clock, reprograms the UPLL
 * feedback and post dividers for the requested frequencies, then
 * switches the clocks back to the PLL outputs.  If either requested
 * clock is zero the PLL is left bypassed and put to sleep.
 * The exact write/delay sequence below follows the hw programming
 * procedure and must not be reordered.
 * Returns 0 on success, a negative error code if the divider
 * calculation or a UPLL control request fails.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* Compute feedback and post dividers for the requested clocks.
	 * Constants are the PLL constraints (ref/vco ranges, divider
	 * limits) handed to the shared helper. */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range depending on the feedback divider
	 * - NOTE(review): exact semantics not visible here, confirm
	 * against the register spec. */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6958 
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If the link partner advertises 5.0 or 8.0 GT/s support and the link
 * is not already running at the best supported rate, retrains the
 * link - for gen3 this includes the equalization retry loop - and
 * then requests the new target link speed.  Silently does nothing for
 * IGPs, non-PCIE parts, when disabled via radeon.pcie_gen2=0, or when
 * the PCIE capability of root port or GPU cannot be found.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the link partner support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current rate encoding: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* offsets of the PCIE capability blocks in config space */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends; HAWD is restored below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* widen the link back up if it trained narrower
			 * than the detected maximum */
			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop: redo EQ up to 10 times
			 * or until no transactions are pending */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved compliance/margin
				 * fields - NOTE(review): (1 << 4) | (7 << 9) are
				 * magic LNKCTL2 bits, confirm against the PCIe
				 * spec and prefer PCI_EXP_LNKCTL2_* constants */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low nibble of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change ... */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* ... and wait for the hw to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7115 
/**
 * si_program_aspm - program PCIE active state power management
 *
 * @rdev: radeon_device pointer
 *
 * Tunes the link power-saving behaviour: N_FTS, L0s/L1 inactivity
 * timers, PLL power-down in L1, dynamic lane power states and the
 * clock-request (CLKPM) derived clock selects.  The disable_* locals
 * are compile-time policy knobs; all are currently false, i.e. L0s,
 * L1 and PLL-off-in-L1 are all enabled and CLKPM is used when the
 * root port advertises it.  Does nothing when radeon.aspm=0 or the
 * device is not PCIE.  The read-modify-write pattern only writes a
 * register back when its value actually changed.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; only written out
	 * below once it is known whether L1 handling modifies it */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in L1/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything but
			 * Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan need a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* use CLKREQ-derived clocks only if the root port
			 * advertises clock power management */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* reroute the thermal/misc/bypass clocks so the
				 * reference clock can be gated with CLKREQ */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write out the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* if the N_FTS field is saturated on a reversed link, turn the
	 * L0s inactivity timer back off */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7319