xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 5bd8e16d)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
/*
 * Firmware images required by this driver, one set per SI ASIC family:
 * pfp/me/ce (CP micro engines), mc (memory controller), rlc (run list
 * controller) and smc (system management controller).  MODULE_FIRMWARE()
 * records each name in the module info so userspace tooling can pre-load
 * the blobs.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
/* Forward declarations for static helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Externs implemented in other radeon source files (sumo, r600, evergreen
 * and si_dma) that SI reuses rather than reimplementing.
 */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_dma_vm_set_page(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
88 
/*
 * RLC save/restore register list for VERDE.  Entries appear to be pairs of
 * ((instance_select << 16) | (register_byte_offset >> 2)) followed by a
 * placeholder 0; the bare 0x3 and the trailing lone 0 look like format
 * markers for the RLC microcode.  NOTE(review): the exact consumption of
 * this blob happens in sumo_rlc_init()/the RLC firmware, not visible in
 * this chunk — confirm before changing the layout.
 *
 * Fix: the entry after the 0xac54 selector used an uppercase "0X" hex
 * prefix, inconsistent with every other literal in this file; normalized
 * to lowercase (value unchanged).
 */
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};
310 
/*
 * TAHITI golden settings for RLC registers.  Apparently consumed as
 * {offset, mask, value} triples by the golden-register programming helper
 * (defined elsewhere in the driver — confirm against caller).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
320 
/*
 * TAHITI golden register overrides, as {offset, mask, value} triples —
 * presumably applied read-modify-write by the golden-register helper
 * (not visible in this chunk; confirm against caller).
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
355 
/* Additional TAHITI golden override ({offset, mask, value} triple). */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
360 
/*
 * PITCAIRN golden settings for RLC registers ({offset, mask, value}
 * triples; applied by the golden-register helper — confirm against caller).
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
369 
/*
 * PITCAIRN golden register overrides ({offset, mask, value} triples;
 * applied by the golden-register helper — confirm against caller).
 */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
400 
/*
 * VERDE golden settings for RLC registers ({offset, mask, value} triples;
 * applied by the golden-register helper — confirm against caller).
 */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
409 
/*
 * VERDE golden register overrides ({offset, mask, value} triples;
 * applied by the golden-register helper — confirm against caller).
 *
 * Fix: the original table repeated several triples verbatim (0xd030/0xd830
 * twice; 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x8e88, 0x8e84, 0xac14, 0xac10,
 * 0xac0c, 0x88d4 three times; 0x9100 twice).  Golden-register programming
 * is an idempotent read-modify-write per triple, so the duplicates only
 * wasted register writes; each register is now listed once, in the original
 * first-occurrence order, with identical mask/value.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
465 
/*
 * OLAND golden settings for RLC registers ({offset, mask, value} triples;
 * applied by the golden-register helper — confirm against caller).
 */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
474 
/*
 * OLAND golden register overrides ({offset, mask, value} triples;
 * applied by the golden-register helper — confirm against caller).
 */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
505 
/*
 * HAINAN golden register overrides ({offset, mask, value} triples;
 * applied by the golden-register helper — confirm against caller).
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
534 
/* Additional HAINAN golden override ({offset, mask, value} triple). */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
539 
/*
 * TAHITI init sequence for medium-grain / coarse-grain clock gating
 * (MGCG/CGCG), as {offset, mask, value} triples — presumably applied by
 * the golden-register helper when clockgating is enabled; confirm against
 * caller (not visible in this chunk).
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
669 
/*
 * PITCAIRN MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triples; applied by the golden-register helper — confirm against caller).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
767 
/*
 * VERDE MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triples; applied by the golden-register helper — confirm against caller).
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
867 
/*
 * OLAND MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triples; applied by the golden-register helper — confirm against caller).
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
947 
/* Hainan medium/coarse grain clock gating init sequence:
 * {register offset, mask, value} triplets, applied via
 * radeon_program_register_sequence().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1024 
/* Verde power-gating init sequence: {register offset, mask, value}
 * triplets, applied via radeon_program_register_sequence().
 * NOTE(review): unlike the sibling init tables this one is not declared
 * const - presumably an oversight; confirm nothing writes to it before
 * constifying.
 */
static u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};
1151 
/**
 * si_init_golden_registers - program the per-ASIC "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the chip-specific golden register, RLC and clock-gating init
 * sequences (plus the power-gating sequence on Verde).  Families not
 * listed here are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}
1220 
1221 #define PCIE_BUS_CLK                10000
1222 #define TCLK                        (PCIE_BUS_CLK / 10)
1223 
1224 /**
1225  * si_get_xclk - get the xclk
1226  *
1227  * @rdev: radeon_device pointer
1228  *
1229  * Returns the reference clock used by the gfx engine
1230  * (SI).
1231  */
1232 u32 si_get_xclk(struct radeon_device *rdev)
1233 {
1234         u32 reference_clock = rdev->clock.spll.reference_freq;
1235 	u32 tmp;
1236 
1237 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1238 	if (tmp & MUX_TCLK_TO_XCLK)
1239 		return TCLK;
1240 
1241 	tmp = RREG32(CG_CLKPIN_CNTL);
1242 	if (tmp & XTALIN_DIVIDE)
1243 		return reference_clock / 4;
1244 
1245 	return reference_clock;
1246 }
1247 
1248 /* get temperature in millidegrees */
1249 int si_get_temp(struct radeon_device *rdev)
1250 {
1251 	u32 temp;
1252 	int actual_temp = 0;
1253 
1254 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1255 		CTF_TEMP_SHIFT;
1256 
1257 	if (temp & 0x200)
1258 		actual_temp = 255;
1259 	else
1260 		actual_temp = temp & 0x1ff;
1261 
1262 	actual_temp = (actual_temp * 1000);
1263 
1264 	return actual_temp;
1265 }
1266 
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC (memory controller) IO {index, data} pairs, written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before the MC ucode is uploaded
 * (see si_mc_load_microcode()).
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1307 
/* Pitcairn MC IO {index, data} pairs (see si_mc_load_microcode()). */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1346 
/* Verde MC IO {index, data} pairs (see si_mc_load_microcode()). */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1385 
/* Oland MC IO {index, data} pairs (see si_mc_load_microcode()). */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1424 
/* Hainan MC IO {index, data} pairs (see si_mc_load_microcode()). */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1463 
1464 /* ucode loading */
1465 static int si_mc_load_microcode(struct radeon_device *rdev)
1466 {
1467 	const __be32 *fw_data;
1468 	u32 running, blackout = 0;
1469 	u32 *io_mc_regs;
1470 	int i, ucode_size, regs_size;
1471 
1472 	if (!rdev->mc_fw)
1473 		return -EINVAL;
1474 
1475 	switch (rdev->family) {
1476 	case CHIP_TAHITI:
1477 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1478 		ucode_size = SI_MC_UCODE_SIZE;
1479 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1480 		break;
1481 	case CHIP_PITCAIRN:
1482 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1483 		ucode_size = SI_MC_UCODE_SIZE;
1484 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1485 		break;
1486 	case CHIP_VERDE:
1487 	default:
1488 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1489 		ucode_size = SI_MC_UCODE_SIZE;
1490 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1491 		break;
1492 	case CHIP_OLAND:
1493 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1494 		ucode_size = OLAND_MC_UCODE_SIZE;
1495 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1496 		break;
1497 	case CHIP_HAINAN:
1498 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1499 		ucode_size = OLAND_MC_UCODE_SIZE;
1500 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1501 		break;
1502 	}
1503 
1504 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1505 
1506 	if (running == 0) {
1507 		if (running) {
1508 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1509 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1510 		}
1511 
1512 		/* reset the engine and set to writable */
1513 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1514 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1515 
1516 		/* load mc io regs */
1517 		for (i = 0; i < regs_size; i++) {
1518 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1519 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1520 		}
1521 		/* load the MC ucode */
1522 		fw_data = (const __be32 *)rdev->mc_fw->data;
1523 		for (i = 0; i < ucode_size; i++)
1524 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1525 
1526 		/* put the engine back into the active state */
1527 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1528 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1529 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1530 
1531 		/* wait for training to complete */
1532 		for (i = 0; i < rdev->usec_timeout; i++) {
1533 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1534 				break;
1535 			udelay(1);
1536 		}
1537 		for (i = 0; i < rdev->usec_timeout; i++) {
1538 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1539 				break;
1540 			udelay(1);
1541 		}
1542 
1543 		if (running)
1544 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1545 	}
1546 
1547 	return 0;
1548 }
1549 
1550 static int si_init_microcode(struct radeon_device *rdev)
1551 {
1552 	const char *chip_name;
1553 	const char *rlc_chip_name;
1554 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1555 	size_t smc_req_size;
1556 	char fw_name[30];
1557 	int err;
1558 
1559 	DRM_DEBUG("\n");
1560 
1561 	switch (rdev->family) {
1562 	case CHIP_TAHITI:
1563 		chip_name = "TAHITI";
1564 		rlc_chip_name = "TAHITI";
1565 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1566 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1567 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1568 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1569 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1570 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1571 		break;
1572 	case CHIP_PITCAIRN:
1573 		chip_name = "PITCAIRN";
1574 		rlc_chip_name = "PITCAIRN";
1575 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1576 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1577 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1578 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1579 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1580 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1581 		break;
1582 	case CHIP_VERDE:
1583 		chip_name = "VERDE";
1584 		rlc_chip_name = "VERDE";
1585 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1586 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1587 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1588 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1589 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1590 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1591 		break;
1592 	case CHIP_OLAND:
1593 		chip_name = "OLAND";
1594 		rlc_chip_name = "OLAND";
1595 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1596 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1597 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1598 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1599 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1600 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1601 		break;
1602 	case CHIP_HAINAN:
1603 		chip_name = "HAINAN";
1604 		rlc_chip_name = "HAINAN";
1605 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1606 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1607 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1608 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1609 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1610 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1611 		break;
1612 	default: BUG();
1613 	}
1614 
1615 	DRM_INFO("Loading %s Microcode\n", chip_name);
1616 
1617 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1618 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1619 	if (err)
1620 		goto out;
1621 	if (rdev->pfp_fw->size != pfp_req_size) {
1622 		printk(KERN_ERR
1623 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1624 		       rdev->pfp_fw->size, fw_name);
1625 		err = -EINVAL;
1626 		goto out;
1627 	}
1628 
1629 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1630 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1631 	if (err)
1632 		goto out;
1633 	if (rdev->me_fw->size != me_req_size) {
1634 		printk(KERN_ERR
1635 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1636 		       rdev->me_fw->size, fw_name);
1637 		err = -EINVAL;
1638 	}
1639 
1640 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1641 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1642 	if (err)
1643 		goto out;
1644 	if (rdev->ce_fw->size != ce_req_size) {
1645 		printk(KERN_ERR
1646 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1647 		       rdev->ce_fw->size, fw_name);
1648 		err = -EINVAL;
1649 	}
1650 
1651 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1652 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1653 	if (err)
1654 		goto out;
1655 	if (rdev->rlc_fw->size != rlc_req_size) {
1656 		printk(KERN_ERR
1657 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1658 		       rdev->rlc_fw->size, fw_name);
1659 		err = -EINVAL;
1660 	}
1661 
1662 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1663 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1664 	if (err)
1665 		goto out;
1666 	if (rdev->mc_fw->size != mc_req_size) {
1667 		printk(KERN_ERR
1668 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1669 		       rdev->mc_fw->size, fw_name);
1670 		err = -EINVAL;
1671 	}
1672 
1673 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1674 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1675 	if (err) {
1676 		printk(KERN_ERR
1677 		       "smc: error loading firmware \"%s\"\n",
1678 		       fw_name);
1679 		release_firmware(rdev->smc_fw);
1680 		rdev->smc_fw = NULL;
1681 	} else if (rdev->smc_fw->size != smc_req_size) {
1682 		printk(KERN_ERR
1683 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1684 		       rdev->smc_fw->size, fw_name);
1685 		err = -EINVAL;
1686 	}
1687 
1688 out:
1689 	if (err) {
1690 		if (err != -EINVAL)
1691 			printk(KERN_ERR
1692 			       "si_cp: Failed to load firmware \"%s\"\n",
1693 			       fw_name);
1694 		release_firmware(rdev->pfp_fw);
1695 		rdev->pfp_fw = NULL;
1696 		release_firmware(rdev->me_fw);
1697 		rdev->me_fw = NULL;
1698 		release_firmware(rdev->ce_fw);
1699 		rdev->ce_fw = NULL;
1700 		release_firmware(rdev->rlc_fw);
1701 		rdev->rlc_fw = NULL;
1702 		release_firmware(rdev->mc_fw);
1703 		rdev->mc_fw = NULL;
1704 		release_firmware(rdev->smc_fw);
1705 		rdev->smc_fw = NULL;
1706 	}
1707 	return err;
1708 }
1709 
/* watermark setup */
/**
 * dce6_line_buffer_adjust - set up the line buffer allocation for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc to configure
 * @mode: mode set on this crtc (may be NULL if the crtc is disabled)
 * @other_mode: mode on the crtc sharing this line buffer, if any
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for the
 * crtc, waits for the allocation to take effect, and returns the line
 * buffer size granted to this pipe (0 if the crtc is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait for the hw to complete the new DMIF buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	/* translate the chosen split into a line buffer entry count */
	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1769 
1770 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1771 {
1772 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1773 
1774 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1775 	case 0:
1776 	default:
1777 		return 1;
1778 	case 1:
1779 		return 2;
1780 	case 2:
1781 		return 4;
1782 	case 3:
1783 		return 8;
1784 	case 4:
1785 		return 3;
1786 	case 5:
1787 		return 6;
1788 	case 6:
1789 		return 10;
1790 	case 7:
1791 		return 12;
1792 	case 8:
1793 		return 16;
1794 	}
1795 }
1796 
/* Inputs to the DCE6 display watermark/bandwidth calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1812 
1813 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1814 {
1815 	/* Calculate raw DRAM Bandwidth */
1816 	fixed20_12 dram_efficiency; /* 0.7 */
1817 	fixed20_12 yclk, dram_channels, bandwidth;
1818 	fixed20_12 a;
1819 
1820 	a.full = dfixed_const(1000);
1821 	yclk.full = dfixed_const(wm->yclk);
1822 	yclk.full = dfixed_div(yclk, a);
1823 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1824 	a.full = dfixed_const(10);
1825 	dram_efficiency.full = dfixed_const(7);
1826 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1827 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1828 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1829 
1830 	return dfixed_trunc(bandwidth);
1831 }
1832 
1833 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1834 {
1835 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1836 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1837 	fixed20_12 yclk, dram_channels, bandwidth;
1838 	fixed20_12 a;
1839 
1840 	a.full = dfixed_const(1000);
1841 	yclk.full = dfixed_const(wm->yclk);
1842 	yclk.full = dfixed_div(yclk, a);
1843 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1844 	a.full = dfixed_const(10);
1845 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1846 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1847 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1848 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1849 
1850 	return dfixed_trunc(bandwidth);
1851 }
1852 
1853 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1854 {
1855 	/* Calculate the display Data return Bandwidth */
1856 	fixed20_12 return_efficiency; /* 0.8 */
1857 	fixed20_12 sclk, bandwidth;
1858 	fixed20_12 a;
1859 
1860 	a.full = dfixed_const(1000);
1861 	sclk.full = dfixed_const(wm->sclk);
1862 	sclk.full = dfixed_div(sclk, a);
1863 	a.full = dfixed_const(10);
1864 	return_efficiency.full = dfixed_const(8);
1865 	return_efficiency.full = dfixed_div(return_efficiency, a);
1866 	a.full = dfixed_const(32);
1867 	bandwidth.full = dfixed_mul(a, sclk);
1868 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1869 
1870 	return dfixed_trunc(bandwidth);
1871 }
1872 
1873 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1874 {
1875 	return 32;
1876 }
1877 
1878 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1879 {
1880 	/* Calculate the DMIF Request Bandwidth */
1881 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1882 	fixed20_12 disp_clk, sclk, bandwidth;
1883 	fixed20_12 a, b1, b2;
1884 	u32 min_bandwidth;
1885 
1886 	a.full = dfixed_const(1000);
1887 	disp_clk.full = dfixed_const(wm->disp_clk);
1888 	disp_clk.full = dfixed_div(disp_clk, a);
1889 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1890 	b1.full = dfixed_mul(a, disp_clk);
1891 
1892 	a.full = dfixed_const(1000);
1893 	sclk.full = dfixed_const(wm->sclk);
1894 	sclk.full = dfixed_div(sclk, a);
1895 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1896 	b2.full = dfixed_mul(a, sclk);
1897 
1898 	a.full = dfixed_const(10);
1899 	disp_clk_request_efficiency.full = dfixed_const(8);
1900 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1901 
1902 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1903 
1904 	a.full = dfixed_const(min_bandwidth);
1905 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1906 
1907 	return dfixed_trunc(bandwidth);
1908 }
1909 
1910 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1911 {
1912 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1913 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1914 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1915 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1916 
1917 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1918 }
1919 
1920 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1921 {
1922 	/* Calculate the display mode Average Bandwidth
1923 	 * DisplayMode should contain the source and destination dimensions,
1924 	 * timing, etc.
1925 	 */
1926 	fixed20_12 bpp;
1927 	fixed20_12 line_time;
1928 	fixed20_12 src_width;
1929 	fixed20_12 bandwidth;
1930 	fixed20_12 a;
1931 
1932 	a.full = dfixed_const(1000);
1933 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1934 	line_time.full = dfixed_div(line_time, a);
1935 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1936 	src_width.full = dfixed_const(wm->src_width);
1937 	bandwidth.full = dfixed_mul(src_width, bpp);
1938 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1939 	bandwidth.full = dfixed_div(bandwidth, line_time);
1940 
1941 	return dfixed_trunc(bandwidth);
1942 }
1943 
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* Worst-case latency the line buffer must hide, plus any extra time
	 * needed when a line cannot be refilled within the active period.
	 * First calculate the latency in ns.
	 */
	u32 mc_latency = 2000; /* 2000 ns memory controller latency */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time to return one 512*8 byte chunk at the available bandwidth */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time to return a 128*4 byte cursor line pair */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* other heads may be queued ahead of this one for data return */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* vertical scaling / many taps / interlacing can need up to 4
	 * source lines per destination line, otherwise 2
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk): rate at which
	 * the DMIF buffer can be drained within the latency window
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk/1000 * bytes_per_pixel: pixel consumption rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smallest of the limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch max_src_lines_per_dst_line worth of source pixels */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line cannot be refilled during active display, the watermark
	 * must also cover the overrun
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2006 
2007 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2008 {
2009 	if (dce6_average_bandwidth(wm) <=
2010 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2011 		return true;
2012 	else
2013 		return false;
2014 };
2015 
2016 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2017 {
2018 	if (dce6_average_bandwidth(wm) <=
2019 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2020 		return true;
2021 	else
2022 		return false;
2023 };
2024 
2025 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2026 {
2027 	u32 lb_partitions = wm->lb_size / wm->src_width;
2028 	u32 line_time = wm->active_time + wm->blank_time;
2029 	u32 latency_tolerant_lines;
2030 	u32 latency_hiding;
2031 	fixed20_12 a;
2032 
2033 	a.full = dfixed_const(1);
2034 	if (wm->vsc.full > a.full)
2035 		latency_tolerant_lines = 1;
2036 	else {
2037 		if (lb_partitions <= (wm->vtaps + 1))
2038 			latency_tolerant_lines = 1;
2039 		else
2040 			latency_tolerant_lines = 2;
2041 	}
2042 
2043 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2044 
2045 	if (dce6_latency_watermark(wm) <= latency_hiding)
2046 		return true;
2047 	else
2048 		return false;
2049 }
2050 
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of active display heads
 *
 * Computes watermark A (high clocks) and watermark B (low clocks) for the
 * given crtc, programs them into the DPG arbitration/latency registers,
 * and sets the priority marks.  Forces display priority to high when the
 * mode cannot be supported with latency hiding.  Also saves line_time and
 * the two watermarks on the crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line_time is clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A = watermark_a * (clock/1000) * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B computed the same way from watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2210 
2211 void dce6_bandwidth_update(struct radeon_device *rdev)
2212 {
2213 	struct drm_display_mode *mode0 = NULL;
2214 	struct drm_display_mode *mode1 = NULL;
2215 	u32 num_heads = 0, lb_size;
2216 	int i;
2217 
2218 	radeon_update_display_priority(rdev);
2219 
2220 	for (i = 0; i < rdev->num_crtc; i++) {
2221 		if (rdev->mode_info.crtcs[i]->base.enabled)
2222 			num_heads++;
2223 	}
2224 	for (i = 0; i < rdev->num_crtc; i += 2) {
2225 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2226 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2227 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2228 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2229 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2230 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2231 	}
2232 }
2233 
2234 /*
2235  * Core functions
2236  */
2237 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2238 {
2239 	const u32 num_tile_mode_states = 32;
2240 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2241 
2242 	switch (rdev->config.si.mem_row_size_in_kb) {
2243 	case 1:
2244 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2245 		break;
2246 	case 2:
2247 	default:
2248 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2249 		break;
2250 	case 4:
2251 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2252 		break;
2253 	}
2254 
2255 	if ((rdev->family == CHIP_TAHITI) ||
2256 	    (rdev->family == CHIP_PITCAIRN)) {
2257 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2258 			switch (reg_offset) {
2259 			case 0:  /* non-AA compressed depth or any compressed stencil */
2260 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2262 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2263 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2264 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2265 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2268 				break;
2269 			case 1:  /* 2xAA/4xAA compressed depth only */
2270 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2272 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2273 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2274 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2275 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2277 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2278 				break;
2279 			case 2:  /* 8xAA compressed depth only */
2280 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2282 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2283 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2284 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2285 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2288 				break;
2289 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2292 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2293 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2294 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2295 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2297 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2298 				break;
2299 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2301 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2304 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2305 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2307 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2308 				break;
2309 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2310 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2312 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2313 						 TILE_SPLIT(split_equal_to_row_size) |
2314 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2315 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2317 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2318 				break;
2319 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2323 						 TILE_SPLIT(split_equal_to_row_size) |
2324 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2325 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2327 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2328 				break;
2329 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2330 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2333 						 TILE_SPLIT(split_equal_to_row_size) |
2334 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2335 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2338 				break;
2339 			case 8:  /* 1D and 1D Array Surfaces */
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2341 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2343 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2344 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2345 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2346 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2347 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2348 				break;
2349 			case 9:  /* Displayable maps. */
2350 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2351 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2353 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2354 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2355 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2358 				break;
2359 			case 10:  /* Display 8bpp. */
2360 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2363 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2364 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2365 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2367 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2368 				break;
2369 			case 11:  /* Display 16bpp. */
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2373 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2374 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2375 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2378 				break;
2379 			case 12:  /* Display 32bpp. */
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2384 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2385 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2388 				break;
2389 			case 13:  /* Thin. */
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2392 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2393 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2394 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2395 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2397 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2398 				break;
2399 			case 14:  /* Thin 8 bpp. */
2400 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2402 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2403 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2404 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2405 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2408 				break;
2409 			case 15:  /* Thin 16 bpp. */
2410 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2412 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2413 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2414 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2415 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2418 				break;
2419 			case 16:  /* Thin 32 bpp. */
2420 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2422 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2423 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2424 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2425 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2428 				break;
2429 			case 17:  /* Thin 64 bpp. */
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2433 						 TILE_SPLIT(split_equal_to_row_size) |
2434 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2435 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2438 				break;
2439 			case 21:  /* 8 bpp PRT. */
2440 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2442 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2443 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2444 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2445 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2448 				break;
2449 			case 22:  /* 16 bpp PRT */
2450 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2452 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2453 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2454 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2455 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2458 				break;
2459 			case 23:  /* 32 bpp PRT */
2460 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2462 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2463 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2464 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2465 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2468 				break;
2469 			case 24:  /* 64 bpp PRT */
2470 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2472 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2473 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2474 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2475 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2478 				break;
2479 			case 25:  /* 128 bpp PRT */
2480 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2482 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2483 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2484 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2485 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2488 				break;
2489 			default:
2490 				gb_tile_moden = 0;
2491 				break;
2492 			}
2493 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2494 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2495 		}
2496 	} else if ((rdev->family == CHIP_VERDE) ||
2497 		   (rdev->family == CHIP_OLAND) ||
2498 		   (rdev->family == CHIP_HAINAN)) {
2499 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2500 			switch (reg_offset) {
2501 			case 0:  /* non-AA compressed depth or any compressed stencil */
2502 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2504 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2505 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2507 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2510 				break;
2511 			case 1:  /* 2xAA/4xAA compressed depth only */
2512 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2515 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2516 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2517 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2520 				break;
2521 			case 2:  /* 8xAA compressed depth only */
2522 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2523 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2524 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2525 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2526 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2527 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2530 				break;
2531 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2532 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2535 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2536 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2537 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2540 				break;
2541 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2542 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2546 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2547 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 				break;
2551 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2552 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2554 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2555 						 TILE_SPLIT(split_equal_to_row_size) |
2556 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2557 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2560 				break;
2561 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2562 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2564 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2565 						 TILE_SPLIT(split_equal_to_row_size) |
2566 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2567 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2569 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2570 				break;
2571 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2575 						 TILE_SPLIT(split_equal_to_row_size) |
2576 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2577 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2580 				break;
2581 			case 8:  /* 1D and 1D Array Surfaces */
2582 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2583 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2584 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2586 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2587 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2589 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2590 				break;
2591 			case 9:  /* Displayable maps. */
2592 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2595 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2596 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2597 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600 				break;
2601 			case 10:  /* Display 8bpp. */
2602 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2605 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2606 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2607 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2610 				break;
2611 			case 11:  /* Display 16bpp. */
2612 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2615 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2617 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2619 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2620 				break;
2621 			case 12:  /* Display 32bpp. */
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2624 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2625 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2626 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2627 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2630 				break;
2631 			case 13:  /* Thin. */
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2637 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640 				break;
2641 			case 14:  /* Thin 8 bpp. */
2642 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2645 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2646 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2647 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2650 				break;
2651 			case 15:  /* Thin 16 bpp. */
2652 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2654 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2656 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2657 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2659 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2660 				break;
2661 			case 16:  /* Thin 32 bpp. */
2662 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2664 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2666 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2667 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2670 				break;
2671 			case 17:  /* Thin 64 bpp. */
2672 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2674 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675 						 TILE_SPLIT(split_equal_to_row_size) |
2676 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2677 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2680 				break;
2681 			case 21:  /* 8 bpp PRT. */
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2687 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2688 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2689 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690 				break;
2691 			case 22:  /* 16 bpp PRT */
2692 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2694 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2695 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2697 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2699 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2700 				break;
2701 			case 23:  /* 32 bpp PRT */
2702 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2704 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2705 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2706 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2707 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2710 				break;
2711 			case 24:  /* 64 bpp PRT */
2712 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2713 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2714 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2715 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2716 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2717 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2719 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2720 				break;
2721 			case 25:  /* 128 bpp PRT */
2722 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2724 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2725 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2726 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2727 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2730 				break;
2731 			default:
2732 				gb_tile_moden = 0;
2733 				break;
2734 			}
2735 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2736 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2737 		}
2738 	} else
2739 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2740 }
2741 
2742 static void si_select_se_sh(struct radeon_device *rdev,
2743 			    u32 se_num, u32 sh_num)
2744 {
2745 	u32 data = INSTANCE_BROADCAST_WRITES;
2746 
2747 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2748 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2749 	else if (se_num == 0xffffffff)
2750 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2751 	else if (sh_num == 0xffffffff)
2752 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2753 	else
2754 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2755 	WREG32(GRBM_GFX_INDEX, data);
2756 }
2757 
/* Return a mask with the low @bit_width bits set.  For widths of 32
 * or more the result saturates to all ones, exactly as the original
 * shift-and-or loop did; the explicit >= 32 check avoids shifting by
 * the full type width, which would be undefined behavior.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffffu;
	return (1u << bit_width) - 1u;
}
2768 
2769 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2770 {
2771 	u32 data, mask;
2772 
2773 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2774 	if (data & 1)
2775 		data &= INACTIVE_CUS_MASK;
2776 	else
2777 		data = 0;
2778 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2779 
2780 	data >>= INACTIVE_CUS_SHIFT;
2781 
2782 	mask = si_create_bitmask(cu_per_sh);
2783 
2784 	return ~data & mask;
2785 }
2786 
2787 static void si_setup_spi(struct radeon_device *rdev,
2788 			 u32 se_num, u32 sh_per_se,
2789 			 u32 cu_per_sh)
2790 {
2791 	int i, j, k;
2792 	u32 data, mask, active_cu;
2793 
2794 	for (i = 0; i < se_num; i++) {
2795 		for (j = 0; j < sh_per_se; j++) {
2796 			si_select_se_sh(rdev, i, j);
2797 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2798 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2799 
2800 			mask = 1;
2801 			for (k = 0; k < 16; k++) {
2802 				mask <<= k;
2803 				if (active_cu & mask) {
2804 					data &= ~mask;
2805 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2806 					break;
2807 				}
2808 			}
2809 		}
2810 	}
2811 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2812 }
2813 
2814 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2815 			      u32 max_rb_num, u32 se_num,
2816 			      u32 sh_per_se)
2817 {
2818 	u32 data, mask;
2819 
2820 	data = RREG32(CC_RB_BACKEND_DISABLE);
2821 	if (data & 1)
2822 		data &= BACKEND_DISABLE_MASK;
2823 	else
2824 		data = 0;
2825 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2826 
2827 	data >>= BACKEND_DISABLE_SHIFT;
2828 
2829 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2830 
2831 	return data & mask;
2832 }
2833 
/* Build a global map of enabled render backends and program each
 * shader engine's PA_SC_RASTER_CONFIG RB mapping accordingly.
 * Three phases: (1) collect the per-SH disable masks into one dword,
 * (2) invert it into an enable mask over all max_rb_num backends,
 * (3) per SE, pick an RB_MAP value for each 2-bit enable pair and
 * write it with that SE selected.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* phase 1: gather disabled-RB bits from every SE/SH pair,
	 * packed TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* phase 2: invert into an enabled-RB mask */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* phase 3: per SE, translate each 2-bit enable pair into an
	 * RB_MAP field; enabled_rbs is consumed 2 bits per SH */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled (or none: fall back) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2881 
/* One-time graphics engine initialization: fill in the per-ASIC
 * configuration limits, program the HDP/GRBM/address-config
 * registers, derive the tiling config dword, and apply the 3D engine
 * hardware defaults.  Called from the asic startup path.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader/backend/fifo limits and the "golden"
	 * GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* VERDE doubles as the fallback for unknown families */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used in
	 * this function */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, capped at 4) from the
	 * column-count field of MC_ARB_RAMCFG */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that decodes
	 * tiled addresses (display, HDP, both DMA engines, UVD) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: keeps the POR value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-back/write-back keeps the POR value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before the CP starts */
	udelay(50);
}
3138 
3139 /*
3140  * GPU scratch registers helpers function.
3141  */
3142 static void si_scratch_init(struct radeon_device *rdev)
3143 {
3144 	int i;
3145 
3146 	rdev->scratch.num_reg = 7;
3147 	rdev->scratch.reg_base = SCRATCH_REG0;
3148 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3149 		rdev->scratch.free[i] = true;
3150 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3151 	}
3152 }
3153 
/* Emit the command stream for signalling @fence on its ring: first a
 * cache flush over the GART, then an EVENT_WRITE_EOP that writes the
 * fence sequence number to the fence address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);		/* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3180 
3181 /*
3182  * IB stuff
3183  */
/* Schedule an indirect buffer on @ib->ring.  Const IBs get a
 * SWITCH_BUFFER preamble and use the CONST indirect-buffer packet;
 * normal IBs first record the post-IB rptr (to a scratch register or
 * the writeback buffer) and flush the read caches for the IB's vmid
 * afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 + 4 + 8 = dwords of this SET_CONFIG_REG
			 * packet, the IB packet, and the cache-flush
			 * sequence below; the offsets must match what
			 * is actually emitted */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5-dword WRITE_DATA packet instead of the
			 * 3-dword register write above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));	/* dst: memory */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* the IB packet itself: header + addr lo/hi + size/vmid */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3240 
3241 /*
3242  * CP.
3243  */
3244 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3245 {
3246 	if (enable)
3247 		WREG32(CP_ME_CNTL, 0);
3248 	else {
3249 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3250 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3251 		WREG32(SCRATCH_UMSK, 0);
3252 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3253 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3254 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3255 	}
3256 	udelay(50);
3257 }
3258 
3259 static int si_cp_load_microcode(struct radeon_device *rdev)
3260 {
3261 	const __be32 *fw_data;
3262 	int i;
3263 
3264 	if (!rdev->me_fw || !rdev->pfp_fw)
3265 		return -EINVAL;
3266 
3267 	si_cp_enable(rdev, false);
3268 
3269 	/* PFP */
3270 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3271 	WREG32(CP_PFP_UCODE_ADDR, 0);
3272 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3273 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3274 	WREG32(CP_PFP_UCODE_ADDR, 0);
3275 
3276 	/* CE */
3277 	fw_data = (const __be32 *)rdev->ce_fw->data;
3278 	WREG32(CP_CE_UCODE_ADDR, 0);
3279 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3280 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3281 	WREG32(CP_CE_UCODE_ADDR, 0);
3282 
3283 	/* ME */
3284 	fw_data = (const __be32 *)rdev->me_fw->data;
3285 	WREG32(CP_ME_RAM_WADDR, 0);
3286 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3287 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3288 	WREG32(CP_ME_RAM_WADDR, 0);
3289 
3290 	WREG32(CP_PFP_UCODE_ADDR, 0);
3291 	WREG32(CP_CE_UCODE_ADDR, 0);
3292 	WREG32(CP_ME_RAM_WADDR, 0);
3293 	WREG32(CP_ME_RAM_RADDR, 0);
3294 	return 0;
3295 }
3296 
3297 static int si_cp_start(struct radeon_device *rdev)
3298 {
3299 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3300 	int r, i;
3301 
3302 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3303 	if (r) {
3304 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3305 		return r;
3306 	}
3307 	/* init the CP */
3308 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3309 	radeon_ring_write(ring, 0x1);
3310 	radeon_ring_write(ring, 0x0);
3311 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3312 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3313 	radeon_ring_write(ring, 0);
3314 	radeon_ring_write(ring, 0);
3315 
3316 	/* init the CE partitions */
3317 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3318 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3319 	radeon_ring_write(ring, 0xc000);
3320 	radeon_ring_write(ring, 0xe000);
3321 	radeon_ring_unlock_commit(rdev, ring);
3322 
3323 	si_cp_enable(rdev, true);
3324 
3325 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3326 	if (r) {
3327 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3328 		return r;
3329 	}
3330 
3331 	/* setup clear context state */
3332 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3333 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3334 
3335 	for (i = 0; i < si_default_size; i++)
3336 		radeon_ring_write(ring, si_default_state[i]);
3337 
3338 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3339 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3340 
3341 	/* set clear context state */
3342 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3343 	radeon_ring_write(ring, 0);
3344 
3345 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3346 	radeon_ring_write(ring, 0x00000316);
3347 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3348 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3349 
3350 	radeon_ring_unlock_commit(rdev, ring);
3351 
3352 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3353 		ring = &rdev->ring[i];
3354 		r = radeon_ring_lock(rdev, ring, 2);
3355 
3356 		/* clear the compute context state */
3357 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3358 		radeon_ring_write(ring, 0);
3359 
3360 		radeon_ring_unlock_commit(rdev, ring);
3361 	}
3362 
3363 	return 0;
3364 }
3365 
3366 static void si_cp_fini(struct radeon_device *rdev)
3367 {
3368 	struct radeon_ring *ring;
3369 	si_cp_enable(rdev, false);
3370 
3371 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3372 	radeon_ring_fini(rdev, ring);
3373 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3374 
3375 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3376 	radeon_ring_fini(rdev, ring);
3377 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3378 
3379 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3380 	radeon_ring_fini(rdev, ring);
3381 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3382 }
3383 
/* Program the ring buffer registers for the gfx ring and the two
 * compute rings (size, read/write pointers, writeback address, base),
 * then start the CP via si_cp_start() and ring-test all three rings.
 * Returns 0 on success; a gfx ring test failure is fatal, compute
 * ring test failures only leave that ring marked not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* NOTE(review): RB_NO_UPDATE is only folded into the
		 * ring-0 CNTL here; rings 1 and 2 below rebuild tmp
		 * without it - confirm this asymmetry is intended */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure takes everything down */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3512 
/* Poll the GRBM/SRBM/DMA/VM status registers and return a
 * RADEON_RESET_* bitmask of the blocks that appear hung.  MC busy is
 * deliberately dropped from the result since it usually just means
 * the memory controller is busy, not hung.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3593 
/**
 * si_gpu_soft_reset - soft reset the GPU blocks selected by reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags naming the blocks to reset
 *
 * Dumps fault state, halts the CP and the DMA engines, stops the memory
 * controller, pulses the matching GRBM/SRBM soft-reset bits, then
 * restores the MC.  No-op when @reset_mask is 0.  The register write
 * ordering and the udelay()s between steps are part of the documented
 * reset sequence and must not be reordered.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* let the halted engines settle before stopping the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, read back to post, wait, clear */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3718 
3719 int si_asic_reset(struct radeon_device *rdev)
3720 {
3721 	u32 reset_mask;
3722 
3723 	reset_mask = si_gpu_check_soft_reset(rdev);
3724 
3725 	if (reset_mask)
3726 		r600_set_bios_scratch_engine_hung(rdev, true);
3727 
3728 	si_gpu_soft_reset(rdev, reset_mask);
3729 
3730 	reset_mask = si_gpu_check_soft_reset(rdev);
3731 
3732 	if (!reset_mask)
3733 		r600_set_bios_scratch_engine_hung(rdev, false);
3734 
3735 	return 0;
3736 }
3737 
3738 /**
3739  * si_gfx_is_lockup - Check if the GFX engine is locked up
3740  *
3741  * @rdev: radeon_device pointer
3742  * @ring: radeon_ring structure holding ring information
3743  *
3744  * Check if the GFX engine is locked up.
3745  * Returns true if the engine appears to be locked up, false if not.
3746  */
3747 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3748 {
3749 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3750 
3751 	if (!(reset_mask & (RADEON_RESET_GFX |
3752 			    RADEON_RESET_COMPUTE |
3753 			    RADEON_RESET_CP))) {
3754 		radeon_ring_lockup_update(ring);
3755 		return false;
3756 	}
3757 	/* force CP activities */
3758 	radeon_ring_force_activity(rdev, ring);
3759 	return radeon_ring_test_lockup(rdev, ring);
3760 }
3761 
3762 /* MC */
/**
 * si_mc_program - program the memory controller's address apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC, programs the system
 * aperture and framebuffer location from rdev->mc, then restores the
 * MC and disables the VGA renderer so it cannot scribble over VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: high 16 bits = end, low 16 bits = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on SI: collapse the AGP aperture to nothing */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3813 
3814 void si_vram_gtt_location(struct radeon_device *rdev,
3815 			  struct radeon_mc *mc)
3816 {
3817 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3818 		/* leave room for at least 1024M GTT */
3819 		dev_warn(rdev->dev, "limiting VRAM\n");
3820 		mc->real_vram_size = 0xFFC0000000ULL;
3821 		mc->mc_vram_size = 0xFFC0000000ULL;
3822 	}
3823 	radeon_vram_location(rdev, &rdev->mc, 0);
3824 	rdev->mc.gtt_base_align = 0;
3825 	radeon_gtt_location(rdev, mc);
3826 }
3827 
3828 static int si_mc_init(struct radeon_device *rdev)
3829 {
3830 	u32 tmp;
3831 	int chansize, numchan;
3832 
3833 	/* Get VRAM informations */
3834 	rdev->mc.vram_is_ddr = true;
3835 	tmp = RREG32(MC_ARB_RAMCFG);
3836 	if (tmp & CHANSIZE_OVERRIDE) {
3837 		chansize = 16;
3838 	} else if (tmp & CHANSIZE_MASK) {
3839 		chansize = 64;
3840 	} else {
3841 		chansize = 32;
3842 	}
3843 	tmp = RREG32(MC_SHARED_CHMAP);
3844 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3845 	case 0:
3846 	default:
3847 		numchan = 1;
3848 		break;
3849 	case 1:
3850 		numchan = 2;
3851 		break;
3852 	case 2:
3853 		numchan = 4;
3854 		break;
3855 	case 3:
3856 		numchan = 8;
3857 		break;
3858 	case 4:
3859 		numchan = 3;
3860 		break;
3861 	case 5:
3862 		numchan = 6;
3863 		break;
3864 	case 6:
3865 		numchan = 10;
3866 		break;
3867 	case 7:
3868 		numchan = 12;
3869 		break;
3870 	case 8:
3871 		numchan = 16;
3872 		break;
3873 	}
3874 	rdev->mc.vram_width = numchan * chansize;
3875 	/* Could aper size report 0 ? */
3876 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3877 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3878 	/* size in MB on si */
3879 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3880 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3881 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3882 	si_vram_gtt_location(rdev, &rdev->mc);
3883 	radeon_update_bandwidth_info(rdev);
3884 
3885 	return 0;
3886 }
3887 
3888 /*
3889  * GART
3890  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then invalidates the TLBs of all 16 VM
 * contexts so subsequent GPU accesses see updated page-table entries.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3899 
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and L2 cache,
 * configures VM context 0 for the system GART mapping and contexts
 * 1-15 for per-process VMs (page tables assigned later on the fly),
 * then flushes the TLBs and marks the GART ready.
 *
 * Returns 0 on success, negative error code if the GART table object
 * is missing or cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, zeroed here; purpose not
	 * visible from this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3984 
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reconfigures the TLB/L2 with caching off,
 * and unpins the GART table.  Mirrors si_pcie_gart_enable().
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4003 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the GART table VRAM object, and
 * releases the GART bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4010 
4011 /* vm parser */
4012 static bool si_vm_reg_valid(u32 reg)
4013 {
4014 	/* context regs are fine */
4015 	if (reg >= 0x28000)
4016 		return true;
4017 
4018 	/* check config regs */
4019 	switch (reg) {
4020 	case GRBM_GFX_INDEX:
4021 	case CP_STRMOUT_CNTL:
4022 	case VGT_VTX_VECT_EJECT_REG:
4023 	case VGT_CACHE_INVALIDATION:
4024 	case VGT_ESGS_RING_SIZE:
4025 	case VGT_GSVS_RING_SIZE:
4026 	case VGT_GS_VERTEX_REUSE:
4027 	case VGT_PRIMITIVE_TYPE:
4028 	case VGT_INDEX_TYPE:
4029 	case VGT_NUM_INDICES:
4030 	case VGT_NUM_INSTANCES:
4031 	case VGT_TF_RING_SIZE:
4032 	case VGT_HS_OFFCHIP_PARAM:
4033 	case VGT_TF_MEMORY_BASE:
4034 	case PA_CL_ENHANCE:
4035 	case PA_SU_LINE_STIPPLE_VALUE:
4036 	case PA_SC_LINE_STIPPLE_STATE:
4037 	case PA_SC_ENHANCE:
4038 	case SQC_CACHES:
4039 	case SPI_STATIC_THREAD_MGMT_1:
4040 	case SPI_STATIC_THREAD_MGMT_2:
4041 	case SPI_STATIC_THREAD_MGMT_3:
4042 	case SPI_PS_MAX_WAVE_ID:
4043 	case SPI_CONFIG_CNTL:
4044 	case SPI_CONFIG_CNTL_1:
4045 	case TA_CNTL_AUX:
4046 		return true;
4047 	default:
4048 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4049 		return false;
4050 	}
4051 }
4052 
4053 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4054 				  u32 *ib, struct radeon_cs_packet *pkt)
4055 {
4056 	switch (pkt->opcode) {
4057 	case PACKET3_NOP:
4058 	case PACKET3_SET_BASE:
4059 	case PACKET3_SET_CE_DE_COUNTERS:
4060 	case PACKET3_LOAD_CONST_RAM:
4061 	case PACKET3_WRITE_CONST_RAM:
4062 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4063 	case PACKET3_DUMP_CONST_RAM:
4064 	case PACKET3_INCREMENT_CE_COUNTER:
4065 	case PACKET3_WAIT_ON_DE_COUNTER:
4066 	case PACKET3_CE_WRITE:
4067 		break;
4068 	default:
4069 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4070 		return -EINVAL;
4071 	}
4072 	return 0;
4073 }
4074 
4075 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4076 {
4077 	u32 start_reg, reg, i;
4078 	u32 command = ib[idx + 4];
4079 	u32 info = ib[idx + 1];
4080 	u32 idx_value = ib[idx];
4081 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4082 		/* src address space is register */
4083 		if (((info & 0x60000000) >> 29) == 0) {
4084 			start_reg = idx_value << 2;
4085 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4086 				reg = start_reg;
4087 				if (!si_vm_reg_valid(reg)) {
4088 					DRM_ERROR("CP DMA Bad SRC register\n");
4089 					return -EINVAL;
4090 				}
4091 			} else {
4092 				for (i = 0; i < (command & 0x1fffff); i++) {
4093 					reg = start_reg + (4 * i);
4094 					if (!si_vm_reg_valid(reg)) {
4095 						DRM_ERROR("CP DMA Bad SRC register\n");
4096 						return -EINVAL;
4097 					}
4098 				}
4099 			}
4100 		}
4101 	}
4102 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4103 		/* dst address space is register */
4104 		if (((info & 0x00300000) >> 20) == 0) {
4105 			start_reg = ib[idx + 2];
4106 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4107 				reg = start_reg;
4108 				if (!si_vm_reg_valid(reg)) {
4109 					DRM_ERROR("CP DMA Bad DST register\n");
4110 					return -EINVAL;
4111 				}
4112 			} else {
4113 				for (i = 0; i < (command & 0x1fffff); i++) {
4114 					reg = start_reg + (4 * i);
4115 				if (!si_vm_reg_valid(reg)) {
4116 						DRM_ERROR("CP DMA Bad DST register\n");
4117 						return -EINVAL;
4118 					}
4119 				}
4120 			}
4121 		}
4122 	}
4123 	return 0;
4124 }
4125 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: parsed packet header
 *
 * Most opcodes are passed through unconditionally; opcodes that can
 * target registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) have every touched register validated with
 * si_vm_reg_valid().  Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that are always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* destination select 0 targets a register; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: check the whole range */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the register window, then each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4243 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: parsed packet header
 *
 * Compute-ring variant of si_vm_packet3_gfx_check(): a smaller opcode
 * whitelist (no draw packets), with the same register validation for
 * opcodes that can target registers.  Returns 0 or -EINVAL.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that are always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* destination select 0 targets a register; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register write mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: check the whole range */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4331 
/**
 * si_ib_parse - validate every packet of a VM indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet: type-0 packets are rejected outright,
 * type-2 packets are single-dword padding, and type-3 packets are
 * dispatched to the CE/GFX/compute checker matching the IB's ring.
 *
 * Returns 0 if every packet is allowed, -EINVAL on the first violation.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* padding packet: header only */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4383 
4384 /*
4385  * vm
4386  */
/**
 * si_vm_init - initialize the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * SI supports 16 VM contexts and needs no VRAM base offset for page
 * table addresses.  Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4396 
/* si_vm_fini - VM manager teardown; nothing to release on SI */
void si_vm_fini(struct radeon_device *rdev)
{
}
4400 
4401 /**
4402  * si_vm_decode_fault - print human readable fault info
4403  *
4404  * @rdev: radeon_device pointer
4405  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4406  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4407  *
4408  * Print human readable fault information (SI).
4409  */
4410 static void si_vm_decode_fault(struct radeon_device *rdev,
4411 			       u32 status, u32 addr)
4412 {
4413 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4414 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4415 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4416 	char *block;
4417 
4418 	if (rdev->family == CHIP_TAHITI) {
4419 		switch (mc_id) {
4420 		case 160:
4421 		case 144:
4422 		case 96:
4423 		case 80:
4424 		case 224:
4425 		case 208:
4426 		case 32:
4427 		case 16:
4428 			block = "CB";
4429 			break;
4430 		case 161:
4431 		case 145:
4432 		case 97:
4433 		case 81:
4434 		case 225:
4435 		case 209:
4436 		case 33:
4437 		case 17:
4438 			block = "CB_FMASK";
4439 			break;
4440 		case 162:
4441 		case 146:
4442 		case 98:
4443 		case 82:
4444 		case 226:
4445 		case 210:
4446 		case 34:
4447 		case 18:
4448 			block = "CB_CMASK";
4449 			break;
4450 		case 163:
4451 		case 147:
4452 		case 99:
4453 		case 83:
4454 		case 227:
4455 		case 211:
4456 		case 35:
4457 		case 19:
4458 			block = "CB_IMMED";
4459 			break;
4460 		case 164:
4461 		case 148:
4462 		case 100:
4463 		case 84:
4464 		case 228:
4465 		case 212:
4466 		case 36:
4467 		case 20:
4468 			block = "DB";
4469 			break;
4470 		case 165:
4471 		case 149:
4472 		case 101:
4473 		case 85:
4474 		case 229:
4475 		case 213:
4476 		case 37:
4477 		case 21:
4478 			block = "DB_HTILE";
4479 			break;
4480 		case 167:
4481 		case 151:
4482 		case 103:
4483 		case 87:
4484 		case 231:
4485 		case 215:
4486 		case 39:
4487 		case 23:
4488 			block = "DB_STEN";
4489 			break;
4490 		case 72:
4491 		case 68:
4492 		case 64:
4493 		case 8:
4494 		case 4:
4495 		case 0:
4496 		case 136:
4497 		case 132:
4498 		case 128:
4499 		case 200:
4500 		case 196:
4501 		case 192:
4502 			block = "TC";
4503 			break;
4504 		case 112:
4505 		case 48:
4506 			block = "CP";
4507 			break;
4508 		case 49:
4509 		case 177:
4510 		case 50:
4511 		case 178:
4512 			block = "SH";
4513 			break;
4514 		case 53:
4515 		case 190:
4516 			block = "VGT";
4517 			break;
4518 		case 117:
4519 			block = "IH";
4520 			break;
4521 		case 51:
4522 		case 115:
4523 			block = "RLC";
4524 			break;
4525 		case 119:
4526 		case 183:
4527 			block = "DMA0";
4528 			break;
4529 		case 61:
4530 			block = "DMA1";
4531 			break;
4532 		case 248:
4533 		case 120:
4534 			block = "HDP";
4535 			break;
4536 		default:
4537 			block = "unknown";
4538 			break;
4539 		}
4540 	} else {
4541 		switch (mc_id) {
4542 		case 32:
4543 		case 16:
4544 		case 96:
4545 		case 80:
4546 		case 160:
4547 		case 144:
4548 		case 224:
4549 		case 208:
4550 			block = "CB";
4551 			break;
4552 		case 33:
4553 		case 17:
4554 		case 97:
4555 		case 81:
4556 		case 161:
4557 		case 145:
4558 		case 225:
4559 		case 209:
4560 			block = "CB_FMASK";
4561 			break;
4562 		case 34:
4563 		case 18:
4564 		case 98:
4565 		case 82:
4566 		case 162:
4567 		case 146:
4568 		case 226:
4569 		case 210:
4570 			block = "CB_CMASK";
4571 			break;
4572 		case 35:
4573 		case 19:
4574 		case 99:
4575 		case 83:
4576 		case 163:
4577 		case 147:
4578 		case 227:
4579 		case 211:
4580 			block = "CB_IMMED";
4581 			break;
4582 		case 36:
4583 		case 20:
4584 		case 100:
4585 		case 84:
4586 		case 164:
4587 		case 148:
4588 		case 228:
4589 		case 212:
4590 			block = "DB";
4591 			break;
4592 		case 37:
4593 		case 21:
4594 		case 101:
4595 		case 85:
4596 		case 165:
4597 		case 149:
4598 		case 229:
4599 		case 213:
4600 			block = "DB_HTILE";
4601 			break;
4602 		case 39:
4603 		case 23:
4604 		case 103:
4605 		case 87:
4606 		case 167:
4607 		case 151:
4608 		case 231:
4609 		case 215:
4610 			block = "DB_STEN";
4611 			break;
4612 		case 72:
4613 		case 68:
4614 		case 8:
4615 		case 4:
4616 		case 136:
4617 		case 132:
4618 		case 200:
4619 		case 196:
4620 			block = "TC";
4621 			break;
4622 		case 112:
4623 		case 48:
4624 			block = "CP";
4625 			break;
4626 		case 49:
4627 		case 177:
4628 		case 50:
4629 		case 178:
4630 			block = "SH";
4631 			break;
4632 		case 53:
4633 			block = "VGT";
4634 			break;
4635 		case 117:
4636 			block = "IH";
4637 			break;
4638 		case 51:
4639 		case 115:
4640 			block = "RLC";
4641 			break;
4642 		case 119:
4643 		case 183:
4644 			block = "DMA0";
4645 			break;
4646 		case 61:
4647 			block = "DMA1";
4648 			break;
4649 		case 248:
4650 		case 120:
4651 			block = "HDP";
4652 			break;
4653 		default:
4654 			block = "unknown";
4655 			break;
4656 		}
4657 	}
4658 
4659 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4660 	       protections, vmid, addr,
4661 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4662 	       block, mc_id);
4663 }
4664 
4665 /**
4666  * si_vm_set_page - update the page tables using the CP
4667  *
4668  * @rdev: radeon_device pointer
4669  * @ib: indirect buffer to fill with commands
4670  * @pe: addr of the page entry
4671  * @addr: dst addr to write into pe
4672  * @count: number of page entries to update
4673  * @incr: increase next addr by incr bytes
4674  * @flags: access flags
4675  *
4676  * Update the page tables using the CP (SI).
4677  */
4678 void si_vm_set_page(struct radeon_device *rdev,
4679 		    struct radeon_ib *ib,
4680 		    uint64_t pe,
4681 		    uint64_t addr, unsigned count,
4682 		    uint32_t incr, uint32_t flags)
4683 {
4684 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4685 	uint64_t value;
4686 	unsigned ndw;
4687 
4688 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4689 		while (count) {
4690 			ndw = 2 + count * 2;
4691 			if (ndw > 0x3FFE)
4692 				ndw = 0x3FFE;
4693 
4694 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4695 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4696 					WRITE_DATA_DST_SEL(1));
4697 			ib->ptr[ib->length_dw++] = pe;
4698 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4699 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4700 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4701 					value = radeon_vm_map_gart(rdev, addr);
4702 					value &= 0xFFFFFFFFFFFFF000ULL;
4703 				} else if (flags & RADEON_VM_PAGE_VALID) {
4704 					value = addr;
4705 				} else {
4706 					value = 0;
4707 				}
4708 				addr += incr;
4709 				value |= r600_flags;
4710 				ib->ptr[ib->length_dw++] = value;
4711 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4712 			}
4713 		}
4714 	} else {
4715 		/* DMA */
4716 		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4717 	}
4718 }
4719 
/**
 * si_vm_flush - flush the TLB for a VM via the given ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: VM to flush (no-op when NULL)
 *
 * Emits ring packets that update the VM's page directory base address,
 * flush the HDP cache, invalidate the VM context and then synchronize
 * the PFP with the ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4762 
4763 /*
4764  *  Power and clock gating
4765  */
4766 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4767 {
4768 	int i;
4769 
4770 	for (i = 0; i < rdev->usec_timeout; i++) {
4771 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4772 			break;
4773 		udelay(1);
4774 	}
4775 
4776 	for (i = 0; i < rdev->usec_timeout; i++) {
4777 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4778 			break;
4779 		udelay(1);
4780 	}
4781 }
4782 
4783 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4784 					 bool enable)
4785 {
4786 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4787 	u32 mask;
4788 	int i;
4789 
4790 	if (enable)
4791 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4792 	else
4793 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4794 	WREG32(CP_INT_CNTL_RING0, tmp);
4795 
4796 	if (!enable) {
4797 		/* read a gfx register */
4798 		tmp = RREG32(DB_DEPTH_INFO);
4799 
4800 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4801 		for (i = 0; i < rdev->usec_timeout; i++) {
4802 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4803 				break;
4804 			udelay(1);
4805 		}
4806 	}
4807 }
4808 
4809 static void si_set_uvd_dcm(struct radeon_device *rdev,
4810 			   bool sw_mode)
4811 {
4812 	u32 tmp, tmp2;
4813 
4814 	tmp = RREG32(UVD_CGC_CTRL);
4815 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4816 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4817 
4818 	if (sw_mode) {
4819 		tmp &= ~0x7ffff800;
4820 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4821 	} else {
4822 		tmp |= 0x7ffff800;
4823 		tmp2 = 0;
4824 	}
4825 
4826 	WREG32(UVD_CGC_CTRL, tmp);
4827 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4828 }
4829 
/* Initialize UVD internal clock gating.
 *
 * hw_mode is hard-coded true, so this always selects hardware DCM via
 * si_set_uvd_dcm(rdev, false); the else branch (clearing DCM directly)
 * is currently dead code kept as a reference for software mode.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4842 
4843 static u32 si_halt_rlc(struct radeon_device *rdev)
4844 {
4845 	u32 data, orig;
4846 
4847 	orig = data = RREG32(RLC_CNTL);
4848 
4849 	if (data & RLC_ENABLE) {
4850 		data &= ~RLC_ENABLE;
4851 		WREG32(RLC_CNTL, data);
4852 
4853 		si_wait_for_rlc_serdes(rdev);
4854 	}
4855 
4856 	return orig;
4857 }
4858 
4859 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4860 {
4861 	u32 tmp;
4862 
4863 	tmp = RREG32(RLC_CNTL);
4864 	if (tmp != rlc)
4865 		WREG32(RLC_CNTL, rlc);
4866 }
4867 
4868 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4869 {
4870 	u32 data, orig;
4871 
4872 	orig = data = RREG32(DMA_PG);
4873 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4874 		data |= PG_CNTL_ENABLE;
4875 	else
4876 		data &= ~PG_CNTL_ENABLE;
4877 	if (orig != data)
4878 		WREG32(DMA_PG, data);
4879 }
4880 
4881 static void si_init_dma_pg(struct radeon_device *rdev)
4882 {
4883 	u32 tmp;
4884 
4885 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4886 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4887 
4888 	for (tmp = 0; tmp < 5; tmp++)
4889 		WREG32(DMA_PGFSM_WRITE, 0);
4890 }
4891 
/* Enable or disable GFX coarse-grain power gating.
 *
 * Enable (requires RADEON_PG_SUPPORT_GFX_PG): program the RLC power-up/
 * power-down delay thresholds, set GFX_PG_ENABLE, then turn on automatic
 * power gating.  Disable: turn off automatic power gating and perform a
 * gfx register read (DB_RENDER_CONTROL) — presumably to flush/wake the
 * gfx block before PG settles; confirm against hardware docs.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down/thermal throttle delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read; value intentionally unused */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4916 
/* One-time GFX power-gating setup: point the RLC at the save/restore and
 * clear-state buffers and program the auto-PG idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* RLC save/restore buffer base (256-byte aligned address) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	/* clear-state (CSB) buffer base (256-byte aligned address) */
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM sample-gap idle threshold before auto power gating kicks in */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4936 
4937 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4938 {
4939 	u32 mask = 0, tmp, tmp1;
4940 	int i;
4941 
4942 	si_select_se_sh(rdev, se, sh);
4943 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4944 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4945 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4946 
4947 	tmp &= 0xffff0000;
4948 
4949 	tmp |= tmp1;
4950 	tmp >>= 16;
4951 
4952 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4953 		mask <<= 1;
4954 		mask |= 1;
4955 	}
4956 
4957 	return (~tmp) & mask;
4958 }
4959 
4960 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4961 {
4962 	u32 i, j, k, active_cu_number = 0;
4963 	u32 mask, counter, cu_bitmap;
4964 	u32 tmp = 0;
4965 
4966 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4967 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4968 			mask = 1;
4969 			cu_bitmap = 0;
4970 			counter  = 0;
4971 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4972 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4973 					if (counter < 2)
4974 						cu_bitmap |= mask;
4975 					counter++;
4976 				}
4977 				mask <<= 1;
4978 			}
4979 
4980 			active_cu_number += counter;
4981 			tmp |= (cu_bitmap << (i * 16 + j * 8));
4982 		}
4983 	}
4984 
4985 	WREG32(RLC_PG_AO_CU_MASK, tmp);
4986 
4987 	tmp = RREG32(RLC_MAX_PG_CU);
4988 	tmp &= ~MAX_PU_CU_MASK;
4989 	tmp |= MAX_PU_CU(active_cu_number);
4990 	WREG32(RLC_MAX_PG_CU, tmp);
4991 }
4992 
/* Enable or disable GFX coarse-grain clock gating (CGCG/CGLS).
 *
 * The enable path halts the RLC, broadcasts a serdes write to all CU
 * masters, waits for completion, restores the RLC, then sets the enable
 * bits; the sequence/order is hardware-mandated.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all masters */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads to flush pending CB clock-gating state */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5032 
/* Enable or disable GFX medium-grain clock gating (MGCG), including the
 * optional CP memory light-sleep.  Both paths reprogram the CU serdes
 * while the RLC is halted; the register order is hardware-mandated.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		/* magic MGCG configuration value for SI */
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low MGCG override bits to let gating engage */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all masters */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the override bits on to defeat gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5088 
5089 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5090 			       bool enable)
5091 {
5092 	u32 orig, data, tmp;
5093 
5094 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5095 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5096 		tmp |= 0x3fff;
5097 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5098 
5099 		orig = data = RREG32(UVD_CGC_CTRL);
5100 		data |= DCM;
5101 		if (orig != data)
5102 			WREG32(UVD_CGC_CTRL, data);
5103 
5104 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5105 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5106 	} else {
5107 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5108 		tmp &= ~0x3fff;
5109 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5110 
5111 		orig = data = RREG32(UVD_CGC_CTRL);
5112 		data &= ~DCM;
5113 		if (orig != data)
5114 			WREG32(UVD_CGC_CTRL, data);
5115 
5116 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5117 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5118 	}
5119 }
5120 
/* Memory-controller / VM clock-gating control registers shared by the
 * MGCG (si_enable_mc_mgcg) and light-sleep (si_enable_mc_ls) paths.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5133 
5134 static void si_enable_mc_ls(struct radeon_device *rdev,
5135 			    bool enable)
5136 {
5137 	int i;
5138 	u32 orig, data;
5139 
5140 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5141 		orig = data = RREG32(mc_cg_registers[i]);
5142 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5143 			data |= MC_LS_ENABLE;
5144 		else
5145 			data &= ~MC_LS_ENABLE;
5146 		if (data != orig)
5147 			WREG32(mc_cg_registers[i], data);
5148 	}
5149 }
5150 
5151 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5152 			       bool enable)
5153 {
5154 	int i;
5155 	u32 orig, data;
5156 
5157 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5158 		orig = data = RREG32(mc_cg_registers[i]);
5159 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5160 			data |= MC_CG_ENABLE;
5161 		else
5162 			data &= ~MC_CG_ENABLE;
5163 		if (data != orig)
5164 			WREG32(mc_cg_registers[i], data);
5165 	}
5166 }
5167 
5168 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5169 			       bool enable)
5170 {
5171 	u32 orig, data, offset;
5172 	int i;
5173 
5174 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5175 		for (i = 0; i < 2; i++) {
5176 			if (i == 0)
5177 				offset = DMA0_REGISTER_OFFSET;
5178 			else
5179 				offset = DMA1_REGISTER_OFFSET;
5180 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5181 			data &= ~MEM_POWER_OVERRIDE;
5182 			if (data != orig)
5183 				WREG32(DMA_POWER_CNTL + offset, data);
5184 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5185 		}
5186 	} else {
5187 		for (i = 0; i < 2; i++) {
5188 			if (i == 0)
5189 				offset = DMA0_REGISTER_OFFSET;
5190 			else
5191 				offset = DMA1_REGISTER_OFFSET;
5192 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5193 			data |= MEM_POWER_OVERRIDE;
5194 			if (data != orig)
5195 				WREG32(DMA_POWER_CNTL + offset, data);
5196 
5197 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5198 			data = 0xff000000;
5199 			if (data != orig)
5200 				WREG32(DMA_CLK_CTRL + offset, data);
5201 		}
5202 	}
5203 }
5204 
5205 static void si_enable_bif_mgls(struct radeon_device *rdev,
5206 			       bool enable)
5207 {
5208 	u32 orig, data;
5209 
5210 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5211 
5212 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5213 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5214 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5215 	else
5216 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5217 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5218 
5219 	if (orig != data)
5220 		WREG32_PCIE(PCIE_CNTL2, data);
5221 }
5222 
5223 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5224 			       bool enable)
5225 {
5226 	u32 orig, data;
5227 
5228 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5229 
5230 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5231 		data &= ~CLOCK_GATING_DIS;
5232 	else
5233 		data |= CLOCK_GATING_DIS;
5234 
5235 	if (orig != data)
5236 		WREG32(HDP_HOST_PATH_CNTL, data);
5237 }
5238 
5239 static void si_enable_hdp_ls(struct radeon_device *rdev,
5240 			     bool enable)
5241 {
5242 	u32 orig, data;
5243 
5244 	orig = data = RREG32(HDP_MEM_POWER_LS);
5245 
5246 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5247 		data |= HDP_LS_ENABLE;
5248 	else
5249 		data &= ~HDP_LS_ENABLE;
5250 
5251 	if (orig != data)
5252 		WREG32(HDP_MEM_POWER_LS, data);
5253 }
5254 
5255 void si_update_cg(struct radeon_device *rdev,
5256 		  u32 block, bool enable)
5257 {
5258 	if (block & RADEON_CG_BLOCK_GFX) {
5259 		si_enable_gui_idle_interrupt(rdev, false);
5260 		/* order matters! */
5261 		if (enable) {
5262 			si_enable_mgcg(rdev, true);
5263 			si_enable_cgcg(rdev, true);
5264 		} else {
5265 			si_enable_cgcg(rdev, false);
5266 			si_enable_mgcg(rdev, false);
5267 		}
5268 		si_enable_gui_idle_interrupt(rdev, true);
5269 	}
5270 
5271 	if (block & RADEON_CG_BLOCK_MC) {
5272 		si_enable_mc_mgcg(rdev, enable);
5273 		si_enable_mc_ls(rdev, enable);
5274 	}
5275 
5276 	if (block & RADEON_CG_BLOCK_SDMA) {
5277 		si_enable_dma_mgcg(rdev, enable);
5278 	}
5279 
5280 	if (block & RADEON_CG_BLOCK_BIF) {
5281 		si_enable_bif_mgls(rdev, enable);
5282 	}
5283 
5284 	if (block & RADEON_CG_BLOCK_UVD) {
5285 		if (rdev->has_uvd) {
5286 			si_enable_uvd_mgcg(rdev, enable);
5287 		}
5288 	}
5289 
5290 	if (block & RADEON_CG_BLOCK_HDP) {
5291 		si_enable_hdp_mgcg(rdev, enable);
5292 		si_enable_hdp_ls(rdev, enable);
5293 	}
5294 }
5295 
5296 static void si_init_cg(struct radeon_device *rdev)
5297 {
5298 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5299 			    RADEON_CG_BLOCK_MC |
5300 			    RADEON_CG_BLOCK_SDMA |
5301 			    RADEON_CG_BLOCK_BIF |
5302 			    RADEON_CG_BLOCK_HDP), true);
5303 	if (rdev->has_uvd) {
5304 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5305 		si_init_uvd_internal_cg(rdev);
5306 	}
5307 }
5308 
5309 static void si_fini_cg(struct radeon_device *rdev)
5310 {
5311 	if (rdev->has_uvd) {
5312 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5313 	}
5314 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5315 			    RADEON_CG_BLOCK_MC |
5316 			    RADEON_CG_BLOCK_SDMA |
5317 			    RADEON_CG_BLOCK_BIF |
5318 			    RADEON_CG_BLOCK_HDP), false);
5319 }
5320 
5321 u32 si_get_csb_size(struct radeon_device *rdev)
5322 {
5323 	u32 count = 0;
5324 	const struct cs_section_def *sect = NULL;
5325 	const struct cs_extent_def *ext = NULL;
5326 
5327 	if (rdev->rlc.cs_data == NULL)
5328 		return 0;
5329 
5330 	/* begin clear state */
5331 	count += 2;
5332 	/* context control state */
5333 	count += 3;
5334 
5335 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5336 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5337 			if (sect->id == SECT_CONTEXT)
5338 				count += 2 + ext->reg_count;
5339 			else
5340 				return 0;
5341 		}
5342 	}
5343 	/* pa_sc_raster_config */
5344 	count += 3;
5345 	/* end clear state */
5346 	count += 2;
5347 	/* clear state */
5348 	count += 2;
5349 
5350 	return count;
5351 }
5352 
/* Fill @buffer with the clear-state command stream whose dword count is
 * returned by si_get_csb_size(); the two must stay in sync.  Emits the
 * preamble, context control, all SECT_CONTEXT register extents, the
 * per-family PA_SC_RASTER_CONFIG value, then the end-preamble and a
 * CLEAR_STATE packet.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				/* register offset is relative to the context
				 * register space base (0xa000)
				 */
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* only context sections are supported;
				 * mirrors the return 0 in si_get_csb_size()
				 */
				return;
			}
		}
	}

	/* per-family raster configuration */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = 0x2a00126a;
		break;
	case CHIP_VERDE:
		buffer[count++] = 0x0000124a;
		break;
	case CHIP_OLAND:
		buffer[count++] = 0x00000082;
		break;
	case CHIP_HAINAN:
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5411 
5412 static void si_init_pg(struct radeon_device *rdev)
5413 {
5414 	if (rdev->pg_flags) {
5415 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5416 			si_init_dma_pg(rdev);
5417 		}
5418 		si_init_ao_cu_mask(rdev);
5419 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5420 			si_init_gfx_cgpg(rdev);
5421 		}
5422 		si_enable_dma_pg(rdev, true);
5423 		si_enable_gfx_cgpg(rdev, true);
5424 	} else {
5425 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5426 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5427 	}
5428 }
5429 
5430 static void si_fini_pg(struct radeon_device *rdev)
5431 {
5432 	if (rdev->pg_flags) {
5433 		si_enable_dma_pg(rdev, false);
5434 		si_enable_gfx_cgpg(rdev, false);
5435 	}
5436 }
5437 
5438 /*
5439  * RLC
5440  */
5441 void si_rlc_reset(struct radeon_device *rdev)
5442 {
5443 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5444 
5445 	tmp |= SOFT_RESET_RLC;
5446 	WREG32(GRBM_SOFT_RESET, tmp);
5447 	udelay(50);
5448 	tmp &= ~SOFT_RESET_RLC;
5449 	WREG32(GRBM_SOFT_RESET, tmp);
5450 	udelay(50);
5451 }
5452 
5453 static void si_rlc_stop(struct radeon_device *rdev)
5454 {
5455 	WREG32(RLC_CNTL, 0);
5456 
5457 	si_enable_gui_idle_interrupt(rdev, false);
5458 
5459 	si_wait_for_rlc_serdes(rdev);
5460 }
5461 
5462 static void si_rlc_start(struct radeon_device *rdev)
5463 {
5464 	WREG32(RLC_CNTL, RLC_ENABLE);
5465 
5466 	si_enable_gui_idle_interrupt(rdev, true);
5467 
5468 	udelay(50);
5469 }
5470 
5471 static bool si_lbpw_supported(struct radeon_device *rdev)
5472 {
5473 	u32 tmp;
5474 
5475 	/* Enable LBPW only for DDR3 */
5476 	tmp = RREG32(MC_SEQ_MISC0);
5477 	if ((tmp & 0xF0000000) == 0xB0000000)
5478 		return true;
5479 	return false;
5480 }
5481 
5482 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5483 {
5484 	u32 tmp;
5485 
5486 	tmp = RREG32(RLC_LB_CNTL);
5487 	if (enable)
5488 		tmp |= LOAD_BALANCE_ENABLE;
5489 	else
5490 		tmp &= ~LOAD_BALANCE_ENABLE;
5491 	WREG32(RLC_LB_CNTL, tmp);
5492 
5493 	if (!enable) {
5494 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5495 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5496 	}
5497 }
5498 
/* Bring up the RLC: stop and reset it, (re)initialize power and clock
 * gating, program the RLC control registers, upload the RLC microcode
 * from rdev->rlc_fw and restart it.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware words are big-endian; convert while streaming them into
	 * the ucode write port
	 */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5538 
5539 static void si_enable_interrupts(struct radeon_device *rdev)
5540 {
5541 	u32 ih_cntl = RREG32(IH_CNTL);
5542 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5543 
5544 	ih_cntl |= ENABLE_INTR;
5545 	ih_rb_cntl |= IH_RB_ENABLE;
5546 	WREG32(IH_CNTL, ih_cntl);
5547 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5548 	rdev->ih.enabled = true;
5549 }
5550 
5551 static void si_disable_interrupts(struct radeon_device *rdev)
5552 {
5553 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5554 	u32 ih_cntl = RREG32(IH_CNTL);
5555 
5556 	ih_rb_cntl &= ~IH_RB_ENABLE;
5557 	ih_cntl &= ~ENABLE_INTR;
5558 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5559 	WREG32(IH_CNTL, ih_cntl);
5560 	/* set rptr, wptr to 0 */
5561 	WREG32(IH_RB_RPTR, 0);
5562 	WREG32(IH_RB_WPTR, 0);
5563 	rdev->ih.enabled = false;
5564 	rdev->ih.rptr = 0;
5565 }
5566 
/* Force every interrupt source to the disabled state: CP rings, both DMA
 * engines, GRBM, all populated CRTCs (vblank and pageflip) and, on parts
 * with display, the HPD pins (preserving only their polarity bits).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* disable DMA trap interrupts on both engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* CRTC interrupt masks, two CRTCs per register bank */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip (GRPH) interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* HPD interrupts only exist on parts with display hardware */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* preserve the configured HPD polarity, clear everything else */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5624 
/* Initialize the interrupt handler (IH).
 *
 * Allocates the IH ring, brings up the RLC, programs the IH ring buffer
 * (base, size, writeback, pointers) and IH_CNTL, forces all interrupt
 * sources disabled, enables bus mastering and finally enables the IH.
 *
 * Returns 0 on success or a negative error code (ring allocation or RLC
 * resume failure).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5695 
5696 int si_irq_set(struct radeon_device *rdev)
5697 {
5698 	u32 cp_int_cntl;
5699 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5700 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5701 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5702 	u32 grbm_int_cntl = 0;
5703 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5704 	u32 dma_cntl, dma_cntl1;
5705 	u32 thermal_int = 0;
5706 
5707 	if (!rdev->irq.installed) {
5708 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5709 		return -EINVAL;
5710 	}
5711 	/* don't enable anything if the ih is disabled */
5712 	if (!rdev->ih.enabled) {
5713 		si_disable_interrupts(rdev);
5714 		/* force the active interrupt state to all disabled */
5715 		si_disable_interrupt_state(rdev);
5716 		return 0;
5717 	}
5718 
5719 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5720 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5721 
5722 	if (!ASIC_IS_NODCE(rdev)) {
5723 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5724 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5725 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5726 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5727 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5728 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5729 	}
5730 
5731 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5732 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5733 
5734 	thermal_int = RREG32(CG_THERMAL_INT) &
5735 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5736 
5737 	/* enable CP interrupts on all rings */
5738 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5739 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5740 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5741 	}
5742 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5743 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5744 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5745 	}
5746 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5747 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5748 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5749 	}
5750 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5751 		DRM_DEBUG("si_irq_set: sw int dma\n");
5752 		dma_cntl |= TRAP_ENABLE;
5753 	}
5754 
5755 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5756 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5757 		dma_cntl1 |= TRAP_ENABLE;
5758 	}
5759 	if (rdev->irq.crtc_vblank_int[0] ||
5760 	    atomic_read(&rdev->irq.pflip[0])) {
5761 		DRM_DEBUG("si_irq_set: vblank 0\n");
5762 		crtc1 |= VBLANK_INT_MASK;
5763 	}
5764 	if (rdev->irq.crtc_vblank_int[1] ||
5765 	    atomic_read(&rdev->irq.pflip[1])) {
5766 		DRM_DEBUG("si_irq_set: vblank 1\n");
5767 		crtc2 |= VBLANK_INT_MASK;
5768 	}
5769 	if (rdev->irq.crtc_vblank_int[2] ||
5770 	    atomic_read(&rdev->irq.pflip[2])) {
5771 		DRM_DEBUG("si_irq_set: vblank 2\n");
5772 		crtc3 |= VBLANK_INT_MASK;
5773 	}
5774 	if (rdev->irq.crtc_vblank_int[3] ||
5775 	    atomic_read(&rdev->irq.pflip[3])) {
5776 		DRM_DEBUG("si_irq_set: vblank 3\n");
5777 		crtc4 |= VBLANK_INT_MASK;
5778 	}
5779 	if (rdev->irq.crtc_vblank_int[4] ||
5780 	    atomic_read(&rdev->irq.pflip[4])) {
5781 		DRM_DEBUG("si_irq_set: vblank 4\n");
5782 		crtc5 |= VBLANK_INT_MASK;
5783 	}
5784 	if (rdev->irq.crtc_vblank_int[5] ||
5785 	    atomic_read(&rdev->irq.pflip[5])) {
5786 		DRM_DEBUG("si_irq_set: vblank 5\n");
5787 		crtc6 |= VBLANK_INT_MASK;
5788 	}
5789 	if (rdev->irq.hpd[0]) {
5790 		DRM_DEBUG("si_irq_set: hpd 1\n");
5791 		hpd1 |= DC_HPDx_INT_EN;
5792 	}
5793 	if (rdev->irq.hpd[1]) {
5794 		DRM_DEBUG("si_irq_set: hpd 2\n");
5795 		hpd2 |= DC_HPDx_INT_EN;
5796 	}
5797 	if (rdev->irq.hpd[2]) {
5798 		DRM_DEBUG("si_irq_set: hpd 3\n");
5799 		hpd3 |= DC_HPDx_INT_EN;
5800 	}
5801 	if (rdev->irq.hpd[3]) {
5802 		DRM_DEBUG("si_irq_set: hpd 4\n");
5803 		hpd4 |= DC_HPDx_INT_EN;
5804 	}
5805 	if (rdev->irq.hpd[4]) {
5806 		DRM_DEBUG("si_irq_set: hpd 5\n");
5807 		hpd5 |= DC_HPDx_INT_EN;
5808 	}
5809 	if (rdev->irq.hpd[5]) {
5810 		DRM_DEBUG("si_irq_set: hpd 6\n");
5811 		hpd6 |= DC_HPDx_INT_EN;
5812 	}
5813 
5814 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5815 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5816 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5817 
5818 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5819 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5820 
5821 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5822 
5823 	if (rdev->irq.dpm_thermal) {
5824 		DRM_DEBUG("dpm thermal\n");
5825 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5826 	}
5827 
5828 	if (rdev->num_crtc >= 2) {
5829 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5830 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5831 	}
5832 	if (rdev->num_crtc >= 4) {
5833 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5834 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5835 	}
5836 	if (rdev->num_crtc >= 6) {
5837 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5838 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5839 	}
5840 
5841 	if (rdev->num_crtc >= 2) {
5842 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5843 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5844 	}
5845 	if (rdev->num_crtc >= 4) {
5846 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5847 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5848 	}
5849 	if (rdev->num_crtc >= 6) {
5850 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5851 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5852 	}
5853 
5854 	if (!ASIC_IS_NODCE(rdev)) {
5855 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5856 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5857 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5858 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5859 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5860 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5861 	}
5862 
5863 	WREG32(CG_THERMAL_INT, thermal_int);
5864 
5865 	return 0;
5866 }
5867 
5868 static inline void si_irq_ack(struct radeon_device *rdev)
5869 {
5870 	u32 tmp;
5871 
5872 	if (ASIC_IS_NODCE(rdev))
5873 		return;
5874 
5875 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5876 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5877 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5878 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5879 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5880 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5881 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5882 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5883 	if (rdev->num_crtc >= 4) {
5884 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5885 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5886 	}
5887 	if (rdev->num_crtc >= 6) {
5888 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5889 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5890 	}
5891 
5892 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5893 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5894 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5895 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5896 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5897 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5898 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5899 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5900 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5901 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5902 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5903 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5904 
5905 	if (rdev->num_crtc >= 4) {
5906 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5907 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5908 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5909 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5910 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5911 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5912 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5913 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5914 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5915 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5916 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5917 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5918 	}
5919 
5920 	if (rdev->num_crtc >= 6) {
5921 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5922 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5923 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5924 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5925 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5926 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5927 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5928 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5929 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5930 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5931 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5932 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5933 	}
5934 
5935 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5936 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5937 		tmp |= DC_HPDx_INT_ACK;
5938 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5939 	}
5940 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5941 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5942 		tmp |= DC_HPDx_INT_ACK;
5943 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5944 	}
5945 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5946 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5947 		tmp |= DC_HPDx_INT_ACK;
5948 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5949 	}
5950 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5951 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5952 		tmp |= DC_HPDx_INT_ACK;
5953 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5954 	}
5955 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5956 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5957 		tmp |= DC_HPDx_INT_ACK;
5958 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5959 	}
5960 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5961 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5962 		tmp |= DC_HPDx_INT_ACK;
5963 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5964 	}
5965 }
5966 
/**
 * si_irq_disable - disable interrupt generation and clear pending state
 *
 * @rdev: radeon_device pointer
 *
 * Turns off the interrupt controller, waits briefly for in-flight
 * interrupts to land, acks anything still pending, then masks all
 * individual interrupt sources.  Order matters: sources are only
 * masked after the pending state has been acknowledged.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5975 
/**
 * si_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Fully disables interrupt delivery, then stops the RLC so the
 * hardware is quiescent before suspend.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5981 
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the interrupt hardware, then frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5987 
5988 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5989 {
5990 	u32 wptr, tmp;
5991 
5992 	if (rdev->wb.enabled)
5993 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5994 	else
5995 		wptr = RREG32(IH_RB_WPTR);
5996 
5997 	if (wptr & RB_OVERFLOW) {
5998 		/* When a ring buffer overflow happen start parsing interrupt
5999 		 * from the last not overwritten vector (wptr + 16). Hopefully
6000 		 * this should allow us to catchup.
6001 		 */
6002 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6003 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6004 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6005 		tmp = RREG32(IH_RB_CNTL);
6006 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6007 		WREG32(IH_RB_CNTL, tmp);
6008 	}
6009 	return (wptr & rdev->ih.ptr_mask);
6010 }
6011 
6012 /*        SI IV Ring
6013  * Each IV ring entry is 128 bits:
6014  * [7:0]    - interrupt source id
6015  * [31:8]   - reserved
6016  * [59:32]  - interrupt source data
6017  * [63:60]  - reserved
6018  * [71:64]  - RINGID
6019  * [79:72]  - VMID
6020  * [127:80] - reserved
6021  */
/**
 * si_irq_process - drain and dispatch the IH ring
 *
 * @rdev: radeon_device pointer
 *
 * Walks the interrupt handler (IH) ring from rptr to wptr, decoding
 * each 128-bit vector (see the IV ring layout comment above) and
 * dispatching to the vblank/vline, hotplug, VM fault, CP, DMA and
 * thermal handlers.  Deferred work (hotplug, thermal) is scheduled
 * after the ring is drained.  Re-checks wptr at the end and restarts
 * if new vectors arrived while processing.
 *
 * Returns IRQ_HANDLED if any processing occurred, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* decode one 16-byte IH vector: source id, payload, ring id */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM context 1 protection fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* schedule deferred work collected while draining the ring */
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6345 
6346 /*
6347  * startup/shutdown callbacks
6348  */
/**
 * si_startup - bring up the SI GPU for operation
 *
 * @rdev: radeon_device pointer
 *
 * Full hardware bring-up path used by both init and resume: PCIe
 * link/ASPM setup, MC programming, microcode load, GART/RLC/writeback
 * init, fence and IRQ setup, ring initialization for GFX, both
 * compute CP rings, both DMA rings and (optionally) UVD, then IB
 * pool, VM manager and audio.  The call order mirrors hardware
 * dependencies and must not be rearranged.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load microcode if any image is missing (first boot) */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence drivers for all five rings (+ UVD below) */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on failure just disable its ring, don't abort */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring was sized earlier; skipped when resume failed above */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6540 
6541 int si_resume(struct radeon_device *rdev)
6542 {
6543 	int r;
6544 
6545 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6546 	 * posting will perform necessary task to bring back GPU into good
6547 	 * shape.
6548 	 */
6549 	/* post card */
6550 	atom_asic_init(rdev->mode_info.atom_context);
6551 
6552 	/* init golden registers */
6553 	si_init_golden_registers(rdev);
6554 
6555 	rdev->accel_working = true;
6556 	r = si_startup(rdev);
6557 	if (r) {
6558 		DRM_ERROR("si startup failed on resume\n");
6559 		rdev->accel_working = false;
6560 		return r;
6561 	}
6562 
6563 	return r;
6564 
6565 }
6566 
/**
 * si_suspend - quiesce the SI GPU for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops engines in dependency order: audio and VM manager first,
 * then the CP and DMA rings, optional UVD, powergating/clockgating,
 * interrupts, writeback and finally the GART.
 *
 * Returns 0 (cannot fail).
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6584 
6585 /* Plan is to move initialization in that function and use
6586  * helper function so that radeon_device_init pretty much
6587  * do nothing more than calling asic specific function. This
6588  * should also allow to remove a bunch of callback function
6589  * like vram_info.
6590  */
/**
 * si_init - one-time driver initialization for SI ASICs
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the (ATOM) BIOS, posts the card if necessary,
 * sets up scratch/surface registers, clocks, fence driver, memory
 * controller and buffer manager, sizes all rings and the IH ring,
 * initializes the GART, then runs si_startup().  If startup fails,
 * acceleration is torn down and disabled but init still returns 0 so
 * modesetting can keep working; a missing MC ucode is fatal.
 *
 * Returns 0 on success, negative error code on fatal failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* size the five execution rings; BOs are allocated in startup */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* non-fatal: disable acceleration but let KMS continue */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
6705 
/**
 * si_fini - tear down the SI GPU driver state
 *
 * @rdev: radeon_device pointer
 *
 * Reverses si_init()/si_startup(): stops CP and DMA, disables
 * power/clock gating, tears down interrupts, RLC, writeback, VM
 * manager, IB pool, optional UVD, GART, scratch memory, GEM, fences,
 * the buffer manager and the ATOM BIOS state, then frees the BIOS
 * image.  Teardown order mirrors the reverse of initialization.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6731 
6732 /**
6733  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6734  *
6735  * @rdev: radeon_device pointer
6736  *
6737  * Fetches a GPU clock counter snapshot (SI).
6738  * Returns the 64 bit clock counter snapshot.
6739  */
6740 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6741 {
6742 	uint64_t clock;
6743 
6744 	mutex_lock(&rdev->gpu_clock_mutex);
6745 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6746 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6747 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6748 	mutex_unlock(&rdev->gpu_clock_mutex);
6749 	return clock;
6750 }
6751 
/**
 * si_set_uvd_clocks - reprogram the UPLL for the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: target UVD video clock (0 = leave PLL bypassed and asleep)
 * @dclk: target UVD decode clock (0 = leave PLL bypassed and asleep)
 *
 * Switches VCLK/DCLK onto the bus clock, reprograms the UPLL feedback
 * and post dividers for the requested rates, waits for lock and then
 * switches the clocks back onto the PLL output.
 * NOTE(review): clock arguments are presumably in the 10 kHz units
 * used elsewhere by the UVD code — confirm against callers.
 *
 * Returns 0 on success, a negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* NOTE(review): constants are the VCO/fb-div/post-div limits for
	 * the SI UPLL — see radeon_uvd_calc_upll_dividers() for meaning */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* wait for the PLL to acknowledge the control request */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* select the spare bit depending on the feedback divider range */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6842 
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * If both the root port and the GPU support it, retrains the PCIE link
 * to gen3 (or gen2) data rates, including the gen3 equalization retry
 * sequence.  Disabled via the radeon.pcie_gen2 module parameter and
 * skipped on IGP/non-PCIE parts or when the link is already at the
 * highest supported rate.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends need an express capability for the config writes below */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD bits so they can be restored afterwards */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its full width if it narrowed */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* NOTE(review): (1 << 4) | (7 << 9) is a magic LNKCTL2
				 * bit mask carried over from the saved configs — confirm
				 * against the PCIe spec / amdgpu before touching it */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait (bounded) for the hw to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6999 
/**
 * si_program_aspm - program PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity timers, PLL power down while in L1,
 * CLKREQ#-based clocking and the PCIE memory light-sleep bits.
 * Skipped entirely when the radeon.aspm module parameter is 0 or the
 * asic is not PCIE.
 * NOTE(review): the four disable_* locals are hard-coded false here,
 * i.e. every feature is currently enabled unconditionally.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* set up the L0s/L1 inactivity timers */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time: only Oland/Hainan set a non-zero value */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ# is only usable if the root port advertises clock PM */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the PCIE memories */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer if the N_FTS field is saturated
		 * and the link is reversed in both directions */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7203