xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision e23feb16)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
/*
 * Firmware images required by the supported Southern Islands ASICs
 * (Tahiti, Pitcairn, Verde, Oland, Hainan).  Each chip needs the same
 * five microcode blobs: pfp/me/ce, mc, rlc and smc.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
/* Forward declarations for helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/* Routines shared with other radeon ASIC files (sumo/r600/evergreen/si_dma). */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_dma_vm_set_page(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags);
/* More local helpers defined later in this file. */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
91 
92 static const u32 verde_rlc_save_restore_register_list[] =
93 {
94 	(0x8000 << 16) | (0x98f4 >> 2),
95 	0x00000000,
96 	(0x8040 << 16) | (0x98f4 >> 2),
97 	0x00000000,
98 	(0x8000 << 16) | (0xe80 >> 2),
99 	0x00000000,
100 	(0x8040 << 16) | (0xe80 >> 2),
101 	0x00000000,
102 	(0x8000 << 16) | (0x89bc >> 2),
103 	0x00000000,
104 	(0x8040 << 16) | (0x89bc >> 2),
105 	0x00000000,
106 	(0x8000 << 16) | (0x8c1c >> 2),
107 	0x00000000,
108 	(0x8040 << 16) | (0x8c1c >> 2),
109 	0x00000000,
110 	(0x9c00 << 16) | (0x98f0 >> 2),
111 	0x00000000,
112 	(0x9c00 << 16) | (0xe7c >> 2),
113 	0x00000000,
114 	(0x8000 << 16) | (0x9148 >> 2),
115 	0x00000000,
116 	(0x8040 << 16) | (0x9148 >> 2),
117 	0x00000000,
118 	(0x9c00 << 16) | (0x9150 >> 2),
119 	0x00000000,
120 	(0x9c00 << 16) | (0x897c >> 2),
121 	0x00000000,
122 	(0x9c00 << 16) | (0x8d8c >> 2),
123 	0x00000000,
124 	(0x9c00 << 16) | (0xac54 >> 2),
125 	0X00000000,
126 	0x3,
127 	(0x9c00 << 16) | (0x98f8 >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9910 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x9914 >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x9918 >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x991c >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9920 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x9924 >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x9928 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x992c >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9930 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x9934 >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9938 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x993c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9940 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x9944 >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x9948 >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x994c >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9950 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x9954 >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9958 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x995c >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9960 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9964 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9968 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x996c >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9970 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9974 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9978 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x997c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x9980 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9984 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9988 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x998c >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x8c00 >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x8c14 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x8c04 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x8c08 >> 2),
200 	0x00000000,
201 	(0x8000 << 16) | (0x9b7c >> 2),
202 	0x00000000,
203 	(0x8040 << 16) | (0x9b7c >> 2),
204 	0x00000000,
205 	(0x8000 << 16) | (0xe84 >> 2),
206 	0x00000000,
207 	(0x8040 << 16) | (0xe84 >> 2),
208 	0x00000000,
209 	(0x8000 << 16) | (0x89c0 >> 2),
210 	0x00000000,
211 	(0x8040 << 16) | (0x89c0 >> 2),
212 	0x00000000,
213 	(0x8000 << 16) | (0x914c >> 2),
214 	0x00000000,
215 	(0x8040 << 16) | (0x914c >> 2),
216 	0x00000000,
217 	(0x8000 << 16) | (0x8c20 >> 2),
218 	0x00000000,
219 	(0x8040 << 16) | (0x8c20 >> 2),
220 	0x00000000,
221 	(0x8000 << 16) | (0x9354 >> 2),
222 	0x00000000,
223 	(0x8040 << 16) | (0x9354 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9060 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9364 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9100 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x913c >> 2),
232 	0x00000000,
233 	(0x8000 << 16) | (0x90e0 >> 2),
234 	0x00000000,
235 	(0x8000 << 16) | (0x90e4 >> 2),
236 	0x00000000,
237 	(0x8000 << 16) | (0x90e8 >> 2),
238 	0x00000000,
239 	(0x8040 << 16) | (0x90e0 >> 2),
240 	0x00000000,
241 	(0x8040 << 16) | (0x90e4 >> 2),
242 	0x00000000,
243 	(0x8040 << 16) | (0x90e8 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8bcc >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8b24 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x88c4 >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x8e50 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0x8c0c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0x8e58 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0x8e5c >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0x9508 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0x950c >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x9494 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0xac0c >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0xac10 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0xac14 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0xae00 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0xac08 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x88d4 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x88c8 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x88cc >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x89b0 >> 2),
282 	0x00000000,
283 	(0x9c00 << 16) | (0x8b10 >> 2),
284 	0x00000000,
285 	(0x9c00 << 16) | (0x8a14 >> 2),
286 	0x00000000,
287 	(0x9c00 << 16) | (0x9830 >> 2),
288 	0x00000000,
289 	(0x9c00 << 16) | (0x9834 >> 2),
290 	0x00000000,
291 	(0x9c00 << 16) | (0x9838 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x9a10 >> 2),
294 	0x00000000,
295 	(0x8000 << 16) | (0x9870 >> 2),
296 	0x00000000,
297 	(0x8000 << 16) | (0x9874 >> 2),
298 	0x00000000,
299 	(0x8001 << 16) | (0x9870 >> 2),
300 	0x00000000,
301 	(0x8001 << 16) | (0x9874 >> 2),
302 	0x00000000,
303 	(0x8040 << 16) | (0x9870 >> 2),
304 	0x00000000,
305 	(0x8040 << 16) | (0x9874 >> 2),
306 	0x00000000,
307 	(0x8041 << 16) | (0x9870 >> 2),
308 	0x00000000,
309 	(0x8041 << 16) | (0x9874 >> 2),
310 	0x00000000,
311 	0x00000000
312 };
313 
/*
 * Tahiti RLC golden register settings; rows of three u32 values,
 * apparently {offset, mask, value} -- confirm against the
 * golden-register programming routine (not visible in this chunk).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
323 
/*
 * Tahiti golden register settings; same {offset, mask, value} row
 * shape as the RLC table above.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
358 
/* Second Tahiti golden register group ({offset, mask, value}). */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
363 
/* Pitcairn RLC golden register settings ({offset, mask, value}). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
372 
/* Pitcairn golden register settings ({offset, mask, value}). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
403 
/* Verde RLC golden register settings ({offset, mask, value}). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
412 
/*
 * Verde golden register settings ({offset, mask, value}).
 * NOTE(review): several rows are repeated verbatim; the duplicates are
 * preserved exactly as-is -- they may be deliberate per-instance
 * programming, so do not de-duplicate without confirming.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
468 
/* Oland RLC golden register settings ({offset, mask, value}). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
477 
/* Oland golden register settings ({offset, mask, value}). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
508 
/* Hainan golden register settings ({offset, mask, value}). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
537 
/* Second Hainan golden register group ({offset, mask, value}). */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
542 
/*
 * Tahiti medium-/coarse-grain clock gating (MGCG/CGCG) init sequence;
 * same {offset, mask, value} row shape as the golden tables.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
672 
/*
 * Pitcairn MGCG/CGCG init sequence ({offset, mask, value} rows).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
770 
/*
 * Verde MGCG/CGCG init sequence ({offset, mask, value} rows).
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
870 
/*
 * Oland MGCG/CGCG init sequence ({offset, mask, value} rows).
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
950 
951 static const u32 hainan_mgcg_cgcg_init[] =
952 {
953 	0xc400, 0xffffffff, 0xfffffffc,
954 	0x802c, 0xffffffff, 0xe0000000,
955 	0x9a60, 0xffffffff, 0x00000100,
956 	0x92a4, 0xffffffff, 0x00000100,
957 	0xc164, 0xffffffff, 0x00000100,
958 	0x9774, 0xffffffff, 0x00000100,
959 	0x8984, 0xffffffff, 0x06000100,
960 	0x8a18, 0xffffffff, 0x00000100,
961 	0x92a0, 0xffffffff, 0x00000100,
962 	0xc380, 0xffffffff, 0x00000100,
963 	0x8b28, 0xffffffff, 0x00000100,
964 	0x9144, 0xffffffff, 0x00000100,
965 	0x8d88, 0xffffffff, 0x00000100,
966 	0x8d8c, 0xffffffff, 0x00000100,
967 	0x9030, 0xffffffff, 0x00000100,
968 	0x9034, 0xffffffff, 0x00000100,
969 	0x9038, 0xffffffff, 0x00000100,
970 	0x903c, 0xffffffff, 0x00000100,
971 	0xad80, 0xffffffff, 0x00000100,
972 	0xac54, 0xffffffff, 0x00000100,
973 	0x897c, 0xffffffff, 0x06000100,
974 	0x9868, 0xffffffff, 0x00000100,
975 	0x9510, 0xffffffff, 0x00000100,
976 	0xaf04, 0xffffffff, 0x00000100,
977 	0xae04, 0xffffffff, 0x00000100,
978 	0x949c, 0xffffffff, 0x00000100,
979 	0x802c, 0xffffffff, 0xe0000000,
980 	0x9160, 0xffffffff, 0x00010000,
981 	0x9164, 0xffffffff, 0x00030002,
982 	0x9168, 0xffffffff, 0x00040007,
983 	0x916c, 0xffffffff, 0x00060005,
984 	0x9170, 0xffffffff, 0x00090008,
985 	0x9174, 0xffffffff, 0x00020001,
986 	0x9178, 0xffffffff, 0x00040003,
987 	0x917c, 0xffffffff, 0x00000007,
988 	0x9180, 0xffffffff, 0x00060005,
989 	0x9184, 0xffffffff, 0x00090008,
990 	0x9188, 0xffffffff, 0x00030002,
991 	0x918c, 0xffffffff, 0x00050004,
992 	0x9190, 0xffffffff, 0x00000008,
993 	0x9194, 0xffffffff, 0x00070006,
994 	0x9198, 0xffffffff, 0x000a0009,
995 	0x919c, 0xffffffff, 0x00040003,
996 	0x91a0, 0xffffffff, 0x00060005,
997 	0x91a4, 0xffffffff, 0x00000009,
998 	0x91a8, 0xffffffff, 0x00080007,
999 	0x91ac, 0xffffffff, 0x000b000a,
1000 	0x91b0, 0xffffffff, 0x00050004,
1001 	0x91b4, 0xffffffff, 0x00070006,
1002 	0x91b8, 0xffffffff, 0x0008000b,
1003 	0x91bc, 0xffffffff, 0x000a0009,
1004 	0x91c0, 0xffffffff, 0x000d000c,
1005 	0x91c4, 0xffffffff, 0x00060005,
1006 	0x91c8, 0xffffffff, 0x00080007,
1007 	0x91cc, 0xffffffff, 0x0000000b,
1008 	0x91d0, 0xffffffff, 0x000a0009,
1009 	0x91d4, 0xffffffff, 0x000d000c,
1010 	0x9150, 0xffffffff, 0x96940200,
1011 	0x8708, 0xffffffff, 0x00900100,
1012 	0xc478, 0xffffffff, 0x00000080,
1013 	0xc404, 0xffffffff, 0x0020003f,
1014 	0x30, 0xffffffff, 0x0000001c,
1015 	0x34, 0x000f0000, 0x000f0000,
1016 	0x160c, 0xffffffff, 0x00000100,
1017 	0x1024, 0xffffffff, 0x00000100,
1018 	0x20a8, 0xffffffff, 0x00000104,
1019 	0x264c, 0x000c0000, 0x000c0000,
1020 	0x2648, 0x000c0000, 0x000c0000,
1021 	0x2f50, 0x00000001, 0x00000001,
1022 	0x30cc, 0xc0000fff, 0x00000104,
1023 	0xc1e4, 0x00000001, 0x00000001,
1024 	0xd0c0, 0xfffffff0, 0x00000100,
1025 	0xd8c0, 0xfffffff0, 0x00000100
1026 };
1027 
1028 static u32 verde_pg_init[] =
1029 {
1030 	0x353c, 0xffffffff, 0x40000,
1031 	0x3538, 0xffffffff, 0x200010ff,
1032 	0x353c, 0xffffffff, 0x0,
1033 	0x353c, 0xffffffff, 0x0,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x7007,
1038 	0x3538, 0xffffffff, 0x300010ff,
1039 	0x353c, 0xffffffff, 0x0,
1040 	0x353c, 0xffffffff, 0x0,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x400000,
1045 	0x3538, 0xffffffff, 0x100010ff,
1046 	0x353c, 0xffffffff, 0x0,
1047 	0x353c, 0xffffffff, 0x0,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x120200,
1052 	0x3538, 0xffffffff, 0x500010ff,
1053 	0x353c, 0xffffffff, 0x0,
1054 	0x353c, 0xffffffff, 0x0,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x1e1e16,
1059 	0x3538, 0xffffffff, 0x600010ff,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x353c, 0xffffffff, 0x0,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x171f1e,
1066 	0x3538, 0xffffffff, 0x700010ff,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x353c, 0xffffffff, 0x0,
1069 	0x353c, 0xffffffff, 0x0,
1070 	0x353c, 0xffffffff, 0x0,
1071 	0x353c, 0xffffffff, 0x0,
1072 	0x353c, 0xffffffff, 0x0,
1073 	0x3538, 0xffffffff, 0x9ff,
1074 	0x3500, 0xffffffff, 0x0,
1075 	0x3504, 0xffffffff, 0x10000800,
1076 	0x3504, 0xffffffff, 0xf,
1077 	0x3504, 0xffffffff, 0xf,
1078 	0x3500, 0xffffffff, 0x4,
1079 	0x3504, 0xffffffff, 0x1000051e,
1080 	0x3504, 0xffffffff, 0xffff,
1081 	0x3504, 0xffffffff, 0xffff,
1082 	0x3500, 0xffffffff, 0x8,
1083 	0x3504, 0xffffffff, 0x80500,
1084 	0x3500, 0xffffffff, 0x12,
1085 	0x3504, 0xffffffff, 0x9050c,
1086 	0x3500, 0xffffffff, 0x1d,
1087 	0x3504, 0xffffffff, 0xb052c,
1088 	0x3500, 0xffffffff, 0x2a,
1089 	0x3504, 0xffffffff, 0x1053e,
1090 	0x3500, 0xffffffff, 0x2d,
1091 	0x3504, 0xffffffff, 0x10546,
1092 	0x3500, 0xffffffff, 0x30,
1093 	0x3504, 0xffffffff, 0xa054e,
1094 	0x3500, 0xffffffff, 0x3c,
1095 	0x3504, 0xffffffff, 0x1055f,
1096 	0x3500, 0xffffffff, 0x3f,
1097 	0x3504, 0xffffffff, 0x10567,
1098 	0x3500, 0xffffffff, 0x42,
1099 	0x3504, 0xffffffff, 0x1056f,
1100 	0x3500, 0xffffffff, 0x45,
1101 	0x3504, 0xffffffff, 0x10572,
1102 	0x3500, 0xffffffff, 0x48,
1103 	0x3504, 0xffffffff, 0x20575,
1104 	0x3500, 0xffffffff, 0x4c,
1105 	0x3504, 0xffffffff, 0x190801,
1106 	0x3500, 0xffffffff, 0x67,
1107 	0x3504, 0xffffffff, 0x1082a,
1108 	0x3500, 0xffffffff, 0x6a,
1109 	0x3504, 0xffffffff, 0x1b082d,
1110 	0x3500, 0xffffffff, 0x87,
1111 	0x3504, 0xffffffff, 0x310851,
1112 	0x3500, 0xffffffff, 0xba,
1113 	0x3504, 0xffffffff, 0x891,
1114 	0x3500, 0xffffffff, 0xbc,
1115 	0x3504, 0xffffffff, 0x893,
1116 	0x3500, 0xffffffff, 0xbe,
1117 	0x3504, 0xffffffff, 0x20895,
1118 	0x3500, 0xffffffff, 0xc2,
1119 	0x3504, 0xffffffff, 0x20899,
1120 	0x3500, 0xffffffff, 0xc6,
1121 	0x3504, 0xffffffff, 0x2089d,
1122 	0x3500, 0xffffffff, 0xca,
1123 	0x3504, 0xffffffff, 0x8a1,
1124 	0x3500, 0xffffffff, 0xcc,
1125 	0x3504, 0xffffffff, 0x8a3,
1126 	0x3500, 0xffffffff, 0xce,
1127 	0x3504, 0xffffffff, 0x308a5,
1128 	0x3500, 0xffffffff, 0xd3,
1129 	0x3504, 0xffffffff, 0x6d08cd,
1130 	0x3500, 0xffffffff, 0x142,
1131 	0x3504, 0xffffffff, 0x2000095a,
1132 	0x3504, 0xffffffff, 0x1,
1133 	0x3500, 0xffffffff, 0x144,
1134 	0x3504, 0xffffffff, 0x301f095b,
1135 	0x3500, 0xffffffff, 0x165,
1136 	0x3504, 0xffffffff, 0xc094d,
1137 	0x3500, 0xffffffff, 0x173,
1138 	0x3504, 0xffffffff, 0xf096d,
1139 	0x3500, 0xffffffff, 0x184,
1140 	0x3504, 0xffffffff, 0x15097f,
1141 	0x3500, 0xffffffff, 0x19b,
1142 	0x3504, 0xffffffff, 0xc0998,
1143 	0x3500, 0xffffffff, 0x1a9,
1144 	0x3504, 0xffffffff, 0x409a7,
1145 	0x3500, 0xffffffff, 0x1af,
1146 	0x3504, 0xffffffff, 0xcdc,
1147 	0x3500, 0xffffffff, 0x1b1,
1148 	0x3504, 0xffffffff, 0x800,
1149 	0x3508, 0xffffffff, 0x6c9b2000,
1150 	0x3510, 0xfc00, 0x2000,
1151 	0x3544, 0xffffffff, 0xfc0,
1152 	0x28d4, 0x00000100, 0x100
1153 };
1154 
/* Program the per-ASIC "golden" register settings (register/mask/value
 * tables defined above) for the detected SI family member.  Unknown
 * families are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		/* Verde additionally gets the powergating init sequence */
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}
1223 
1224 #define PCIE_BUS_CLK                10000
1225 #define TCLK                        (PCIE_BUS_CLK / 10)
1226 
1227 /**
1228  * si_get_xclk - get the xclk
1229  *
1230  * @rdev: radeon_device pointer
1231  *
1232  * Returns the reference clock used by the gfx engine
1233  * (SI).
1234  */
1235 u32 si_get_xclk(struct radeon_device *rdev)
1236 {
1237         u32 reference_clock = rdev->clock.spll.reference_freq;
1238 	u32 tmp;
1239 
1240 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1241 	if (tmp & MUX_TCLK_TO_XCLK)
1242 		return TCLK;
1243 
1244 	tmp = RREG32(CG_CLKPIN_CNTL);
1245 	if (tmp & XTALIN_DIVIDE)
1246 		return reference_clock / 4;
1247 
1248 	return reference_clock;
1249 }
1250 
1251 /* get temperature in millidegrees */
1252 int si_get_temp(struct radeon_device *rdev)
1253 {
1254 	u32 temp;
1255 	int actual_temp = 0;
1256 
1257 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1258 		CTF_TEMP_SHIFT;
1259 
1260 	if (temp & 0x200)
1261 		actual_temp = 255;
1262 	else
1263 		actual_temp = temp & 0x1ff;
1264 
1265 	actual_temp = (actual_temp * 1000);
1266 
1267 	return actual_temp;
1268 }
1269 
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC io debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by si_mc_load_microcode()
 * before streaming in the MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1310 
/* Pitcairn MC io debug settings ({index, data} pairs); identical to the
 * Tahiti table except for the final (0x9f) entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1349 
/* Verde MC io debug settings ({index, data} pairs); identical to the
 * Tahiti table except for the final (0x9f) entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1388 
/* Oland MC io debug settings ({index, data} pairs); identical to the
 * Tahiti table except for the final (0x9f) entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1427 
/* Hainan MC io debug settings ({index, data} pairs); identical to the
 * Tahiti table except for the final (0x9f) entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1466 
1467 /* ucode loading */
1468 static int si_mc_load_microcode(struct radeon_device *rdev)
1469 {
1470 	const __be32 *fw_data;
1471 	u32 running, blackout = 0;
1472 	u32 *io_mc_regs;
1473 	int i, ucode_size, regs_size;
1474 
1475 	if (!rdev->mc_fw)
1476 		return -EINVAL;
1477 
1478 	switch (rdev->family) {
1479 	case CHIP_TAHITI:
1480 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1481 		ucode_size = SI_MC_UCODE_SIZE;
1482 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1483 		break;
1484 	case CHIP_PITCAIRN:
1485 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1486 		ucode_size = SI_MC_UCODE_SIZE;
1487 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1488 		break;
1489 	case CHIP_VERDE:
1490 	default:
1491 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1492 		ucode_size = SI_MC_UCODE_SIZE;
1493 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1494 		break;
1495 	case CHIP_OLAND:
1496 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1497 		ucode_size = OLAND_MC_UCODE_SIZE;
1498 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1499 		break;
1500 	case CHIP_HAINAN:
1501 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1502 		ucode_size = OLAND_MC_UCODE_SIZE;
1503 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1504 		break;
1505 	}
1506 
1507 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1508 
1509 	if (running == 0) {
1510 		if (running) {
1511 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1512 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1513 		}
1514 
1515 		/* reset the engine and set to writable */
1516 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1517 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1518 
1519 		/* load mc io regs */
1520 		for (i = 0; i < regs_size; i++) {
1521 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1522 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1523 		}
1524 		/* load the MC ucode */
1525 		fw_data = (const __be32 *)rdev->mc_fw->data;
1526 		for (i = 0; i < ucode_size; i++)
1527 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1528 
1529 		/* put the engine back into the active state */
1530 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1531 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1532 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1533 
1534 		/* wait for training to complete */
1535 		for (i = 0; i < rdev->usec_timeout; i++) {
1536 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1537 				break;
1538 			udelay(1);
1539 		}
1540 		for (i = 0; i < rdev->usec_timeout; i++) {
1541 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1542 				break;
1543 			udelay(1);
1544 		}
1545 
1546 		if (running)
1547 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1548 	}
1549 
1550 	return 0;
1551 }
1552 
1553 static int si_init_microcode(struct radeon_device *rdev)
1554 {
1555 	const char *chip_name;
1556 	const char *rlc_chip_name;
1557 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1558 	size_t smc_req_size;
1559 	char fw_name[30];
1560 	int err;
1561 
1562 	DRM_DEBUG("\n");
1563 
1564 	switch (rdev->family) {
1565 	case CHIP_TAHITI:
1566 		chip_name = "TAHITI";
1567 		rlc_chip_name = "TAHITI";
1568 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1570 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1571 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1573 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1574 		break;
1575 	case CHIP_PITCAIRN:
1576 		chip_name = "PITCAIRN";
1577 		rlc_chip_name = "PITCAIRN";
1578 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1579 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1580 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1581 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1582 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1583 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1584 		break;
1585 	case CHIP_VERDE:
1586 		chip_name = "VERDE";
1587 		rlc_chip_name = "VERDE";
1588 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1589 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1590 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1591 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1592 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1593 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1594 		break;
1595 	case CHIP_OLAND:
1596 		chip_name = "OLAND";
1597 		rlc_chip_name = "OLAND";
1598 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1599 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1600 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1601 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1602 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1603 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1604 		break;
1605 	case CHIP_HAINAN:
1606 		chip_name = "HAINAN";
1607 		rlc_chip_name = "HAINAN";
1608 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1609 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1610 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1611 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1612 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1613 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1614 		break;
1615 	default: BUG();
1616 	}
1617 
1618 	DRM_INFO("Loading %s Microcode\n", chip_name);
1619 
1620 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1621 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1622 	if (err)
1623 		goto out;
1624 	if (rdev->pfp_fw->size != pfp_req_size) {
1625 		printk(KERN_ERR
1626 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627 		       rdev->pfp_fw->size, fw_name);
1628 		err = -EINVAL;
1629 		goto out;
1630 	}
1631 
1632 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1633 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1634 	if (err)
1635 		goto out;
1636 	if (rdev->me_fw->size != me_req_size) {
1637 		printk(KERN_ERR
1638 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1639 		       rdev->me_fw->size, fw_name);
1640 		err = -EINVAL;
1641 	}
1642 
1643 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1644 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1645 	if (err)
1646 		goto out;
1647 	if (rdev->ce_fw->size != ce_req_size) {
1648 		printk(KERN_ERR
1649 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1650 		       rdev->ce_fw->size, fw_name);
1651 		err = -EINVAL;
1652 	}
1653 
1654 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1655 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656 	if (err)
1657 		goto out;
1658 	if (rdev->rlc_fw->size != rlc_req_size) {
1659 		printk(KERN_ERR
1660 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1661 		       rdev->rlc_fw->size, fw_name);
1662 		err = -EINVAL;
1663 	}
1664 
1665 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1666 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1667 	if (err)
1668 		goto out;
1669 	if (rdev->mc_fw->size != mc_req_size) {
1670 		printk(KERN_ERR
1671 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1672 		       rdev->mc_fw->size, fw_name);
1673 		err = -EINVAL;
1674 	}
1675 
1676 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1677 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1678 	if (err) {
1679 		printk(KERN_ERR
1680 		       "smc: error loading firmware \"%s\"\n",
1681 		       fw_name);
1682 		release_firmware(rdev->smc_fw);
1683 		rdev->smc_fw = NULL;
1684 	} else if (rdev->smc_fw->size != smc_req_size) {
1685 		printk(KERN_ERR
1686 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1687 		       rdev->smc_fw->size, fw_name);
1688 		err = -EINVAL;
1689 	}
1690 
1691 out:
1692 	if (err) {
1693 		if (err != -EINVAL)
1694 			printk(KERN_ERR
1695 			       "si_cp: Failed to load firmware \"%s\"\n",
1696 			       fw_name);
1697 		release_firmware(rdev->pfp_fw);
1698 		rdev->pfp_fw = NULL;
1699 		release_firmware(rdev->me_fw);
1700 		rdev->me_fw = NULL;
1701 		release_firmware(rdev->ce_fw);
1702 		rdev->ce_fw = NULL;
1703 		release_firmware(rdev->rlc_fw);
1704 		rdev->rlc_fw = NULL;
1705 		release_firmware(rdev->mc_fw);
1706 		rdev->mc_fw = NULL;
1707 		release_firmware(rdev->smc_fw);
1708 		rdev->smc_fw = NULL;
1709 	}
1710 	return err;
1711 }
1712 
1713 /* watermark setup */
1714 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1715 				   struct radeon_crtc *radeon_crtc,
1716 				   struct drm_display_mode *mode,
1717 				   struct drm_display_mode *other_mode)
1718 {
1719 	u32 tmp, buffer_alloc, i;
1720 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1721 	/*
1722 	 * Line Buffer Setup
1723 	 * There are 3 line buffers, each one shared by 2 display controllers.
1724 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1725 	 * the display controllers.  The paritioning is done via one of four
1726 	 * preset allocations specified in bits 21:20:
1727 	 *  0 - half lb
1728 	 *  2 - whole lb, other crtc must be disabled
1729 	 */
1730 	/* this can get tricky if we have two large displays on a paired group
1731 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1732 	 * non-linked crtcs for maximum line buffer allocation.
1733 	 */
1734 	if (radeon_crtc->base.enabled && mode) {
1735 		if (other_mode) {
1736 			tmp = 0; /* 1/2 */
1737 			buffer_alloc = 1;
1738 		} else {
1739 			tmp = 2; /* whole */
1740 			buffer_alloc = 2;
1741 		}
1742 	} else {
1743 		tmp = 0;
1744 		buffer_alloc = 0;
1745 	}
1746 
1747 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1748 	       DC_LB_MEMORY_CONFIG(tmp));
1749 
1750 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1751 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1752 	for (i = 0; i < rdev->usec_timeout; i++) {
1753 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1754 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1755 			break;
1756 		udelay(1);
1757 	}
1758 
1759 	if (radeon_crtc->base.enabled && mode) {
1760 		switch (tmp) {
1761 		case 0:
1762 		default:
1763 			return 4096 * 2;
1764 		case 2:
1765 			return 8192 * 2;
1766 		}
1767 	}
1768 
1769 	/* controller not enabled, so no lb used */
1770 	return 0;
1771 }
1772 
1773 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1774 {
1775 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1776 
1777 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1778 	case 0:
1779 	default:
1780 		return 1;
1781 	case 1:
1782 		return 2;
1783 	case 2:
1784 		return 4;
1785 	case 3:
1786 		return 8;
1787 	case 4:
1788 		return 3;
1789 	case 5:
1790 		return 6;
1791 	case 6:
1792 		return 10;
1793 	case 7:
1794 		return 12;
1795 	case 8:
1796 		return 16;
1797 	}
1798 }
1799 
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1815 
1816 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1817 {
1818 	/* Calculate raw DRAM Bandwidth */
1819 	fixed20_12 dram_efficiency; /* 0.7 */
1820 	fixed20_12 yclk, dram_channels, bandwidth;
1821 	fixed20_12 a;
1822 
1823 	a.full = dfixed_const(1000);
1824 	yclk.full = dfixed_const(wm->yclk);
1825 	yclk.full = dfixed_div(yclk, a);
1826 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1827 	a.full = dfixed_const(10);
1828 	dram_efficiency.full = dfixed_const(7);
1829 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1830 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1831 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1832 
1833 	return dfixed_trunc(bandwidth);
1834 }
1835 
1836 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1837 {
1838 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1839 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1840 	fixed20_12 yclk, dram_channels, bandwidth;
1841 	fixed20_12 a;
1842 
1843 	a.full = dfixed_const(1000);
1844 	yclk.full = dfixed_const(wm->yclk);
1845 	yclk.full = dfixed_div(yclk, a);
1846 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1847 	a.full = dfixed_const(10);
1848 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1849 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1850 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1851 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1852 
1853 	return dfixed_trunc(bandwidth);
1854 }
1855 
1856 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1857 {
1858 	/* Calculate the display Data return Bandwidth */
1859 	fixed20_12 return_efficiency; /* 0.8 */
1860 	fixed20_12 sclk, bandwidth;
1861 	fixed20_12 a;
1862 
1863 	a.full = dfixed_const(1000);
1864 	sclk.full = dfixed_const(wm->sclk);
1865 	sclk.full = dfixed_div(sclk, a);
1866 	a.full = dfixed_const(10);
1867 	return_efficiency.full = dfixed_const(8);
1868 	return_efficiency.full = dfixed_div(return_efficiency, a);
1869 	a.full = dfixed_const(32);
1870 	bandwidth.full = dfixed_mul(a, sclk);
1871 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1872 
1873 	return dfixed_trunc(bandwidth);
1874 }
1875 
/* DMIF request size in bytes; fixed at 32 on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1880 
1881 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1882 {
1883 	/* Calculate the DMIF Request Bandwidth */
1884 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1885 	fixed20_12 disp_clk, sclk, bandwidth;
1886 	fixed20_12 a, b1, b2;
1887 	u32 min_bandwidth;
1888 
1889 	a.full = dfixed_const(1000);
1890 	disp_clk.full = dfixed_const(wm->disp_clk);
1891 	disp_clk.full = dfixed_div(disp_clk, a);
1892 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1893 	b1.full = dfixed_mul(a, disp_clk);
1894 
1895 	a.full = dfixed_const(1000);
1896 	sclk.full = dfixed_const(wm->sclk);
1897 	sclk.full = dfixed_div(sclk, a);
1898 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1899 	b2.full = dfixed_mul(a, sclk);
1900 
1901 	a.full = dfixed_const(10);
1902 	disp_clk_request_efficiency.full = dfixed_const(8);
1903 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1904 
1905 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1906 
1907 	a.full = dfixed_const(min_bandwidth);
1908 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1909 
1910 	return dfixed_trunc(bandwidth);
1911 }
1912 
1913 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1914 {
1915 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1916 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1917 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1918 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1919 
1920 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1921 }
1922 
1923 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1924 {
1925 	/* Calculate the display mode Average Bandwidth
1926 	 * DisplayMode should contain the source and destination dimensions,
1927 	 * timing, etc.
1928 	 */
1929 	fixed20_12 bpp;
1930 	fixed20_12 line_time;
1931 	fixed20_12 src_width;
1932 	fixed20_12 bandwidth;
1933 	fixed20_12 a;
1934 
1935 	a.full = dfixed_const(1000);
1936 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1937 	line_time.full = dfixed_div(line_time, a);
1938 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1939 	src_width.full = dfixed_const(wm->src_width);
1940 	bandwidth.full = dfixed_mul(src_width, bpp);
1941 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1942 	bandwidth.full = dfixed_div(bandwidth, line_time);
1943 
1944 	return dfixed_trunc(bandwidth);
1945 }
1946 
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* Compute the latency watermark (in ns) for one display head:
	 * the worst-case memory latency seen by this head, plus any extra
	 * time needed to fill a line buffer line if the fill is slower
	 * than the active display time.
	 */
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for the worst-case 512B x 8 chunk to return, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time for a 128B x 4 cursor line pair to return, in ns */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* other heads' requests queue ahead of ours */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps or interlaced scaling needs more
	 * source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* bandwidth implied by draining the DMIF buffer over the
	 * (mc latency + 512) window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* cap at what the display clock can consume: disp_clk/1000 * bpp */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills faster than a scanline, latency dominates;
	 * otherwise charge the overrun on top of it */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2009 
2010 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2011 {
2012 	if (dce6_average_bandwidth(wm) <=
2013 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2014 		return true;
2015 	else
2016 		return false;
2017 };
2018 
2019 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2020 {
2021 	if (dce6_average_bandwidth(wm) <=
2022 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2023 		return true;
2024 	else
2025 		return false;
2026 };
2027 
2028 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2029 {
2030 	u32 lb_partitions = wm->lb_size / wm->src_width;
2031 	u32 line_time = wm->active_time + wm->blank_time;
2032 	u32 latency_tolerant_lines;
2033 	u32 latency_hiding;
2034 	fixed20_12 a;
2035 
2036 	a.full = dfixed_const(1);
2037 	if (wm->vsc.full > a.full)
2038 		latency_tolerant_lines = 1;
2039 	else {
2040 		if (lb_partitions <= (wm->vtaps + 1))
2041 			latency_tolerant_lines = 1;
2042 		else
2043 			latency_tolerant_lines = 2;
2044 	}
2045 
2046 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2047 
2048 	if (dce6_latency_watermark(wm) <= latency_hiding)
2049 		return true;
2050 	else
2051 		return false;
2052 }
2053 
/* Program the latency watermarks and priority marks for one crtc.
 * Watermark A is computed from the high (performance) clocks, watermark B
 * from the low (power-saving) clocks; the priority marks force display
 * requests to high priority when bandwidth is tight.
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of currently enabled display heads
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped at the 16-bit register field max */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel clock * hsc / 16, in
		 * fixed point (clock scaled by 1000) */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2213 
2214 void dce6_bandwidth_update(struct radeon_device *rdev)
2215 {
2216 	struct drm_display_mode *mode0 = NULL;
2217 	struct drm_display_mode *mode1 = NULL;
2218 	u32 num_heads = 0, lb_size;
2219 	int i;
2220 
2221 	radeon_update_display_priority(rdev);
2222 
2223 	for (i = 0; i < rdev->num_crtc; i++) {
2224 		if (rdev->mode_info.crtcs[i]->base.enabled)
2225 			num_heads++;
2226 	}
2227 	for (i = 0; i < rdev->num_crtc; i += 2) {
2228 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2229 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2230 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2231 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2232 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2233 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2234 	}
2235 }
2236 
2237 /*
2238  * Core functions
2239  */
2240 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2241 {
2242 	const u32 num_tile_mode_states = 32;
2243 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2244 
2245 	switch (rdev->config.si.mem_row_size_in_kb) {
2246 	case 1:
2247 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2248 		break;
2249 	case 2:
2250 	default:
2251 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2252 		break;
2253 	case 4:
2254 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2255 		break;
2256 	}
2257 
2258 	if ((rdev->family == CHIP_TAHITI) ||
2259 	    (rdev->family == CHIP_PITCAIRN)) {
2260 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2261 			switch (reg_offset) {
2262 			case 0:  /* non-AA compressed depth or any compressed stencil */
2263 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2265 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2266 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2267 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2268 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2271 				break;
2272 			case 1:  /* 2xAA/4xAA compressed depth only */
2273 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2275 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2276 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2277 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2278 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2281 				break;
2282 			case 2:  /* 8xAA compressed depth only */
2283 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2285 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2286 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2287 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2288 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2291 				break;
2292 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2293 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2295 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2296 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2298 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2301 				break;
2302 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2303 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2305 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2306 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2307 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2308 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2310 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2311 				break;
2312 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2313 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2315 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2316 						 TILE_SPLIT(split_equal_to_row_size) |
2317 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2318 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2320 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2321 				break;
2322 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2323 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2325 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326 						 TILE_SPLIT(split_equal_to_row_size) |
2327 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2328 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2331 				break;
2332 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2336 						 TILE_SPLIT(split_equal_to_row_size) |
2337 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2338 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2341 				break;
2342 			case 8:  /* 1D and 1D Array Surfaces */
2343 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2344 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2346 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2347 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2348 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2350 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2351 				break;
2352 			case 9:  /* Displayable maps. */
2353 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2356 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2357 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2358 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2361 				break;
2362 			case 10:  /* Display 8bpp. */
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2366 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2367 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2368 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2370 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2371 				break;
2372 			case 11:  /* Display 16bpp. */
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2376 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2377 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2378 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2381 				break;
2382 			case 12:  /* Display 32bpp. */
2383 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2386 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2387 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2388 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2391 				break;
2392 			case 13:  /* Thin. */
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2396 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2397 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2398 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2400 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2401 				break;
2402 			case 14:  /* Thin 8 bpp. */
2403 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2405 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2406 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2407 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2408 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2411 				break;
2412 			case 15:  /* Thin 16 bpp. */
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2415 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2416 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2417 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2418 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2420 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2421 				break;
2422 			case 16:  /* Thin 32 bpp. */
2423 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2425 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2426 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2427 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2428 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2431 				break;
2432 			case 17:  /* Thin 64 bpp. */
2433 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2435 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436 						 TILE_SPLIT(split_equal_to_row_size) |
2437 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2438 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2441 				break;
2442 			case 21:  /* 8 bpp PRT. */
2443 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2445 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2447 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2448 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451 				break;
2452 			case 22:  /* 16 bpp PRT */
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2458 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2461 				break;
2462 			case 23:  /* 32 bpp PRT */
2463 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2465 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2467 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2468 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471 				break;
2472 			case 24:  /* 64 bpp PRT */
2473 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2474 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2475 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2477 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2478 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481 				break;
2482 			case 25:  /* 128 bpp PRT */
2483 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2485 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2487 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2488 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2491 				break;
2492 			default:
2493 				gb_tile_moden = 0;
2494 				break;
2495 			}
2496 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2497 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2498 		}
2499 	} else if ((rdev->family == CHIP_VERDE) ||
2500 		   (rdev->family == CHIP_OLAND) ||
2501 		   (rdev->family == CHIP_HAINAN)) {
2502 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2503 			switch (reg_offset) {
2504 			case 0:  /* non-AA compressed depth or any compressed stencil */
2505 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2508 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2509 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2510 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2513 				break;
2514 			case 1:  /* 2xAA/4xAA compressed depth only */
2515 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2518 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2520 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2523 				break;
2524 			case 2:  /* 8xAA compressed depth only */
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2530 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2533 				break;
2534 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2535 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2538 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2539 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2540 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2542 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2543 				break;
2544 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2549 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2550 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553 				break;
2554 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558 						 TILE_SPLIT(split_equal_to_row_size) |
2559 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2560 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563 				break;
2564 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2565 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568 						 TILE_SPLIT(split_equal_to_row_size) |
2569 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2570 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573 				break;
2574 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2575 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2577 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578 						 TILE_SPLIT(split_equal_to_row_size) |
2579 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2580 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2583 				break;
2584 			case 8:  /* 1D and 1D Array Surfaces */
2585 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2589 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2590 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2592 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2593 				break;
2594 			case 9:  /* Displayable maps. */
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2596 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2599 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2600 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603 				break;
2604 			case 10:  /* Display 8bpp. */
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2610 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2613 				break;
2614 			case 11:  /* Display 16bpp. */
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2620 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623 				break;
2624 			case 12:  /* Display 32bpp. */
2625 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2629 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2630 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633 				break;
2634 			case 13:  /* Thin. */
2635 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2638 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2639 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2640 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2642 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643 				break;
2644 			case 14:  /* Thin 8 bpp. */
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2649 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2650 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653 				break;
2654 			case 15:  /* Thin 16 bpp. */
2655 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2659 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2660 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663 				break;
2664 			case 16:  /* Thin 32 bpp. */
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2670 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2673 				break;
2674 			case 17:  /* Thin 64 bpp. */
2675 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2677 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 						 TILE_SPLIT(split_equal_to_row_size) |
2679 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2680 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2683 				break;
2684 			case 21:  /* 8 bpp PRT. */
2685 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2690 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693 				break;
2694 			case 22:  /* 16 bpp PRT */
2695 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2700 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703 				break;
2704 			case 23:  /* 32 bpp PRT */
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2710 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2713 				break;
2714 			case 24:  /* 64 bpp PRT */
2715 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2717 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2718 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2720 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723 				break;
2724 			case 25:  /* 128 bpp PRT */
2725 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2727 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2728 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2729 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2730 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2733 				break;
2734 			default:
2735 				gb_tile_moden = 0;
2736 				break;
2737 			}
2738 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2739 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2740 		}
2741 	} else
2742 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2743 }
2744 
2745 static void si_select_se_sh(struct radeon_device *rdev,
2746 			    u32 se_num, u32 sh_num)
2747 {
2748 	u32 data = INSTANCE_BROADCAST_WRITES;
2749 
2750 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2751 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2752 	else if (se_num == 0xffffffff)
2753 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2754 	else if (sh_num == 0xffffffff)
2755 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2756 	else
2757 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2758 	WREG32(GRBM_GFX_INDEX, data);
2759 }
2760 
/* si_create_bitmask - build a mask with the low bit_width bits set.
 *
 * Replaces the original per-bit loop with a constant-time expression.
 * Behavior is identical: widths of 32 or more saturate to 0xffffffff
 * (the loop shifted set bits out of the top of the u32), and a width
 * of 0 yields 0.  The explicit >= 32 check avoids the undefined
 * behavior of shifting a 32-bit value by its full width.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffff;
	return (1u << bit_width) - 1;
}
2771 
/* si_get_cu_enabled - get the mask of usable compute units.
 *
 * Returns a bitmask of the enabled CUs in the currently selected
 * shader array (see si_select_se_sh()).  The hardware-disabled CU
 * bits (CC_GC_SHADER_ARRAY_CONFIG, honored only when its bit 0 is
 * set) are merged with the user-disabled CU bits
 * (GC_USER_SHADER_ARRAY_CONFIG), shifted down to bit 0, inverted,
 * and limited to cu_per_sh bits, so a set bit means "CU usable".
 */
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	if (data & 1)
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	/* invert: set bits now mean active CUs, clamped to cu_per_sh bits */
	return ~data & mask;
}
2789 
2790 static void si_setup_spi(struct radeon_device *rdev,
2791 			 u32 se_num, u32 sh_per_se,
2792 			 u32 cu_per_sh)
2793 {
2794 	int i, j, k;
2795 	u32 data, mask, active_cu;
2796 
2797 	for (i = 0; i < se_num; i++) {
2798 		for (j = 0; j < sh_per_se; j++) {
2799 			si_select_se_sh(rdev, i, j);
2800 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2801 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2802 
2803 			mask = 1;
2804 			for (k = 0; k < 16; k++) {
2805 				mask <<= k;
2806 				if (active_cu & mask) {
2807 					data &= ~mask;
2808 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2809 					break;
2810 				}
2811 			}
2812 		}
2813 	}
2814 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2815 }
2816 
/* si_get_rb_disabled - get the mask of disabled render backends.
 *
 * Returns a bitmask of the disabled RBs in the currently selected
 * shader array (see si_select_se_sh()).  The hardware-disabled RB
 * bits (CC_RB_BACKEND_DISABLE, honored only when its bit 0 is set)
 * are merged with the user-disabled RB bits
 * (GC_USER_RB_BACKEND_DISABLE), shifted down to bit 0, and limited
 * to the number of RBs per shader array.
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* RBs per shader array = total / engines / arrays-per-engine */
	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2836 
/* si_setup_rb - program the raster configuration from the enabled RBs.
 *
 * Walks every shader engine / shader array pair to gather a global
 * disabled-RB bitmap, inverts it into an enabled-RB bitmap, and then
 * programs PA_SC_RASTER_CONFIG per shader engine so the RB mapping
 * matches which render backend of each pair is actually usable.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect the per-array disabled masks into one bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into an enabled-RB bitmap limited to max_rb_num bits */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		/* consume two enabled-RB bits per shader array */
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2884 
/* si_gpu_init - one-time GFX block configuration for SI parts.
 *
 * Fills rdev->config.si with the per-ASIC limits, derives the tiling
 * configuration from the memory controller settings, writes the
 * address-config registers shared by the display/DMA/UVD blocks, and
 * programs the HW defaults for the 3D engine.  Called during GPU
 * startup, before the rings are brought up.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader/RB/cache limits and golden address config */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not consumed below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, capped at 4) from NOOFCOLS */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config is mirrored into every block that
	 * interprets tiled surfaces */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no bit changes: keeps the reset value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before continuing */
	udelay(50);
}
3141 
3142 /*
3143  * GPU scratch registers helpers function.
3144  */
3145 static void si_scratch_init(struct radeon_device *rdev)
3146 {
3147 	int i;
3148 
3149 	rdev->scratch.num_reg = 7;
3150 	rdev->scratch.reg_base = SCRATCH_REG0;
3151 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3152 		rdev->scratch.free[i] = true;
3153 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3154 	}
3155 }
3156 
/**
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * First flushes the read caches over GART (CP_COHER_CNTL2 set to 0
 * followed by a SURFACE_SYNC covering the TC/TCL1 and SH I/K caches),
 * then emits an EVENT_WRITE_EOP packet that writes the fence sequence
 * number to the fence GPU address and raises an interrupt once the
 * preceding work has completed.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3183 
3184 /*
3185  * IB stuff
3186  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: the IB to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at the IB, tagged with the IB's VM id.  For normal
 * IBs, the next read pointer is first written back (via the scratch
 * register or the writeback buffer) and the read caches are flushed
 * over GART afterwards for that vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 (this write) + 4 (IB packet) + 8 (cache flush) dwords ahead */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 (this write) + 4 (IB packet) + 8 (cache flush) dwords ahead */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3243 
3244 /*
3245  * CP.
3246  */
3247 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3248 {
3249 	if (enable)
3250 		WREG32(CP_ME_CNTL, 0);
3251 	else {
3252 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3253 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3254 		WREG32(SCRATCH_UMSK, 0);
3255 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3256 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3257 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3258 	}
3259 	udelay(50);
3260 }
3261 
3262 static int si_cp_load_microcode(struct radeon_device *rdev)
3263 {
3264 	const __be32 *fw_data;
3265 	int i;
3266 
3267 	if (!rdev->me_fw || !rdev->pfp_fw)
3268 		return -EINVAL;
3269 
3270 	si_cp_enable(rdev, false);
3271 
3272 	/* PFP */
3273 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3274 	WREG32(CP_PFP_UCODE_ADDR, 0);
3275 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3276 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3277 	WREG32(CP_PFP_UCODE_ADDR, 0);
3278 
3279 	/* CE */
3280 	fw_data = (const __be32 *)rdev->ce_fw->data;
3281 	WREG32(CP_CE_UCODE_ADDR, 0);
3282 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3283 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3284 	WREG32(CP_CE_UCODE_ADDR, 0);
3285 
3286 	/* ME */
3287 	fw_data = (const __be32 *)rdev->me_fw->data;
3288 	WREG32(CP_ME_RAM_WADDR, 0);
3289 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3290 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3291 	WREG32(CP_ME_RAM_WADDR, 0);
3292 
3293 	WREG32(CP_PFP_UCODE_ADDR, 0);
3294 	WREG32(CP_CE_UCODE_ADDR, 0);
3295 	WREG32(CP_ME_RAM_WADDR, 0);
3296 	WREG32(CP_ME_RAM_RADDR, 0);
3297 	return 0;
3298 }
3299 
3300 static int si_cp_start(struct radeon_device *rdev)
3301 {
3302 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3303 	int r, i;
3304 
3305 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3306 	if (r) {
3307 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3308 		return r;
3309 	}
3310 	/* init the CP */
3311 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3312 	radeon_ring_write(ring, 0x1);
3313 	radeon_ring_write(ring, 0x0);
3314 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3315 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3316 	radeon_ring_write(ring, 0);
3317 	radeon_ring_write(ring, 0);
3318 
3319 	/* init the CE partitions */
3320 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3321 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3322 	radeon_ring_write(ring, 0xc000);
3323 	radeon_ring_write(ring, 0xe000);
3324 	radeon_ring_unlock_commit(rdev, ring);
3325 
3326 	si_cp_enable(rdev, true);
3327 
3328 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3329 	if (r) {
3330 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3331 		return r;
3332 	}
3333 
3334 	/* setup clear context state */
3335 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3336 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3337 
3338 	for (i = 0; i < si_default_size; i++)
3339 		radeon_ring_write(ring, si_default_state[i]);
3340 
3341 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3342 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3343 
3344 	/* set clear context state */
3345 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3346 	radeon_ring_write(ring, 0);
3347 
3348 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3349 	radeon_ring_write(ring, 0x00000316);
3350 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3351 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3352 
3353 	radeon_ring_unlock_commit(rdev, ring);
3354 
3355 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3356 		ring = &rdev->ring[i];
3357 		r = radeon_ring_lock(rdev, ring, 2);
3358 
3359 		/* clear the compute context state */
3360 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3361 		radeon_ring_write(ring, 0);
3362 
3363 		radeon_ring_unlock_commit(rdev, ring);
3364 	}
3365 
3366 	return 0;
3367 }
3368 
3369 static void si_cp_fini(struct radeon_device *rdev)
3370 {
3371 	struct radeon_ring *ring;
3372 	si_cp_enable(rdev, false);
3373 
3374 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3375 	radeon_ring_fini(rdev, ring);
3376 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3377 
3378 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3379 	radeon_ring_fini(rdev, ring);
3380 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3381 
3382 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3383 	radeon_ring_fini(rdev, ring);
3384 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3385 }
3386 
/* si_cp_resume - program and start the three CP ring buffers.
 *
 * Configures the gfx ring (RB0) and both compute rings (RB1/RB2):
 * ring buffer size and page-size field, zeroed read/write pointers,
 * writeback rptr addresses, and base addresses.  Then starts the CP
 * via si_cp_start() and ring-tests each ring, marking it not ready
 * on failure.  A gfx ring test failure is fatal; compute ring
 * failures only leave that ring disabled.
 *
 * Returns 0 on success, negative error code if the gfx ring test fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable RB pointer updates in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	/* clear RB_RPTR_WR_ENA again by rewriting the final control value */
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3515 
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are hung
 * @rdev: radeon_device pointer
 *
 * Inspects the GRBM, SRBM, DMA and VM status registers and builds a
 * RADEON_RESET_* bitmask of the blocks that appear busy/hung.  The MC
 * bit is cleared before returning since a busy memory controller is
 * usually not actually hung.  Returns the reset mask (0 = all idle).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3596 
/**
 * si_gpu_soft_reset - soft reset the blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Quiesce the GPU (disable PG/CG, stop the RLC, halt the CP, stop the
 * DMA ring buffers, stop the MC), translate the reset mask into the
 * corresponding GRBM_SOFT_RESET / SRBM_SOFT_RESET bits, pulse those
 * bits, and resume the MC.  The GRBM/SRBM registers are read back after
 * each write to post the write before the delay.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* a GFX/compute/CP hang requires resetting the whole graphics pipe */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* assert the reset bits, wait, then de-assert; the read-back
		 * after each write posts the write before the delay */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3728 
3729 int si_asic_reset(struct radeon_device *rdev)
3730 {
3731 	u32 reset_mask;
3732 
3733 	reset_mask = si_gpu_check_soft_reset(rdev);
3734 
3735 	if (reset_mask)
3736 		r600_set_bios_scratch_engine_hung(rdev, true);
3737 
3738 	si_gpu_soft_reset(rdev, reset_mask);
3739 
3740 	reset_mask = si_gpu_check_soft_reset(rdev);
3741 
3742 	if (!reset_mask)
3743 		r600_set_bios_scratch_engine_hung(rdev, false);
3744 
3745 	return 0;
3746 }
3747 
3748 /**
3749  * si_gfx_is_lockup - Check if the GFX engine is locked up
3750  *
3751  * @rdev: radeon_device pointer
3752  * @ring: radeon_ring structure holding ring information
3753  *
3754  * Check if the GFX engine is locked up.
3755  * Returns true if the engine appears to be locked up, false if not.
3756  */
3757 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3758 {
3759 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3760 
3761 	if (!(reset_mask & (RADEON_RESET_GFX |
3762 			    RADEON_RESET_COMPUTE |
3763 			    RADEON_RESET_CP))) {
3764 		radeon_ring_lockup_update(ring);
3765 		return false;
3766 	}
3767 	/* force CP activities */
3768 	radeon_ring_force_activity(rdev, ring);
3769 	return radeon_ring_test_lockup(rdev, ring);
3770 }
3771 
3772 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clear the HDP tiling registers, stop the MC, lock out VGA access,
 * program the system/VRAM aperture registers and the FB location, set
 * up the HDP non-surface range, disable the AGP aperture, then resume
 * the MC and turn off the VGA renderer so it can't scribble over our
 * VRAM objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the top 16 bits of vram_end/vram_start */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (TOP < BOT) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3823 
3824 void si_vram_gtt_location(struct radeon_device *rdev,
3825 			  struct radeon_mc *mc)
3826 {
3827 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3828 		/* leave room for at least 1024M GTT */
3829 		dev_warn(rdev->dev, "limiting VRAM\n");
3830 		mc->real_vram_size = 0xFFC0000000ULL;
3831 		mc->mc_vram_size = 0xFFC0000000ULL;
3832 	}
3833 	radeon_vram_location(rdev, &rdev->mc, 0);
3834 	rdev->mc.gtt_base_align = 0;
3835 	radeon_gtt_location(rdev, mc);
3836 }
3837 
3838 static int si_mc_init(struct radeon_device *rdev)
3839 {
3840 	u32 tmp;
3841 	int chansize, numchan;
3842 
3843 	/* Get VRAM informations */
3844 	rdev->mc.vram_is_ddr = true;
3845 	tmp = RREG32(MC_ARB_RAMCFG);
3846 	if (tmp & CHANSIZE_OVERRIDE) {
3847 		chansize = 16;
3848 	} else if (tmp & CHANSIZE_MASK) {
3849 		chansize = 64;
3850 	} else {
3851 		chansize = 32;
3852 	}
3853 	tmp = RREG32(MC_SHARED_CHMAP);
3854 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3855 	case 0:
3856 	default:
3857 		numchan = 1;
3858 		break;
3859 	case 1:
3860 		numchan = 2;
3861 		break;
3862 	case 2:
3863 		numchan = 4;
3864 		break;
3865 	case 3:
3866 		numchan = 8;
3867 		break;
3868 	case 4:
3869 		numchan = 3;
3870 		break;
3871 	case 5:
3872 		numchan = 6;
3873 		break;
3874 	case 6:
3875 		numchan = 10;
3876 		break;
3877 	case 7:
3878 		numchan = 12;
3879 		break;
3880 	case 8:
3881 		numchan = 16;
3882 		break;
3883 	}
3884 	rdev->mc.vram_width = numchan * chansize;
3885 	/* Could aper size report 0 ? */
3886 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3887 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3888 	/* size in MB on si */
3889 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3890 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3891 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3892 	si_vram_gtt_location(rdev, &rdev->mc);
3893 	radeon_update_bandwidth_info(rdev);
3894 
3895 	return 0;
3896 }
3897 
3898 /*
3899  * GART
3900  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP read cache, then invalidate the VM context 0 TLB so
 * subsequent GART accesses see freshly written page table entries.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3909 
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in VRAM, program the L1 TLB and L2 cache,
 * set up VM context 0 to cover the GTT aperture, point the page table
 * base registers for contexts 1-15 at the GART table as a safe default
 * (the real per-VM tables are assigned on the fly in radeon_gart.c),
 * enable the contexts with protection faults routed to the dummy page,
 * and flush the TLB.
 *
 * Returns 0 on success, negative error code if there is no GART object
 * or the table could not be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: flat page table covering the GTT range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers — purpose not derivable here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3994 
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable all VM contexts, put the TLB and L2 cache back into a
 * pass-through configuration (L1 TLB and L2 cache enable bits cleared),
 * and unpin the GART page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4013 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART, free the page table VRAM and release the GART
 * bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4020 
4021 /* vm parser */
4022 static bool si_vm_reg_valid(u32 reg)
4023 {
4024 	/* context regs are fine */
4025 	if (reg >= 0x28000)
4026 		return true;
4027 
4028 	/* check config regs */
4029 	switch (reg) {
4030 	case GRBM_GFX_INDEX:
4031 	case CP_STRMOUT_CNTL:
4032 	case VGT_VTX_VECT_EJECT_REG:
4033 	case VGT_CACHE_INVALIDATION:
4034 	case VGT_ESGS_RING_SIZE:
4035 	case VGT_GSVS_RING_SIZE:
4036 	case VGT_GS_VERTEX_REUSE:
4037 	case VGT_PRIMITIVE_TYPE:
4038 	case VGT_INDEX_TYPE:
4039 	case VGT_NUM_INDICES:
4040 	case VGT_NUM_INSTANCES:
4041 	case VGT_TF_RING_SIZE:
4042 	case VGT_HS_OFFCHIP_PARAM:
4043 	case VGT_TF_MEMORY_BASE:
4044 	case PA_CL_ENHANCE:
4045 	case PA_SU_LINE_STIPPLE_VALUE:
4046 	case PA_SC_LINE_STIPPLE_STATE:
4047 	case PA_SC_ENHANCE:
4048 	case SQC_CACHES:
4049 	case SPI_STATIC_THREAD_MGMT_1:
4050 	case SPI_STATIC_THREAD_MGMT_2:
4051 	case SPI_STATIC_THREAD_MGMT_3:
4052 	case SPI_PS_MAX_WAVE_ID:
4053 	case SPI_CONFIG_CNTL:
4054 	case SPI_CONFIG_CNTL_1:
4055 	case TA_CNTL_AUX:
4056 		return true;
4057 	default:
4058 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4059 		return false;
4060 	}
4061 }
4062 
4063 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4064 				  u32 *ib, struct radeon_cs_packet *pkt)
4065 {
4066 	switch (pkt->opcode) {
4067 	case PACKET3_NOP:
4068 	case PACKET3_SET_BASE:
4069 	case PACKET3_SET_CE_DE_COUNTERS:
4070 	case PACKET3_LOAD_CONST_RAM:
4071 	case PACKET3_WRITE_CONST_RAM:
4072 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4073 	case PACKET3_DUMP_CONST_RAM:
4074 	case PACKET3_INCREMENT_CE_COUNTER:
4075 	case PACKET3_WAIT_ON_DE_COUNTER:
4076 	case PACKET3_CE_WRITE:
4077 		break;
4078 	default:
4079 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4080 		return -EINVAL;
4081 	}
4082 	return 0;
4083 }
4084 
4085 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4086 {
4087 	u32 start_reg, reg, i;
4088 	u32 command = ib[idx + 4];
4089 	u32 info = ib[idx + 1];
4090 	u32 idx_value = ib[idx];
4091 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4092 		/* src address space is register */
4093 		if (((info & 0x60000000) >> 29) == 0) {
4094 			start_reg = idx_value << 2;
4095 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4096 				reg = start_reg;
4097 				if (!si_vm_reg_valid(reg)) {
4098 					DRM_ERROR("CP DMA Bad SRC register\n");
4099 					return -EINVAL;
4100 				}
4101 			} else {
4102 				for (i = 0; i < (command & 0x1fffff); i++) {
4103 					reg = start_reg + (4 * i);
4104 					if (!si_vm_reg_valid(reg)) {
4105 						DRM_ERROR("CP DMA Bad SRC register\n");
4106 						return -EINVAL;
4107 					}
4108 				}
4109 			}
4110 		}
4111 	}
4112 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4113 		/* dst address space is register */
4114 		if (((info & 0x00300000) >> 20) == 0) {
4115 			start_reg = ib[idx + 2];
4116 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4117 				reg = start_reg;
4118 				if (!si_vm_reg_valid(reg)) {
4119 					DRM_ERROR("CP DMA Bad DST register\n");
4120 					return -EINVAL;
4121 				}
4122 			} else {
4123 				for (i = 0; i < (command & 0x1fffff); i++) {
4124 					reg = start_reg + (4 * i);
4125 				if (!si_vm_reg_valid(reg)) {
4126 						DRM_ERROR("CP DMA Bad DST register\n");
4127 						return -EINVAL;
4128 					}
4129 				}
4130 			}
4131 		}
4132 	}
4133 	return 0;
4134 }
4135 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword stream
 * @pkt: decoded packet header (idx points at the header dword)
 *
 * Allow known-safe opcodes outright; for opcodes that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA), validate every targeted register with
 * si_vm_reg_valid().  Unknown opcodes are rejected.
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that are safe without further inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel == register: check the destination register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel == register: check each written register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no-increment mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* bounds-check the register range, then validate each reg */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4253 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword stream
 * @pkt: decoded packet header (idx points at the header dword)
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the (smaller) set
 * of opcodes allowed on the compute rings: known-safe opcodes pass,
 * register-writing opcodes have each target register validated with
 * si_vm_reg_valid(), everything else is rejected.
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that are safe without further inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel == register: check the destination register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel == register: check each written register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no-increment mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4341 
/**
 * si_ib_parse - walk and validate every packet in an IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB to validate
 *
 * Iterate over the IB dword stream packet by packet.  Type-0 packets
 * are forbidden, type-2 packets are 1-dword padding, and type-3
 * packets are dispatched to the CE/GFX/compute checker that matches
 * the IB's target ring.  Parsing stops at the first invalid packet.
 *
 * Returns 0 if every packet is acceptable, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* 1-dword filler packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4393 
4394 /*
4395  * vm
4396  */
/**
 * si_vm_init - initialize the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * SI supports 16 VM contexts with no additional VRAM page offset.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4406 
/**
 * si_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; kept for interface symmetry with si_vm_init().
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4410 
4411 /**
4412  * si_vm_decode_fault - print human readable fault info
4413  *
4414  * @rdev: radeon_device pointer
4415  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4416  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4417  *
4418  * Print human readable fault information (SI).
4419  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* the mc_id -> block-name mapping differs between Tahiti and
	 * the other SI parts, hence the two parallel switches below */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan mc_id mapping */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* NOTE(review): printk without a KERN_* level — consider dev_err()
	 * or printk(KERN_ERR ...) so the fault is logged at error level */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4674 
4675 /**
4676  * si_vm_set_page - update the page tables using the CP
4677  *
4678  * @rdev: radeon_device pointer
4679  * @ib: indirect buffer to fill with commands
4680  * @pe: addr of the page entry
4681  * @addr: dst addr to write into pe
4682  * @count: number of page entries to update
4683  * @incr: increase next addr by incr bytes
4684  * @flags: access flags
4685  *
4686  * Update the page tables using the CP (SI).
4687  */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	/* page tables can be updated either by the CP (GFX ring) or by
	 * the async DMA engine, depending on the configured pt ring */
	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		while (count) {
			/* 2 header dwords + 2 dwords per 64-bit PTE,
			 * capped at the WRITE_DATA packet size limit */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4729 
/**
 * si_vm_flush - flush the TLB for the requested VM via the ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: radeon_vm pointer (no-op if NULL)
 *
 * Updates the page table base address for @vm, flushes the HDP
 * cache and invalidates the VM context's TLB entries (SI).
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 use two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4772 
4773 /*
4774  *  Power and clock gating
4775  */
4776 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4777 {
4778 	int i;
4779 
4780 	for (i = 0; i < rdev->usec_timeout; i++) {
4781 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4782 			break;
4783 		udelay(1);
4784 	}
4785 
4786 	for (i = 0; i < rdev->usec_timeout; i++) {
4787 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4788 			break;
4789 		udelay(1);
4790 	}
4791 }
4792 
/* Enable/disable the GUI idle interrupt on ring 0.  On disable,
 * also waits for the GFX block to reach a quiescent clock/power
 * state before returning.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		/* wait until only the clock/power status bits remain set */
		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4818 
4819 static void si_set_uvd_dcm(struct radeon_device *rdev,
4820 			   bool sw_mode)
4821 {
4822 	u32 tmp, tmp2;
4823 
4824 	tmp = RREG32(UVD_CGC_CTRL);
4825 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4826 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4827 
4828 	if (sw_mode) {
4829 		tmp &= ~0x7ffff800;
4830 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4831 	} else {
4832 		tmp |= 0x7ffff800;
4833 		tmp2 = 0;
4834 	}
4835 
4836 	WREG32(UVD_CGC_CTRL, tmp);
4837 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4838 }
4839 
/**
 * si_init_uvd_internal_cg - set up UVD internal clock gating
 *
 * @rdev: radeon_device pointer
 *
 * Puts UVD dynamic clocking under hardware control.  Note that
 * hw_mode is hardwired to true, so the software-mode else branch
 * below is currently dead code kept for reference.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4852 
/* Halt the RLC if it is running and wait for its serdes masters
 * to idle.  Returns the original RLC_CNTL value so the caller can
 * restore the previous state via si_update_rlc().
 */
static u32 si_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		si_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4868 
4869 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4870 {
4871 	u32 tmp;
4872 
4873 	tmp = RREG32(RLC_CNTL);
4874 	if (tmp != rlc)
4875 		WREG32(RLC_CNTL, rlc);
4876 }
4877 
4878 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4879 {
4880 	u32 data, orig;
4881 
4882 	orig = data = RREG32(DMA_PG);
4883 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4884 		data |= PG_CNTL_ENABLE;
4885 	else
4886 		data &= ~PG_CNTL_ENABLE;
4887 	if (orig != data)
4888 		WREG32(DMA_PG, data);
4889 }
4890 
/* Program the DMA power-gating state machine.
 * NOTE(review): the constants look like vendor-supplied magic
 * FSM programming values — no public documentation to confirm.
 */
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* five zero writes appear to step/flush the FSM sequence */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
4901 
/* Enable/disable GFX coarse-grain power gating.  On enable,
 * programs the RLC power up/down thresholds and turns on
 * automatic power gating; on disable, turns auto-PG off and
 * touches a GFX register to wake the block.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down delay and threshold values */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* read a gfx register to make sure gfx is powered up */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4926 
/* One-time GFX power gating setup: program the save/restore and
 * clear-state buffer addresses and the auto-PG idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* 256-byte aligned GPU address of the RLC save/restore buffer */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM sample gating interval threshold */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4946 
4947 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4948 {
4949 	u32 mask = 0, tmp, tmp1;
4950 	int i;
4951 
4952 	si_select_se_sh(rdev, se, sh);
4953 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4954 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4955 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4956 
4957 	tmp &= 0xffff0000;
4958 
4959 	tmp |= tmp1;
4960 	tmp >>= 16;
4961 
4962 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4963 		mask <<= 1;
4964 		mask |= 1;
4965 	}
4966 
4967 	return (~tmp) & mask;
4968 }
4969 
4970 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4971 {
4972 	u32 i, j, k, active_cu_number = 0;
4973 	u32 mask, counter, cu_bitmap;
4974 	u32 tmp = 0;
4975 
4976 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4977 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4978 			mask = 1;
4979 			cu_bitmap = 0;
4980 			counter  = 0;
4981 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4982 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4983 					if (counter < 2)
4984 						cu_bitmap |= mask;
4985 					counter++;
4986 				}
4987 				mask <<= 1;
4988 			}
4989 
4990 			active_cu_number += counter;
4991 			tmp |= (cu_bitmap << (i * 16 + j * 8));
4992 		}
4993 	}
4994 
4995 	WREG32(RLC_PG_AO_CU_MASK, tmp);
4996 
4997 	tmp = RREG32(RLC_MAX_PG_CU);
4998 	tmp &= ~MAX_PU_CU_MASK;
4999 	tmp |= MAX_PU_CU(active_cu_number);
5000 	WREG32(RLC_MAX_PG_CU, tmp);
5001 }
5002 
/* Enable/disable coarse-grain clock gating (CGCG) and clock/light
 * sleep (CGLS) for the GFX block.  The serdes broadcast writes
 * must happen with the RLC halted, hence the halt/restore pair.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while broadcasting to all serdes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the previous RLC state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): the repeated reads look like a required
		 * settling delay before disabling gating — unconfirmed.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5042 
/* Enable/disable medium-grain clock gating (MGCG) for the GFX
 * block, including optional CP memory light sleep.  Serdes
 * broadcast writes are done with the RLC halted.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to allow gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		/* restore the previous RLC state */
		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to block gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5098 
/* Enable/disable medium-grain clock gating for the UVD block:
 * memory gates, dynamic clock mode and the SMC-side CGTT locals.
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* enable all UVD memory gates */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* clear the SMC clock gating overrides */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* force all SMC clock gating overrides on */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5130 
/* Memory-controller/ATC/VM clock gating control registers shared
 * by the MGCG (si_enable_mc_mgcg) and LS (si_enable_mc_ls) paths.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5143 
5144 static void si_enable_mc_ls(struct radeon_device *rdev,
5145 			    bool enable)
5146 {
5147 	int i;
5148 	u32 orig, data;
5149 
5150 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5151 		orig = data = RREG32(mc_cg_registers[i]);
5152 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5153 			data |= MC_LS_ENABLE;
5154 		else
5155 			data &= ~MC_LS_ENABLE;
5156 		if (data != orig)
5157 			WREG32(mc_cg_registers[i], data);
5158 	}
5159 }
5160 
5161 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5162 			       bool enable)
5163 {
5164 	int i;
5165 	u32 orig, data;
5166 
5167 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5168 		orig = data = RREG32(mc_cg_registers[i]);
5169 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5170 			data |= MC_CG_ENABLE;
5171 		else
5172 			data &= ~MC_CG_ENABLE;
5173 		if (data != orig)
5174 			WREG32(mc_cg_registers[i], data);
5175 	}
5176 }
5177 
/* Enable/disable medium-grain clock gating for both DMA engines.
 * On enable, memory power override is cleared and the clock
 * control is set to its gated value; on disable, the override is
 * forced and clocks are kept on.
 */
static void si_enable_dma_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, offset;
	int i;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < 2; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < 2; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			/* keep the DMA clocks ungated */
			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}
}
5214 
5215 static void si_enable_bif_mgls(struct radeon_device *rdev,
5216 			       bool enable)
5217 {
5218 	u32 orig, data;
5219 
5220 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5221 
5222 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5223 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5224 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5225 	else
5226 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5227 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5228 
5229 	if (orig != data)
5230 		WREG32_PCIE(PCIE_CNTL2, data);
5231 }
5232 
5233 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5234 			       bool enable)
5235 {
5236 	u32 orig, data;
5237 
5238 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5239 
5240 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5241 		data &= ~CLOCK_GATING_DIS;
5242 	else
5243 		data |= CLOCK_GATING_DIS;
5244 
5245 	if (orig != data)
5246 		WREG32(HDP_HOST_PATH_CNTL, data);
5247 }
5248 
5249 static void si_enable_hdp_ls(struct radeon_device *rdev,
5250 			     bool enable)
5251 {
5252 	u32 orig, data;
5253 
5254 	orig = data = RREG32(HDP_MEM_POWER_LS);
5255 
5256 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5257 		data |= HDP_LS_ENABLE;
5258 	else
5259 		data &= ~HDP_LS_ENABLE;
5260 
5261 	if (orig != data)
5262 		WREG32(HDP_MEM_POWER_LS, data);
5263 }
5264 
/**
 * si_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 */
void si_update_cg(struct radeon_device *rdev,
		  u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* UVD gating only applies when the asic has a UVD block */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5305 
/* Enable clock gating on all supported blocks, then bring up UVD
 * gating and its internal clock gating if the asic has UVD.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5318 
/* Disable clock gating, undoing si_init_cg() in reverse order:
 * UVD first, then the remaining blocks.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5330 
5331 u32 si_get_csb_size(struct radeon_device *rdev)
5332 {
5333 	u32 count = 0;
5334 	const struct cs_section_def *sect = NULL;
5335 	const struct cs_extent_def *ext = NULL;
5336 
5337 	if (rdev->rlc.cs_data == NULL)
5338 		return 0;
5339 
5340 	/* begin clear state */
5341 	count += 2;
5342 	/* context control state */
5343 	count += 3;
5344 
5345 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5346 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5347 			if (sect->id == SECT_CONTEXT)
5348 				count += 2 + ext->reg_count;
5349 			else
5350 				return 0;
5351 		}
5352 	}
5353 	/* pa_sc_raster_config */
5354 	count += 3;
5355 	/* end clear state */
5356 	count += 2;
5357 	/* clear state */
5358 	count += 2;
5359 
5360 	return count;
5361 }
5362 
/**
 * si_get_csb_buffer - fill the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (must hold si_get_csb_size() dwords)
 *
 * Emits the clear state packet stream; the layout here must stay
 * in sync with the dword counts in si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	/* emit every context-section register extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				return;
			}
		}
	}

	/* per-family PA_SC_RASTER_CONFIG value */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = 0x2a00126a;
		break;
	case CHIP_VERDE:
		buffer[count++] = 0x0000124a;
		break;
	case CHIP_OLAND:
		buffer[count++] = 0x00000082;
		break;
	case CHIP_HAINAN:
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5421 
/* Initialize power gating.  Without any pg_flags the RLC
 * save/restore and clear-state addresses still need programming.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		/* addresses must be set even with power gating off */
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5439 
5440 static void si_fini_pg(struct radeon_device *rdev)
5441 {
5442 	if (rdev->pg_flags) {
5443 		si_enable_dma_pg(rdev, false);
5444 		si_enable_gfx_cgpg(rdev, false);
5445 	}
5446 }
5447 
5448 /*
5449  * RLC
5450  */
/* Pulse the RLC soft reset bit in GRBM, with settle delays on
 * both assertion and deassertion.
 */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5462 
/* Stop the RLC, mask the GUI idle interrupt and wait for the
 * serdes masters to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5471 
/* Start the RLC, unmask the GUI idle interrupt and give the
 * engine a short time to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5480 
5481 static bool si_lbpw_supported(struct radeon_device *rdev)
5482 {
5483 	u32 tmp;
5484 
5485 	/* Enable LBPW only for DDR3 */
5486 	tmp = RREG32(MC_SEQ_MISC0);
5487 	if ((tmp & 0xF0000000) == 0xB0000000)
5488 		return true;
5489 	return false;
5490 }
5491 
5492 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5493 {
5494 	u32 tmp;
5495 
5496 	tmp = RREG32(RLC_LB_CNTL);
5497 	if (enable)
5498 		tmp |= LOAD_BALANCE_ENABLE;
5499 	else
5500 		tmp &= ~LOAD_BALANCE_ENABLE;
5501 	WREG32(RLC_LB_CNTL, tmp);
5502 
5503 	if (!enable) {
5504 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5505 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5506 	}
5507 }
5508 
/* Stop and reset the RLC, set up power and clock gating, load
 * the RLC microcode and restart the engine.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear the read list and load-balance state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the big-endian firmware image word by word */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5548 
5549 static void si_enable_interrupts(struct radeon_device *rdev)
5550 {
5551 	u32 ih_cntl = RREG32(IH_CNTL);
5552 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5553 
5554 	ih_cntl |= ENABLE_INTR;
5555 	ih_rb_cntl |= IH_RB_ENABLE;
5556 	WREG32(IH_CNTL, ih_cntl);
5557 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5558 	rdev->ih.enabled = true;
5559 }
5560 
/* Turn off the interrupt handler: disable the ring buffer before
 * the controller (reverse of si_enable_interrupts) and reset the
 * ring pointers.
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
5576 
/* Mask every interrupt source: CP rings, DMA engines, GRBM,
 * all CRTC vblank/pageflip sources and (on asics with display)
 * the HPD pins.  HPD polarity bits are preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* crtcs exist in pairs; mask vblank/vline per populated crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* mask pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* mask HPD interrupts but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5634 
/* Allocate and program the IH (interrupt handler) ring, bring up
 * the RLC, configure the IH controller and enable interrupts.
 *
 * Returns 0 on success or a negative error code from ring
 * allocation / RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5705 
5706 int si_irq_set(struct radeon_device *rdev)
5707 {
5708 	u32 cp_int_cntl;
5709 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5710 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5711 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5712 	u32 grbm_int_cntl = 0;
5713 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5714 	u32 dma_cntl, dma_cntl1;
5715 	u32 thermal_int = 0;
5716 
5717 	if (!rdev->irq.installed) {
5718 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5719 		return -EINVAL;
5720 	}
5721 	/* don't enable anything if the ih is disabled */
5722 	if (!rdev->ih.enabled) {
5723 		si_disable_interrupts(rdev);
5724 		/* force the active interrupt state to all disabled */
5725 		si_disable_interrupt_state(rdev);
5726 		return 0;
5727 	}
5728 
5729 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5730 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5731 
5732 	if (!ASIC_IS_NODCE(rdev)) {
5733 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5734 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5735 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5736 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5737 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5738 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5739 	}
5740 
5741 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5742 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5743 
5744 	thermal_int = RREG32(CG_THERMAL_INT) &
5745 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5746 
5747 	/* enable CP interrupts on all rings */
5748 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5749 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5750 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5751 	}
5752 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5753 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5754 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5755 	}
5756 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5757 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5758 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5759 	}
5760 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5761 		DRM_DEBUG("si_irq_set: sw int dma\n");
5762 		dma_cntl |= TRAP_ENABLE;
5763 	}
5764 
5765 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5766 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5767 		dma_cntl1 |= TRAP_ENABLE;
5768 	}
5769 	if (rdev->irq.crtc_vblank_int[0] ||
5770 	    atomic_read(&rdev->irq.pflip[0])) {
5771 		DRM_DEBUG("si_irq_set: vblank 0\n");
5772 		crtc1 |= VBLANK_INT_MASK;
5773 	}
5774 	if (rdev->irq.crtc_vblank_int[1] ||
5775 	    atomic_read(&rdev->irq.pflip[1])) {
5776 		DRM_DEBUG("si_irq_set: vblank 1\n");
5777 		crtc2 |= VBLANK_INT_MASK;
5778 	}
5779 	if (rdev->irq.crtc_vblank_int[2] ||
5780 	    atomic_read(&rdev->irq.pflip[2])) {
5781 		DRM_DEBUG("si_irq_set: vblank 2\n");
5782 		crtc3 |= VBLANK_INT_MASK;
5783 	}
5784 	if (rdev->irq.crtc_vblank_int[3] ||
5785 	    atomic_read(&rdev->irq.pflip[3])) {
5786 		DRM_DEBUG("si_irq_set: vblank 3\n");
5787 		crtc4 |= VBLANK_INT_MASK;
5788 	}
5789 	if (rdev->irq.crtc_vblank_int[4] ||
5790 	    atomic_read(&rdev->irq.pflip[4])) {
5791 		DRM_DEBUG("si_irq_set: vblank 4\n");
5792 		crtc5 |= VBLANK_INT_MASK;
5793 	}
5794 	if (rdev->irq.crtc_vblank_int[5] ||
5795 	    atomic_read(&rdev->irq.pflip[5])) {
5796 		DRM_DEBUG("si_irq_set: vblank 5\n");
5797 		crtc6 |= VBLANK_INT_MASK;
5798 	}
5799 	if (rdev->irq.hpd[0]) {
5800 		DRM_DEBUG("si_irq_set: hpd 1\n");
5801 		hpd1 |= DC_HPDx_INT_EN;
5802 	}
5803 	if (rdev->irq.hpd[1]) {
5804 		DRM_DEBUG("si_irq_set: hpd 2\n");
5805 		hpd2 |= DC_HPDx_INT_EN;
5806 	}
5807 	if (rdev->irq.hpd[2]) {
5808 		DRM_DEBUG("si_irq_set: hpd 3\n");
5809 		hpd3 |= DC_HPDx_INT_EN;
5810 	}
5811 	if (rdev->irq.hpd[3]) {
5812 		DRM_DEBUG("si_irq_set: hpd 4\n");
5813 		hpd4 |= DC_HPDx_INT_EN;
5814 	}
5815 	if (rdev->irq.hpd[4]) {
5816 		DRM_DEBUG("si_irq_set: hpd 5\n");
5817 		hpd5 |= DC_HPDx_INT_EN;
5818 	}
5819 	if (rdev->irq.hpd[5]) {
5820 		DRM_DEBUG("si_irq_set: hpd 6\n");
5821 		hpd6 |= DC_HPDx_INT_EN;
5822 	}
5823 
5824 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5825 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5826 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5827 
5828 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5829 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5830 
5831 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5832 
5833 	if (rdev->irq.dpm_thermal) {
5834 		DRM_DEBUG("dpm thermal\n");
5835 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5836 	}
5837 
5838 	if (rdev->num_crtc >= 2) {
5839 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5840 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5841 	}
5842 	if (rdev->num_crtc >= 4) {
5843 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5844 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5845 	}
5846 	if (rdev->num_crtc >= 6) {
5847 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5848 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5849 	}
5850 
5851 	if (rdev->num_crtc >= 2) {
5852 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5853 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5854 	}
5855 	if (rdev->num_crtc >= 4) {
5856 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5857 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5858 	}
5859 	if (rdev->num_crtc >= 6) {
5860 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5861 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5862 	}
5863 
5864 	if (!ASIC_IS_NODCE(rdev)) {
5865 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5866 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5867 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5868 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5869 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5870 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5871 	}
5872 
5873 	WREG32(CG_THERMAL_INT, thermal_int);
5874 
5875 	return 0;
5876 }
5877 
5878 static inline void si_irq_ack(struct radeon_device *rdev)
5879 {
5880 	u32 tmp;
5881 
5882 	if (ASIC_IS_NODCE(rdev))
5883 		return;
5884 
5885 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5886 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5887 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5888 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5889 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5890 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5891 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5892 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5893 	if (rdev->num_crtc >= 4) {
5894 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5895 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5896 	}
5897 	if (rdev->num_crtc >= 6) {
5898 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5899 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5900 	}
5901 
5902 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5903 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5904 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5905 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5906 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5907 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5908 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5909 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5910 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5911 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5912 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5913 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5914 
5915 	if (rdev->num_crtc >= 4) {
5916 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5917 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5918 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5919 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5920 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5921 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5922 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5923 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5924 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5925 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5926 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5927 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5928 	}
5929 
5930 	if (rdev->num_crtc >= 6) {
5931 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5932 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5933 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5934 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5935 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5936 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5937 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5938 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5939 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5940 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5941 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5942 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5943 	}
5944 
5945 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5946 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5947 		tmp |= DC_HPDx_INT_ACK;
5948 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5949 	}
5950 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5951 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5952 		tmp |= DC_HPDx_INT_ACK;
5953 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5954 	}
5955 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5956 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5957 		tmp |= DC_HPDx_INT_ACK;
5958 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5959 	}
5960 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5961 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5962 		tmp |= DC_HPDx_INT_ACK;
5963 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5964 	}
5965 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5966 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5967 		tmp |= DC_HPDx_INT_ACK;
5968 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5969 	}
5970 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5971 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5972 		tmp |= DC_HPDx_INT_ACK;
5973 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5974 	}
5975 }
5976 
/* Disable interrupt generation, then ack anything that was already
 * latched so no stale sources remain pending, and finally reset the
 * per-source enable state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5985 
/* Quiesce interrupts and stop the RLC ahead of suspend or teardown. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5991 
/* Full interrupt teardown: quiesce the hardware, then free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5997 
5998 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5999 {
6000 	u32 wptr, tmp;
6001 
6002 	if (rdev->wb.enabled)
6003 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6004 	else
6005 		wptr = RREG32(IH_RB_WPTR);
6006 
6007 	if (wptr & RB_OVERFLOW) {
6008 		/* When a ring buffer overflow happen start parsing interrupt
6009 		 * from the last not overwritten vector (wptr + 16). Hopefully
6010 		 * this should allow us to catchup.
6011 		 */
6012 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6013 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6014 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6015 		tmp = RREG32(IH_RB_CNTL);
6016 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6017 		WREG32(IH_RB_CNTL, tmp);
6018 	}
6019 	return (wptr & rdev->ih.ptr_mask);
6020 }
6021 
6022 /*        SI IV Ring
6023  * Each IV ring entry is 128 bits:
6024  * [7:0]    - interrupt source id
6025  * [31:8]   - reserved
6026  * [59:32]  - interrupt source data
6027  * [63:60]  - reserved
6028  * [71:64]  - RINGID
6029  * [79:72]  - VMID
6030  * [127:80] - reserved
6031  */
/**
 * si_irq_process - drain and dispatch the IH (interrupt handler) ring
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, decoding each 16-byte vector
 * (see the IV ring layout comment above) and dispatching by source id:
 * CRTC vblank/vline, hotplug, VM faults, CP/DMA fences and thermal
 * events.  Hotplug and thermal handling is deferred to work queues.
 * Returns IRQ_HANDLED if the ring was processed, IRQ_NONE if there was
 * nothing to do or another context already holds the IH lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					/* clear the latched bit so a re-entry does not
					 * double-handle this event */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM context1 protection faults */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6355 
6356 /*
6357  * startup/shutdown callbacks
6358  */
/**
 * si_startup - bring the ASIC up to a working state
 *
 * @rdev: radeon_device pointer
 *
 * Ordered hardware bring-up used by both init and resume: PCIe/ASPM,
 * MC programming, microcode load, GART, RLC, writeback, fence drivers,
 * UVD, interrupts, all CP/DMA rings, IB pool, VM manager and audio.
 * Returns 0 on success, negative error code on the first failure
 * (callers tear down via the fini paths).
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load microcode only if some image is still missing */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring before ring init */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: disable its ring instead of aborting */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only exists if uvd_v2_2_resume() succeeded above */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6550 
/**
 * si_resume - resume the ASIC from suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM BIOS, restores the golden register
 * settings and reruns the full startup sequence.
 * Returns 0 on success, negative error code on failure.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset the GPU before posting: posting (atom_asic_init)
	 * performs the tasks needed to bring the GPU back into a good
	 * state on these ASICs.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6576 
/**
 * si_suspend - quiesce the ASIC for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, CP and DMA engines, UVD, power/clock
 * gating, interrupts, writeback and finally the GART, in dependency
 * order.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6594 
6595 /* Plan is to move initialization in that function and use
6596  * helper function so that radeon_device_init pretty much
6597  * do nothing more than calling asic specific function. This
6598  * should also allow to remove a bunch of callback function
6599  * like vram_info.
6600  */
6601 int si_init(struct radeon_device *rdev)
6602 {
6603 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6604 	int r;
6605 
6606 	/* Read BIOS */
6607 	if (!radeon_get_bios(rdev)) {
6608 		if (ASIC_IS_AVIVO(rdev))
6609 			return -EINVAL;
6610 	}
6611 	/* Must be an ATOMBIOS */
6612 	if (!rdev->is_atom_bios) {
6613 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6614 		return -EINVAL;
6615 	}
6616 	r = radeon_atombios_init(rdev);
6617 	if (r)
6618 		return r;
6619 
6620 	/* Post card if necessary */
6621 	if (!radeon_card_posted(rdev)) {
6622 		if (!rdev->bios) {
6623 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6624 			return -EINVAL;
6625 		}
6626 		DRM_INFO("GPU not posted. posting now...\n");
6627 		atom_asic_init(rdev->mode_info.atom_context);
6628 	}
6629 	/* init golden registers */
6630 	si_init_golden_registers(rdev);
6631 	/* Initialize scratch registers */
6632 	si_scratch_init(rdev);
6633 	/* Initialize surface registers */
6634 	radeon_surface_init(rdev);
6635 	/* Initialize clocks */
6636 	radeon_get_clock_info(rdev->ddev);
6637 
6638 	/* Fence driver */
6639 	r = radeon_fence_driver_init(rdev);
6640 	if (r)
6641 		return r;
6642 
6643 	/* initialize memory controller */
6644 	r = si_mc_init(rdev);
6645 	if (r)
6646 		return r;
6647 	/* Memory manager */
6648 	r = radeon_bo_init(rdev);
6649 	if (r)
6650 		return r;
6651 
6652 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6653 	ring->ring_obj = NULL;
6654 	r600_ring_init(rdev, ring, 1024 * 1024);
6655 
6656 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6657 	ring->ring_obj = NULL;
6658 	r600_ring_init(rdev, ring, 1024 * 1024);
6659 
6660 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6661 	ring->ring_obj = NULL;
6662 	r600_ring_init(rdev, ring, 1024 * 1024);
6663 
6664 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6665 	ring->ring_obj = NULL;
6666 	r600_ring_init(rdev, ring, 64 * 1024);
6667 
6668 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6669 	ring->ring_obj = NULL;
6670 	r600_ring_init(rdev, ring, 64 * 1024);
6671 
6672 	if (rdev->has_uvd) {
6673 		r = radeon_uvd_init(rdev);
6674 		if (!r) {
6675 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6676 			ring->ring_obj = NULL;
6677 			r600_ring_init(rdev, ring, 4096);
6678 		}
6679 	}
6680 
6681 	rdev->ih.ring_obj = NULL;
6682 	r600_ih_ring_init(rdev, 64 * 1024);
6683 
6684 	r = r600_pcie_gart_init(rdev);
6685 	if (r)
6686 		return r;
6687 
6688 	rdev->accel_working = true;
6689 	r = si_startup(rdev);
6690 	if (r) {
6691 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6692 		si_cp_fini(rdev);
6693 		cayman_dma_fini(rdev);
6694 		si_irq_fini(rdev);
6695 		sumo_rlc_fini(rdev);
6696 		radeon_wb_fini(rdev);
6697 		radeon_ib_pool_fini(rdev);
6698 		radeon_vm_manager_fini(rdev);
6699 		radeon_irq_kms_fini(rdev);
6700 		si_pcie_gart_fini(rdev);
6701 		rdev->accel_working = false;
6702 	}
6703 
6704 	/* Don't start up if the MC ucode is missing.
6705 	 * The default clocks and voltages before the MC ucode
6706 	 * is loaded are not suffient for advanced operations.
6707 	 */
6708 	if (!rdev->mc_fw) {
6709 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6710 		return -EINVAL;
6711 	}
6712 
6713 	return 0;
6714 }
6715 
/**
 * si_fini - tear down SI driver state at unload time
 *
 * @rdev: radeon_device pointer
 *
 * Shuts down the CP and DMA engines, disables powergating and
 * clockgating, tears down interrupts, the RLC, writeback, the VM
 * manager, the IB pool, UVD (when the ASIC has the block), the GART,
 * VRAM scratch, GEM, the fence driver, the BO manager and the ATOM
 * BIOS state, then frees the cached BIOS image.  The call order
 * mirrors the dependencies between these subsystems and must not be
 * changed casually.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		/* only torn down when init detected a UVD block */
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6741 
6742 /**
6743  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6744  *
6745  * @rdev: radeon_device pointer
6746  *
6747  * Fetches a GPU clock counter snapshot (SI).
6748  * Returns the 64 bit clock counter snapshot.
6749  */
6750 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6751 {
6752 	uint64_t clock;
6753 
6754 	mutex_lock(&rdev->gpu_clock_mutex);
6755 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6756 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6757 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6758 	mutex_unlock(&rdev->gpu_clock_mutex);
6759 	return clock;
6760 }
6761 
/**
 * si_set_uvd_clocks - reprogram the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 leaves the PLL bypassed and asleep)
 * @dclk: requested UVD decode clock (0 leaves the PLL bypassed and asleep)
 *
 * Switches VCLK/DCLK onto the bypass clock so UVD keeps ticking while
 * the PLL is reprogrammed, computes new feedback and post dividers via
 * radeon_uvd_calc_upll_dividers(), programs them, brings the PLL out of
 * reset, and finally switches VCLK/DCLK back onto the PLL outputs.
 * The register write sequence is order sensitive.
 *
 * Returns 0 on success, or a negative error code from the divider
 * calculation or the UPLL control requests.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* constants are the UPLL hardware constraints passed to the
	 * shared divider calculator - see radeon_uvd_calc_upll_dividers()
	 */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC before touching the dividers */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a PLL operating range based on fb_div -
	 * NOTE(review): threshold semantics come from AMD hw docs, not
	 * visible here
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6852 
/**
 * si_pcie_gen3_enable - bring the PCIE link up to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If the platform (root port and GPU) supports 5.0 GT/s or 8.0 GT/s
 * link speeds and the link is not already running at the best
 * supported rate, retrains the link to the higher speed.  For gen3 a
 * full equalization retry sequence is performed first.  Controlled by
 * the radeon.pcie_gen2 module parameter; a no-op on IGP or non-PCIE
 * parts.  The config-space and PCIE port register accesses below are
 * order sensitive.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends of the link must expose a PCIe capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current LNKCTL values so the HAWD bit can
			 * be restored after the equalization retries
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its maximum detected width
			 * before retraining, if renegotiation is supported
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries, stopping early once
			 * no transactions are pending on the GPU
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo
				 * equalization
				 */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved bit 4 and bits 11:9
				 * (compliance related fields - see the PCIe
				 * LNKCTL2 register layout)
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field (bits 3:0 of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait (bounded by usec_timeout) for the hw to ack the change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7009 
/**
 * si_program_aspm - configure PCIE ASPM (L0s/L1) and related PLL/clock
 * power saving state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE LC registers, the PIF PHY pad power-down registers
 * and, when CLKREQ# is supported by the root port, switches several
 * internal clocks over so the reference clock can be gated in L1.
 * Controlled by the radeon.aspm module parameter; a no-op on non-PCIE
 * parts.  The local disable_* flags are compile-time knobs that are
 * all hardcoded to false here, so every branch below is taken unless
 * the hardware says otherwise.  The register write sequence is order
 * sensitive.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; only written out
	 * inside the !disable_l1 branch or the else branch below
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF and
			 * TXS2 pad power states, on both PIF instances
			 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything except
			 * Oland/Hainan
			 */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time only needs an explicit value on
			 * Oland/Hainan
			 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ# support is taken from the upstream bridge's
			 * link capabilities
			 */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* move the thermal monitor, deep sleep, zclk
				 * and BIF clocks off the reference clock so it
				 * can be gated via CLKREQ#
				 */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* memory light sleep for the PCIE block */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* drop the L0s inactivity timeout again if the N_FTS field is
	 * saturated and the link is reversed in both directions
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7213