xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 1491eaf9)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53 MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
54 
55 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
61 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
62 
63 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
69 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
70 
71 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
72 MODULE_FIRMWARE("radeon/VERDE_me.bin");
73 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
76 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
77 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
78 
79 MODULE_FIRMWARE("radeon/verde_pfp.bin");
80 MODULE_FIRMWARE("radeon/verde_me.bin");
81 MODULE_FIRMWARE("radeon/verde_ce.bin");
82 MODULE_FIRMWARE("radeon/verde_mc.bin");
83 MODULE_FIRMWARE("radeon/verde_rlc.bin");
84 MODULE_FIRMWARE("radeon/verde_smc.bin");
85 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
86 
87 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
88 MODULE_FIRMWARE("radeon/OLAND_me.bin");
89 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
91 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
92 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
93 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
94 
95 MODULE_FIRMWARE("radeon/oland_pfp.bin");
96 MODULE_FIRMWARE("radeon/oland_me.bin");
97 MODULE_FIRMWARE("radeon/oland_ce.bin");
98 MODULE_FIRMWARE("radeon/oland_mc.bin");
99 MODULE_FIRMWARE("radeon/oland_rlc.bin");
100 MODULE_FIRMWARE("radeon/oland_smc.bin");
101 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
102 
103 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
109 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
110 
111 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
112 MODULE_FIRMWARE("radeon/hainan_me.bin");
113 MODULE_FIRMWARE("radeon/hainan_ce.bin");
114 MODULE_FIRMWARE("radeon/hainan_mc.bin");
115 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
116 MODULE_FIRMWARE("radeon/hainan_smc.bin");
117 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
118 
119 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
120 static void si_pcie_gen3_enable(struct radeon_device *rdev);
121 static void si_program_aspm(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
127 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
129 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
130 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
133 					 bool enable);
134 static void si_init_pg(struct radeon_device *rdev);
135 static void si_init_cg(struct radeon_device *rdev);
136 static void si_fini_pg(struct radeon_device *rdev);
137 static void si_fini_cg(struct radeon_device *rdev);
138 static void si_rlc_stop(struct radeon_device *rdev);
139 
140 static const u32 verde_rlc_save_restore_register_list[] =
141 {
142 	(0x8000 << 16) | (0x98f4 >> 2),
143 	0x00000000,
144 	(0x8040 << 16) | (0x98f4 >> 2),
145 	0x00000000,
146 	(0x8000 << 16) | (0xe80 >> 2),
147 	0x00000000,
148 	(0x8040 << 16) | (0xe80 >> 2),
149 	0x00000000,
150 	(0x8000 << 16) | (0x89bc >> 2),
151 	0x00000000,
152 	(0x8040 << 16) | (0x89bc >> 2),
153 	0x00000000,
154 	(0x8000 << 16) | (0x8c1c >> 2),
155 	0x00000000,
156 	(0x8040 << 16) | (0x8c1c >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x98f0 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0xe7c >> 2),
161 	0x00000000,
162 	(0x8000 << 16) | (0x9148 >> 2),
163 	0x00000000,
164 	(0x8040 << 16) | (0x9148 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9150 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x897c >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x8d8c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0xac54 >> 2),
173 	0X00000000,
174 	0x3,
175 	(0x9c00 << 16) | (0x98f8 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9910 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9914 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9918 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x991c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x9920 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9924 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9928 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x992c >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x9930 >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9934 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9938 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x993c >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x9940 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9944 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9948 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x994c >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9950 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9954 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9958 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x995c >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9960 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9964 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9968 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x996c >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9970 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9974 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9978 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x997c >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x9980 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9984 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x9988 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x998c >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c00 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8c14 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8c04 >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8c08 >> 2),
248 	0x00000000,
249 	(0x8000 << 16) | (0x9b7c >> 2),
250 	0x00000000,
251 	(0x8040 << 16) | (0x9b7c >> 2),
252 	0x00000000,
253 	(0x8000 << 16) | (0xe84 >> 2),
254 	0x00000000,
255 	(0x8040 << 16) | (0xe84 >> 2),
256 	0x00000000,
257 	(0x8000 << 16) | (0x89c0 >> 2),
258 	0x00000000,
259 	(0x8040 << 16) | (0x89c0 >> 2),
260 	0x00000000,
261 	(0x8000 << 16) | (0x914c >> 2),
262 	0x00000000,
263 	(0x8040 << 16) | (0x914c >> 2),
264 	0x00000000,
265 	(0x8000 << 16) | (0x8c20 >> 2),
266 	0x00000000,
267 	(0x8040 << 16) | (0x8c20 >> 2),
268 	0x00000000,
269 	(0x8000 << 16) | (0x9354 >> 2),
270 	0x00000000,
271 	(0x8040 << 16) | (0x9354 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x9060 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9364 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9100 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x913c >> 2),
280 	0x00000000,
281 	(0x8000 << 16) | (0x90e0 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x90e4 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x90e8 >> 2),
286 	0x00000000,
287 	(0x8040 << 16) | (0x90e0 >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x90e4 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x90e8 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x8bcc >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x8b24 >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x88c4 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x8e50 >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x8c0c >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x8e58 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x8e5c >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0x9508 >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0x950c >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0x9494 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0xac0c >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0xac10 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0xac14 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0xae00 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0xac08 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x88d4 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x88c8 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x88cc >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x89b0 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x8b10 >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x8a14 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x9830 >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0x9834 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0x9838 >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0x9a10 >> 2),
342 	0x00000000,
343 	(0x8000 << 16) | (0x9870 >> 2),
344 	0x00000000,
345 	(0x8000 << 16) | (0x9874 >> 2),
346 	0x00000000,
347 	(0x8001 << 16) | (0x9870 >> 2),
348 	0x00000000,
349 	(0x8001 << 16) | (0x9874 >> 2),
350 	0x00000000,
351 	(0x8040 << 16) | (0x9870 >> 2),
352 	0x00000000,
353 	(0x8040 << 16) | (0x9874 >> 2),
354 	0x00000000,
355 	(0x8041 << 16) | (0x9870 >> 2),
356 	0x00000000,
357 	(0x8041 << 16) | (0x9874 >> 2),
358 	0x00000000,
359 	0x00000000
360 };
361 
362 static const u32 tahiti_golden_rlc_registers[] =
363 {
364 	0xc424, 0xffffffff, 0x00601005,
365 	0xc47c, 0xffffffff, 0x10104040,
366 	0xc488, 0xffffffff, 0x0100000a,
367 	0xc314, 0xffffffff, 0x00000800,
368 	0xc30c, 0xffffffff, 0x800000f4,
369 	0xf4a8, 0xffffffff, 0x00000000
370 };
371 
372 static const u32 tahiti_golden_registers[] =
373 {
374 	0x9a10, 0x00010000, 0x00018208,
375 	0x9830, 0xffffffff, 0x00000000,
376 	0x9834, 0xf00fffff, 0x00000400,
377 	0x9838, 0x0002021c, 0x00020200,
378 	0xc78, 0x00000080, 0x00000000,
379 	0xd030, 0x000300c0, 0x00800040,
380 	0xd830, 0x000300c0, 0x00800040,
381 	0x5bb0, 0x000000f0, 0x00000070,
382 	0x5bc0, 0x00200000, 0x50100000,
383 	0x7030, 0x31000311, 0x00000011,
384 	0x277c, 0x00000003, 0x000007ff,
385 	0x240c, 0x000007ff, 0x00000000,
386 	0x8a14, 0xf000001f, 0x00000007,
387 	0x8b24, 0xffffffff, 0x00ffffff,
388 	0x8b10, 0x0000ff0f, 0x00000000,
389 	0x28a4c, 0x07ffffff, 0x4e000000,
390 	0x28350, 0x3f3f3fff, 0x2a00126a,
391 	0x30, 0x000000ff, 0x0040,
392 	0x34, 0x00000040, 0x00004040,
393 	0x9100, 0x07ffffff, 0x03000000,
394 	0x8e88, 0x01ff1f3f, 0x00000000,
395 	0x8e84, 0x01ff1f3f, 0x00000000,
396 	0x9060, 0x0000007f, 0x00000020,
397 	0x9508, 0x00010000, 0x00010000,
398 	0xac14, 0x00000200, 0x000002fb,
399 	0xac10, 0xffffffff, 0x0000543b,
400 	0xac0c, 0xffffffff, 0xa9210876,
401 	0x88d0, 0xffffffff, 0x000fff40,
402 	0x88d4, 0x0000001f, 0x00000010,
403 	0x1410, 0x20000000, 0x20fffed8,
404 	0x15c0, 0x000c0fc0, 0x000c0400
405 };
406 
407 static const u32 tahiti_golden_registers2[] =
408 {
409 	0xc64, 0x00000001, 0x00000001
410 };
411 
412 static const u32 pitcairn_golden_rlc_registers[] =
413 {
414 	0xc424, 0xffffffff, 0x00601004,
415 	0xc47c, 0xffffffff, 0x10102020,
416 	0xc488, 0xffffffff, 0x01000020,
417 	0xc314, 0xffffffff, 0x00000800,
418 	0xc30c, 0xffffffff, 0x800000a4
419 };
420 
421 static const u32 pitcairn_golden_registers[] =
422 {
423 	0x9a10, 0x00010000, 0x00018208,
424 	0x9830, 0xffffffff, 0x00000000,
425 	0x9834, 0xf00fffff, 0x00000400,
426 	0x9838, 0x0002021c, 0x00020200,
427 	0xc78, 0x00000080, 0x00000000,
428 	0xd030, 0x000300c0, 0x00800040,
429 	0xd830, 0x000300c0, 0x00800040,
430 	0x5bb0, 0x000000f0, 0x00000070,
431 	0x5bc0, 0x00200000, 0x50100000,
432 	0x7030, 0x31000311, 0x00000011,
433 	0x2ae4, 0x00073ffe, 0x000022a2,
434 	0x240c, 0x000007ff, 0x00000000,
435 	0x8a14, 0xf000001f, 0x00000007,
436 	0x8b24, 0xffffffff, 0x00ffffff,
437 	0x8b10, 0x0000ff0f, 0x00000000,
438 	0x28a4c, 0x07ffffff, 0x4e000000,
439 	0x28350, 0x3f3f3fff, 0x2a00126a,
440 	0x30, 0x000000ff, 0x0040,
441 	0x34, 0x00000040, 0x00004040,
442 	0x9100, 0x07ffffff, 0x03000000,
443 	0x9060, 0x0000007f, 0x00000020,
444 	0x9508, 0x00010000, 0x00010000,
445 	0xac14, 0x000003ff, 0x000000f7,
446 	0xac10, 0xffffffff, 0x00000000,
447 	0xac0c, 0xffffffff, 0x32761054,
448 	0x88d4, 0x0000001f, 0x00000010,
449 	0x15c0, 0x000c0fc0, 0x000c0400
450 };
451 
452 static const u32 verde_golden_rlc_registers[] =
453 {
454 	0xc424, 0xffffffff, 0x033f1005,
455 	0xc47c, 0xffffffff, 0x10808020,
456 	0xc488, 0xffffffff, 0x00800008,
457 	0xc314, 0xffffffff, 0x00001000,
458 	0xc30c, 0xffffffff, 0x80010014
459 };
460 
461 static const u32 verde_golden_registers[] =
462 {
463 	0x9a10, 0x00010000, 0x00018208,
464 	0x9830, 0xffffffff, 0x00000000,
465 	0x9834, 0xf00fffff, 0x00000400,
466 	0x9838, 0x0002021c, 0x00020200,
467 	0xc78, 0x00000080, 0x00000000,
468 	0xd030, 0x000300c0, 0x00800040,
469 	0xd030, 0x000300c0, 0x00800040,
470 	0xd830, 0x000300c0, 0x00800040,
471 	0xd830, 0x000300c0, 0x00800040,
472 	0x5bb0, 0x000000f0, 0x00000070,
473 	0x5bc0, 0x00200000, 0x50100000,
474 	0x7030, 0x31000311, 0x00000011,
475 	0x2ae4, 0x00073ffe, 0x000022a2,
476 	0x2ae4, 0x00073ffe, 0x000022a2,
477 	0x2ae4, 0x00073ffe, 0x000022a2,
478 	0x240c, 0x000007ff, 0x00000000,
479 	0x240c, 0x000007ff, 0x00000000,
480 	0x240c, 0x000007ff, 0x00000000,
481 	0x8a14, 0xf000001f, 0x00000007,
482 	0x8a14, 0xf000001f, 0x00000007,
483 	0x8a14, 0xf000001f, 0x00000007,
484 	0x8b24, 0xffffffff, 0x00ffffff,
485 	0x8b10, 0x0000ff0f, 0x00000000,
486 	0x28a4c, 0x07ffffff, 0x4e000000,
487 	0x28350, 0x3f3f3fff, 0x0000124a,
488 	0x28350, 0x3f3f3fff, 0x0000124a,
489 	0x28350, 0x3f3f3fff, 0x0000124a,
490 	0x30, 0x000000ff, 0x0040,
491 	0x34, 0x00000040, 0x00004040,
492 	0x9100, 0x07ffffff, 0x03000000,
493 	0x9100, 0x07ffffff, 0x03000000,
494 	0x8e88, 0x01ff1f3f, 0x00000000,
495 	0x8e88, 0x01ff1f3f, 0x00000000,
496 	0x8e88, 0x01ff1f3f, 0x00000000,
497 	0x8e84, 0x01ff1f3f, 0x00000000,
498 	0x8e84, 0x01ff1f3f, 0x00000000,
499 	0x8e84, 0x01ff1f3f, 0x00000000,
500 	0x9060, 0x0000007f, 0x00000020,
501 	0x9508, 0x00010000, 0x00010000,
502 	0xac14, 0x000003ff, 0x00000003,
503 	0xac14, 0x000003ff, 0x00000003,
504 	0xac14, 0x000003ff, 0x00000003,
505 	0xac10, 0xffffffff, 0x00000000,
506 	0xac10, 0xffffffff, 0x00000000,
507 	0xac10, 0xffffffff, 0x00000000,
508 	0xac0c, 0xffffffff, 0x00001032,
509 	0xac0c, 0xffffffff, 0x00001032,
510 	0xac0c, 0xffffffff, 0x00001032,
511 	0x88d4, 0x0000001f, 0x00000010,
512 	0x88d4, 0x0000001f, 0x00000010,
513 	0x88d4, 0x0000001f, 0x00000010,
514 	0x15c0, 0x000c0fc0, 0x000c0400
515 };
516 
517 static const u32 oland_golden_rlc_registers[] =
518 {
519 	0xc424, 0xffffffff, 0x00601005,
520 	0xc47c, 0xffffffff, 0x10104040,
521 	0xc488, 0xffffffff, 0x0100000a,
522 	0xc314, 0xffffffff, 0x00000800,
523 	0xc30c, 0xffffffff, 0x800000f4
524 };
525 
526 static const u32 oland_golden_registers[] =
527 {
528 	0x9a10, 0x00010000, 0x00018208,
529 	0x9830, 0xffffffff, 0x00000000,
530 	0x9834, 0xf00fffff, 0x00000400,
531 	0x9838, 0x0002021c, 0x00020200,
532 	0xc78, 0x00000080, 0x00000000,
533 	0xd030, 0x000300c0, 0x00800040,
534 	0xd830, 0x000300c0, 0x00800040,
535 	0x5bb0, 0x000000f0, 0x00000070,
536 	0x5bc0, 0x00200000, 0x50100000,
537 	0x7030, 0x31000311, 0x00000011,
538 	0x2ae4, 0x00073ffe, 0x000022a2,
539 	0x240c, 0x000007ff, 0x00000000,
540 	0x8a14, 0xf000001f, 0x00000007,
541 	0x8b24, 0xffffffff, 0x00ffffff,
542 	0x8b10, 0x0000ff0f, 0x00000000,
543 	0x28a4c, 0x07ffffff, 0x4e000000,
544 	0x28350, 0x3f3f3fff, 0x00000082,
545 	0x30, 0x000000ff, 0x0040,
546 	0x34, 0x00000040, 0x00004040,
547 	0x9100, 0x07ffffff, 0x03000000,
548 	0x9060, 0x0000007f, 0x00000020,
549 	0x9508, 0x00010000, 0x00010000,
550 	0xac14, 0x000003ff, 0x000000f3,
551 	0xac10, 0xffffffff, 0x00000000,
552 	0xac0c, 0xffffffff, 0x00003210,
553 	0x88d4, 0x0000001f, 0x00000010,
554 	0x15c0, 0x000c0fc0, 0x000c0400
555 };
556 
557 static const u32 hainan_golden_registers[] =
558 {
559 	0x9a10, 0x00010000, 0x00018208,
560 	0x9830, 0xffffffff, 0x00000000,
561 	0x9834, 0xf00fffff, 0x00000400,
562 	0x9838, 0x0002021c, 0x00020200,
563 	0xd0c0, 0xff000fff, 0x00000100,
564 	0xd030, 0x000300c0, 0x00800040,
565 	0xd8c0, 0xff000fff, 0x00000100,
566 	0xd830, 0x000300c0, 0x00800040,
567 	0x2ae4, 0x00073ffe, 0x000022a2,
568 	0x240c, 0x000007ff, 0x00000000,
569 	0x8a14, 0xf000001f, 0x00000007,
570 	0x8b24, 0xffffffff, 0x00ffffff,
571 	0x8b10, 0x0000ff0f, 0x00000000,
572 	0x28a4c, 0x07ffffff, 0x4e000000,
573 	0x28350, 0x3f3f3fff, 0x00000000,
574 	0x30, 0x000000ff, 0x0040,
575 	0x34, 0x00000040, 0x00004040,
576 	0x9100, 0x03e00000, 0x03600000,
577 	0x9060, 0x0000007f, 0x00000020,
578 	0x9508, 0x00010000, 0x00010000,
579 	0xac14, 0x000003ff, 0x000000f1,
580 	0xac10, 0xffffffff, 0x00000000,
581 	0xac0c, 0xffffffff, 0x00003210,
582 	0x88d4, 0x0000001f, 0x00000010,
583 	0x15c0, 0x000c0fc0, 0x000c0400
584 };
585 
586 static const u32 hainan_golden_registers2[] =
587 {
588 	0x98f8, 0xffffffff, 0x02010001
589 };
590 
591 static const u32 tahiti_mgcg_cgcg_init[] =
592 {
593 	0xc400, 0xffffffff, 0xfffffffc,
594 	0x802c, 0xffffffff, 0xe0000000,
595 	0x9a60, 0xffffffff, 0x00000100,
596 	0x92a4, 0xffffffff, 0x00000100,
597 	0xc164, 0xffffffff, 0x00000100,
598 	0x9774, 0xffffffff, 0x00000100,
599 	0x8984, 0xffffffff, 0x06000100,
600 	0x8a18, 0xffffffff, 0x00000100,
601 	0x92a0, 0xffffffff, 0x00000100,
602 	0xc380, 0xffffffff, 0x00000100,
603 	0x8b28, 0xffffffff, 0x00000100,
604 	0x9144, 0xffffffff, 0x00000100,
605 	0x8d88, 0xffffffff, 0x00000100,
606 	0x8d8c, 0xffffffff, 0x00000100,
607 	0x9030, 0xffffffff, 0x00000100,
608 	0x9034, 0xffffffff, 0x00000100,
609 	0x9038, 0xffffffff, 0x00000100,
610 	0x903c, 0xffffffff, 0x00000100,
611 	0xad80, 0xffffffff, 0x00000100,
612 	0xac54, 0xffffffff, 0x00000100,
613 	0x897c, 0xffffffff, 0x06000100,
614 	0x9868, 0xffffffff, 0x00000100,
615 	0x9510, 0xffffffff, 0x00000100,
616 	0xaf04, 0xffffffff, 0x00000100,
617 	0xae04, 0xffffffff, 0x00000100,
618 	0x949c, 0xffffffff, 0x00000100,
619 	0x802c, 0xffffffff, 0xe0000000,
620 	0x9160, 0xffffffff, 0x00010000,
621 	0x9164, 0xffffffff, 0x00030002,
622 	0x9168, 0xffffffff, 0x00040007,
623 	0x916c, 0xffffffff, 0x00060005,
624 	0x9170, 0xffffffff, 0x00090008,
625 	0x9174, 0xffffffff, 0x00020001,
626 	0x9178, 0xffffffff, 0x00040003,
627 	0x917c, 0xffffffff, 0x00000007,
628 	0x9180, 0xffffffff, 0x00060005,
629 	0x9184, 0xffffffff, 0x00090008,
630 	0x9188, 0xffffffff, 0x00030002,
631 	0x918c, 0xffffffff, 0x00050004,
632 	0x9190, 0xffffffff, 0x00000008,
633 	0x9194, 0xffffffff, 0x00070006,
634 	0x9198, 0xffffffff, 0x000a0009,
635 	0x919c, 0xffffffff, 0x00040003,
636 	0x91a0, 0xffffffff, 0x00060005,
637 	0x91a4, 0xffffffff, 0x00000009,
638 	0x91a8, 0xffffffff, 0x00080007,
639 	0x91ac, 0xffffffff, 0x000b000a,
640 	0x91b0, 0xffffffff, 0x00050004,
641 	0x91b4, 0xffffffff, 0x00070006,
642 	0x91b8, 0xffffffff, 0x0008000b,
643 	0x91bc, 0xffffffff, 0x000a0009,
644 	0x91c0, 0xffffffff, 0x000d000c,
645 	0x91c4, 0xffffffff, 0x00060005,
646 	0x91c8, 0xffffffff, 0x00080007,
647 	0x91cc, 0xffffffff, 0x0000000b,
648 	0x91d0, 0xffffffff, 0x000a0009,
649 	0x91d4, 0xffffffff, 0x000d000c,
650 	0x91d8, 0xffffffff, 0x00070006,
651 	0x91dc, 0xffffffff, 0x00090008,
652 	0x91e0, 0xffffffff, 0x0000000c,
653 	0x91e4, 0xffffffff, 0x000b000a,
654 	0x91e8, 0xffffffff, 0x000e000d,
655 	0x91ec, 0xffffffff, 0x00080007,
656 	0x91f0, 0xffffffff, 0x000a0009,
657 	0x91f4, 0xffffffff, 0x0000000d,
658 	0x91f8, 0xffffffff, 0x000c000b,
659 	0x91fc, 0xffffffff, 0x000f000e,
660 	0x9200, 0xffffffff, 0x00090008,
661 	0x9204, 0xffffffff, 0x000b000a,
662 	0x9208, 0xffffffff, 0x000c000f,
663 	0x920c, 0xffffffff, 0x000e000d,
664 	0x9210, 0xffffffff, 0x00110010,
665 	0x9214, 0xffffffff, 0x000a0009,
666 	0x9218, 0xffffffff, 0x000c000b,
667 	0x921c, 0xffffffff, 0x0000000f,
668 	0x9220, 0xffffffff, 0x000e000d,
669 	0x9224, 0xffffffff, 0x00110010,
670 	0x9228, 0xffffffff, 0x000b000a,
671 	0x922c, 0xffffffff, 0x000d000c,
672 	0x9230, 0xffffffff, 0x00000010,
673 	0x9234, 0xffffffff, 0x000f000e,
674 	0x9238, 0xffffffff, 0x00120011,
675 	0x923c, 0xffffffff, 0x000c000b,
676 	0x9240, 0xffffffff, 0x000e000d,
677 	0x9244, 0xffffffff, 0x00000011,
678 	0x9248, 0xffffffff, 0x0010000f,
679 	0x924c, 0xffffffff, 0x00130012,
680 	0x9250, 0xffffffff, 0x000d000c,
681 	0x9254, 0xffffffff, 0x000f000e,
682 	0x9258, 0xffffffff, 0x00100013,
683 	0x925c, 0xffffffff, 0x00120011,
684 	0x9260, 0xffffffff, 0x00150014,
685 	0x9264, 0xffffffff, 0x000e000d,
686 	0x9268, 0xffffffff, 0x0010000f,
687 	0x926c, 0xffffffff, 0x00000013,
688 	0x9270, 0xffffffff, 0x00120011,
689 	0x9274, 0xffffffff, 0x00150014,
690 	0x9278, 0xffffffff, 0x000f000e,
691 	0x927c, 0xffffffff, 0x00110010,
692 	0x9280, 0xffffffff, 0x00000014,
693 	0x9284, 0xffffffff, 0x00130012,
694 	0x9288, 0xffffffff, 0x00160015,
695 	0x928c, 0xffffffff, 0x0010000f,
696 	0x9290, 0xffffffff, 0x00120011,
697 	0x9294, 0xffffffff, 0x00000015,
698 	0x9298, 0xffffffff, 0x00140013,
699 	0x929c, 0xffffffff, 0x00170016,
700 	0x9150, 0xffffffff, 0x96940200,
701 	0x8708, 0xffffffff, 0x00900100,
702 	0xc478, 0xffffffff, 0x00000080,
703 	0xc404, 0xffffffff, 0x0020003f,
704 	0x30, 0xffffffff, 0x0000001c,
705 	0x34, 0x000f0000, 0x000f0000,
706 	0x160c, 0xffffffff, 0x00000100,
707 	0x1024, 0xffffffff, 0x00000100,
708 	0x102c, 0x00000101, 0x00000000,
709 	0x20a8, 0xffffffff, 0x00000104,
710 	0x264c, 0x000c0000, 0x000c0000,
711 	0x2648, 0x000c0000, 0x000c0000,
712 	0x55e4, 0xff000fff, 0x00000100,
713 	0x55e8, 0x00000001, 0x00000001,
714 	0x2f50, 0x00000001, 0x00000001,
715 	0x30cc, 0xc0000fff, 0x00000104,
716 	0xc1e4, 0x00000001, 0x00000001,
717 	0xd0c0, 0xfffffff0, 0x00000100,
718 	0xd8c0, 0xfffffff0, 0x00000100
719 };
720 
721 static const u32 pitcairn_mgcg_cgcg_init[] =
722 {
723 	0xc400, 0xffffffff, 0xfffffffc,
724 	0x802c, 0xffffffff, 0xe0000000,
725 	0x9a60, 0xffffffff, 0x00000100,
726 	0x92a4, 0xffffffff, 0x00000100,
727 	0xc164, 0xffffffff, 0x00000100,
728 	0x9774, 0xffffffff, 0x00000100,
729 	0x8984, 0xffffffff, 0x06000100,
730 	0x8a18, 0xffffffff, 0x00000100,
731 	0x92a0, 0xffffffff, 0x00000100,
732 	0xc380, 0xffffffff, 0x00000100,
733 	0x8b28, 0xffffffff, 0x00000100,
734 	0x9144, 0xffffffff, 0x00000100,
735 	0x8d88, 0xffffffff, 0x00000100,
736 	0x8d8c, 0xffffffff, 0x00000100,
737 	0x9030, 0xffffffff, 0x00000100,
738 	0x9034, 0xffffffff, 0x00000100,
739 	0x9038, 0xffffffff, 0x00000100,
740 	0x903c, 0xffffffff, 0x00000100,
741 	0xad80, 0xffffffff, 0x00000100,
742 	0xac54, 0xffffffff, 0x00000100,
743 	0x897c, 0xffffffff, 0x06000100,
744 	0x9868, 0xffffffff, 0x00000100,
745 	0x9510, 0xffffffff, 0x00000100,
746 	0xaf04, 0xffffffff, 0x00000100,
747 	0xae04, 0xffffffff, 0x00000100,
748 	0x949c, 0xffffffff, 0x00000100,
749 	0x802c, 0xffffffff, 0xe0000000,
750 	0x9160, 0xffffffff, 0x00010000,
751 	0x9164, 0xffffffff, 0x00030002,
752 	0x9168, 0xffffffff, 0x00040007,
753 	0x916c, 0xffffffff, 0x00060005,
754 	0x9170, 0xffffffff, 0x00090008,
755 	0x9174, 0xffffffff, 0x00020001,
756 	0x9178, 0xffffffff, 0x00040003,
757 	0x917c, 0xffffffff, 0x00000007,
758 	0x9180, 0xffffffff, 0x00060005,
759 	0x9184, 0xffffffff, 0x00090008,
760 	0x9188, 0xffffffff, 0x00030002,
761 	0x918c, 0xffffffff, 0x00050004,
762 	0x9190, 0xffffffff, 0x00000008,
763 	0x9194, 0xffffffff, 0x00070006,
764 	0x9198, 0xffffffff, 0x000a0009,
765 	0x919c, 0xffffffff, 0x00040003,
766 	0x91a0, 0xffffffff, 0x00060005,
767 	0x91a4, 0xffffffff, 0x00000009,
768 	0x91a8, 0xffffffff, 0x00080007,
769 	0x91ac, 0xffffffff, 0x000b000a,
770 	0x91b0, 0xffffffff, 0x00050004,
771 	0x91b4, 0xffffffff, 0x00070006,
772 	0x91b8, 0xffffffff, 0x0008000b,
773 	0x91bc, 0xffffffff, 0x000a0009,
774 	0x91c0, 0xffffffff, 0x000d000c,
775 	0x9200, 0xffffffff, 0x00090008,
776 	0x9204, 0xffffffff, 0x000b000a,
777 	0x9208, 0xffffffff, 0x000c000f,
778 	0x920c, 0xffffffff, 0x000e000d,
779 	0x9210, 0xffffffff, 0x00110010,
780 	0x9214, 0xffffffff, 0x000a0009,
781 	0x9218, 0xffffffff, 0x000c000b,
782 	0x921c, 0xffffffff, 0x0000000f,
783 	0x9220, 0xffffffff, 0x000e000d,
784 	0x9224, 0xffffffff, 0x00110010,
785 	0x9228, 0xffffffff, 0x000b000a,
786 	0x922c, 0xffffffff, 0x000d000c,
787 	0x9230, 0xffffffff, 0x00000010,
788 	0x9234, 0xffffffff, 0x000f000e,
789 	0x9238, 0xffffffff, 0x00120011,
790 	0x923c, 0xffffffff, 0x000c000b,
791 	0x9240, 0xffffffff, 0x000e000d,
792 	0x9244, 0xffffffff, 0x00000011,
793 	0x9248, 0xffffffff, 0x0010000f,
794 	0x924c, 0xffffffff, 0x00130012,
795 	0x9250, 0xffffffff, 0x000d000c,
796 	0x9254, 0xffffffff, 0x000f000e,
797 	0x9258, 0xffffffff, 0x00100013,
798 	0x925c, 0xffffffff, 0x00120011,
799 	0x9260, 0xffffffff, 0x00150014,
800 	0x9150, 0xffffffff, 0x96940200,
801 	0x8708, 0xffffffff, 0x00900100,
802 	0xc478, 0xffffffff, 0x00000080,
803 	0xc404, 0xffffffff, 0x0020003f,
804 	0x30, 0xffffffff, 0x0000001c,
805 	0x34, 0x000f0000, 0x000f0000,
806 	0x160c, 0xffffffff, 0x00000100,
807 	0x1024, 0xffffffff, 0x00000100,
808 	0x102c, 0x00000101, 0x00000000,
809 	0x20a8, 0xffffffff, 0x00000104,
810 	0x55e4, 0xff000fff, 0x00000100,
811 	0x55e8, 0x00000001, 0x00000001,
812 	0x2f50, 0x00000001, 0x00000001,
813 	0x30cc, 0xc0000fff, 0x00000104,
814 	0xc1e4, 0x00000001, 0x00000001,
815 	0xd0c0, 0xfffffff0, 0x00000100,
816 	0xd8c0, 0xfffffff0, 0x00000100
817 };
818 
819 static const u32 verde_mgcg_cgcg_init[] =
820 {
821 	0xc400, 0xffffffff, 0xfffffffc,
822 	0x802c, 0xffffffff, 0xe0000000,
823 	0x9a60, 0xffffffff, 0x00000100,
824 	0x92a4, 0xffffffff, 0x00000100,
825 	0xc164, 0xffffffff, 0x00000100,
826 	0x9774, 0xffffffff, 0x00000100,
827 	0x8984, 0xffffffff, 0x06000100,
828 	0x8a18, 0xffffffff, 0x00000100,
829 	0x92a0, 0xffffffff, 0x00000100,
830 	0xc380, 0xffffffff, 0x00000100,
831 	0x8b28, 0xffffffff, 0x00000100,
832 	0x9144, 0xffffffff, 0x00000100,
833 	0x8d88, 0xffffffff, 0x00000100,
834 	0x8d8c, 0xffffffff, 0x00000100,
835 	0x9030, 0xffffffff, 0x00000100,
836 	0x9034, 0xffffffff, 0x00000100,
837 	0x9038, 0xffffffff, 0x00000100,
838 	0x903c, 0xffffffff, 0x00000100,
839 	0xad80, 0xffffffff, 0x00000100,
840 	0xac54, 0xffffffff, 0x00000100,
841 	0x897c, 0xffffffff, 0x06000100,
842 	0x9868, 0xffffffff, 0x00000100,
843 	0x9510, 0xffffffff, 0x00000100,
844 	0xaf04, 0xffffffff, 0x00000100,
845 	0xae04, 0xffffffff, 0x00000100,
846 	0x949c, 0xffffffff, 0x00000100,
847 	0x802c, 0xffffffff, 0xe0000000,
848 	0x9160, 0xffffffff, 0x00010000,
849 	0x9164, 0xffffffff, 0x00030002,
850 	0x9168, 0xffffffff, 0x00040007,
851 	0x916c, 0xffffffff, 0x00060005,
852 	0x9170, 0xffffffff, 0x00090008,
853 	0x9174, 0xffffffff, 0x00020001,
854 	0x9178, 0xffffffff, 0x00040003,
855 	0x917c, 0xffffffff, 0x00000007,
856 	0x9180, 0xffffffff, 0x00060005,
857 	0x9184, 0xffffffff, 0x00090008,
858 	0x9188, 0xffffffff, 0x00030002,
859 	0x918c, 0xffffffff, 0x00050004,
860 	0x9190, 0xffffffff, 0x00000008,
861 	0x9194, 0xffffffff, 0x00070006,
862 	0x9198, 0xffffffff, 0x000a0009,
863 	0x919c, 0xffffffff, 0x00040003,
864 	0x91a0, 0xffffffff, 0x00060005,
865 	0x91a4, 0xffffffff, 0x00000009,
866 	0x91a8, 0xffffffff, 0x00080007,
867 	0x91ac, 0xffffffff, 0x000b000a,
868 	0x91b0, 0xffffffff, 0x00050004,
869 	0x91b4, 0xffffffff, 0x00070006,
870 	0x91b8, 0xffffffff, 0x0008000b,
871 	0x91bc, 0xffffffff, 0x000a0009,
872 	0x91c0, 0xffffffff, 0x000d000c,
873 	0x9200, 0xffffffff, 0x00090008,
874 	0x9204, 0xffffffff, 0x000b000a,
875 	0x9208, 0xffffffff, 0x000c000f,
876 	0x920c, 0xffffffff, 0x000e000d,
877 	0x9210, 0xffffffff, 0x00110010,
878 	0x9214, 0xffffffff, 0x000a0009,
879 	0x9218, 0xffffffff, 0x000c000b,
880 	0x921c, 0xffffffff, 0x0000000f,
881 	0x9220, 0xffffffff, 0x000e000d,
882 	0x9224, 0xffffffff, 0x00110010,
883 	0x9228, 0xffffffff, 0x000b000a,
884 	0x922c, 0xffffffff, 0x000d000c,
885 	0x9230, 0xffffffff, 0x00000010,
886 	0x9234, 0xffffffff, 0x000f000e,
887 	0x9238, 0xffffffff, 0x00120011,
888 	0x923c, 0xffffffff, 0x000c000b,
889 	0x9240, 0xffffffff, 0x000e000d,
890 	0x9244, 0xffffffff, 0x00000011,
891 	0x9248, 0xffffffff, 0x0010000f,
892 	0x924c, 0xffffffff, 0x00130012,
893 	0x9250, 0xffffffff, 0x000d000c,
894 	0x9254, 0xffffffff, 0x000f000e,
895 	0x9258, 0xffffffff, 0x00100013,
896 	0x925c, 0xffffffff, 0x00120011,
897 	0x9260, 0xffffffff, 0x00150014,
898 	0x9150, 0xffffffff, 0x96940200,
899 	0x8708, 0xffffffff, 0x00900100,
900 	0xc478, 0xffffffff, 0x00000080,
901 	0xc404, 0xffffffff, 0x0020003f,
902 	0x30, 0xffffffff, 0x0000001c,
903 	0x34, 0x000f0000, 0x000f0000,
904 	0x160c, 0xffffffff, 0x00000100,
905 	0x1024, 0xffffffff, 0x00000100,
906 	0x102c, 0x00000101, 0x00000000,
907 	0x20a8, 0xffffffff, 0x00000104,
908 	0x264c, 0x000c0000, 0x000c0000,
909 	0x2648, 0x000c0000, 0x000c0000,
910 	0x55e4, 0xff000fff, 0x00000100,
911 	0x55e8, 0x00000001, 0x00000001,
912 	0x2f50, 0x00000001, 0x00000001,
913 	0x30cc, 0xc0000fff, 0x00000104,
914 	0xc1e4, 0x00000001, 0x00000001,
915 	0xd0c0, 0xfffffff0, 0x00000100,
916 	0xd8c0, 0xfffffff0, 0x00000100
917 };
918 
/*
 * Medium/coarse grain clock gating (MGCG/CGCG) init sequence for Oland.
 * Flat array of {register offset, and-mask, or-value} triplets, consumed
 * three u32s at a time by radeon_program_register_sequence() from
 * si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
998 
/*
 * Medium/coarse grain clock gating (MGCG/CGCG) init sequence for Hainan.
 * Flat array of {register offset, and-mask, or-value} triplets, consumed
 * three u32s at a time by radeon_program_register_sequence() from
 * si_init_golden_registers().  Same shape as the Oland table but without
 * the 0x102c/0x55e4/0x55e8 entries.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1075 
1076 static u32 verde_pg_init[] =
1077 {
1078 	0x353c, 0xffffffff, 0x40000,
1079 	0x3538, 0xffffffff, 0x200010ff,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x7007,
1086 	0x3538, 0xffffffff, 0x300010ff,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x400000,
1093 	0x3538, 0xffffffff, 0x100010ff,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x120200,
1100 	0x3538, 0xffffffff, 0x500010ff,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x1e1e16,
1107 	0x3538, 0xffffffff, 0x600010ff,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x171f1e,
1114 	0x3538, 0xffffffff, 0x700010ff,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x0,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x3538, 0xffffffff, 0x9ff,
1122 	0x3500, 0xffffffff, 0x0,
1123 	0x3504, 0xffffffff, 0x10000800,
1124 	0x3504, 0xffffffff, 0xf,
1125 	0x3504, 0xffffffff, 0xf,
1126 	0x3500, 0xffffffff, 0x4,
1127 	0x3504, 0xffffffff, 0x1000051e,
1128 	0x3504, 0xffffffff, 0xffff,
1129 	0x3504, 0xffffffff, 0xffff,
1130 	0x3500, 0xffffffff, 0x8,
1131 	0x3504, 0xffffffff, 0x80500,
1132 	0x3500, 0xffffffff, 0x12,
1133 	0x3504, 0xffffffff, 0x9050c,
1134 	0x3500, 0xffffffff, 0x1d,
1135 	0x3504, 0xffffffff, 0xb052c,
1136 	0x3500, 0xffffffff, 0x2a,
1137 	0x3504, 0xffffffff, 0x1053e,
1138 	0x3500, 0xffffffff, 0x2d,
1139 	0x3504, 0xffffffff, 0x10546,
1140 	0x3500, 0xffffffff, 0x30,
1141 	0x3504, 0xffffffff, 0xa054e,
1142 	0x3500, 0xffffffff, 0x3c,
1143 	0x3504, 0xffffffff, 0x1055f,
1144 	0x3500, 0xffffffff, 0x3f,
1145 	0x3504, 0xffffffff, 0x10567,
1146 	0x3500, 0xffffffff, 0x42,
1147 	0x3504, 0xffffffff, 0x1056f,
1148 	0x3500, 0xffffffff, 0x45,
1149 	0x3504, 0xffffffff, 0x10572,
1150 	0x3500, 0xffffffff, 0x48,
1151 	0x3504, 0xffffffff, 0x20575,
1152 	0x3500, 0xffffffff, 0x4c,
1153 	0x3504, 0xffffffff, 0x190801,
1154 	0x3500, 0xffffffff, 0x67,
1155 	0x3504, 0xffffffff, 0x1082a,
1156 	0x3500, 0xffffffff, 0x6a,
1157 	0x3504, 0xffffffff, 0x1b082d,
1158 	0x3500, 0xffffffff, 0x87,
1159 	0x3504, 0xffffffff, 0x310851,
1160 	0x3500, 0xffffffff, 0xba,
1161 	0x3504, 0xffffffff, 0x891,
1162 	0x3500, 0xffffffff, 0xbc,
1163 	0x3504, 0xffffffff, 0x893,
1164 	0x3500, 0xffffffff, 0xbe,
1165 	0x3504, 0xffffffff, 0x20895,
1166 	0x3500, 0xffffffff, 0xc2,
1167 	0x3504, 0xffffffff, 0x20899,
1168 	0x3500, 0xffffffff, 0xc6,
1169 	0x3504, 0xffffffff, 0x2089d,
1170 	0x3500, 0xffffffff, 0xca,
1171 	0x3504, 0xffffffff, 0x8a1,
1172 	0x3500, 0xffffffff, 0xcc,
1173 	0x3504, 0xffffffff, 0x8a3,
1174 	0x3500, 0xffffffff, 0xce,
1175 	0x3504, 0xffffffff, 0x308a5,
1176 	0x3500, 0xffffffff, 0xd3,
1177 	0x3504, 0xffffffff, 0x6d08cd,
1178 	0x3500, 0xffffffff, 0x142,
1179 	0x3504, 0xffffffff, 0x2000095a,
1180 	0x3504, 0xffffffff, 0x1,
1181 	0x3500, 0xffffffff, 0x144,
1182 	0x3504, 0xffffffff, 0x301f095b,
1183 	0x3500, 0xffffffff, 0x165,
1184 	0x3504, 0xffffffff, 0xc094d,
1185 	0x3500, 0xffffffff, 0x173,
1186 	0x3504, 0xffffffff, 0xf096d,
1187 	0x3500, 0xffffffff, 0x184,
1188 	0x3504, 0xffffffff, 0x15097f,
1189 	0x3500, 0xffffffff, 0x19b,
1190 	0x3504, 0xffffffff, 0xc0998,
1191 	0x3500, 0xffffffff, 0x1a9,
1192 	0x3504, 0xffffffff, 0x409a7,
1193 	0x3500, 0xffffffff, 0x1af,
1194 	0x3504, 0xffffffff, 0xcdc,
1195 	0x3500, 0xffffffff, 0x1b1,
1196 	0x3504, 0xffffffff, 0x800,
1197 	0x3508, 0xffffffff, 0x6c9b2000,
1198 	0x3510, 0xfc00, 0x2000,
1199 	0x3544, 0xffffffff, 0xfc0,
1200 	0x28d4, 0x00000100, 0x100
1201 };
1202 
1203 static void si_init_golden_registers(struct radeon_device *rdev)
1204 {
1205 	switch (rdev->family) {
1206 	case CHIP_TAHITI:
1207 		radeon_program_register_sequence(rdev,
1208 						 tahiti_golden_registers,
1209 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1210 		radeon_program_register_sequence(rdev,
1211 						 tahiti_golden_rlc_registers,
1212 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1213 		radeon_program_register_sequence(rdev,
1214 						 tahiti_mgcg_cgcg_init,
1215 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1216 		radeon_program_register_sequence(rdev,
1217 						 tahiti_golden_registers2,
1218 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1219 		break;
1220 	case CHIP_PITCAIRN:
1221 		radeon_program_register_sequence(rdev,
1222 						 pitcairn_golden_registers,
1223 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1224 		radeon_program_register_sequence(rdev,
1225 						 pitcairn_golden_rlc_registers,
1226 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1227 		radeon_program_register_sequence(rdev,
1228 						 pitcairn_mgcg_cgcg_init,
1229 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1230 		break;
1231 	case CHIP_VERDE:
1232 		radeon_program_register_sequence(rdev,
1233 						 verde_golden_registers,
1234 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 verde_golden_rlc_registers,
1237 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1238 		radeon_program_register_sequence(rdev,
1239 						 verde_mgcg_cgcg_init,
1240 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1241 		radeon_program_register_sequence(rdev,
1242 						 verde_pg_init,
1243 						 (const u32)ARRAY_SIZE(verde_pg_init));
1244 		break;
1245 	case CHIP_OLAND:
1246 		radeon_program_register_sequence(rdev,
1247 						 oland_golden_registers,
1248 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1249 		radeon_program_register_sequence(rdev,
1250 						 oland_golden_rlc_registers,
1251 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1252 		radeon_program_register_sequence(rdev,
1253 						 oland_mgcg_cgcg_init,
1254 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1255 		break;
1256 	case CHIP_HAINAN:
1257 		radeon_program_register_sequence(rdev,
1258 						 hainan_golden_registers,
1259 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1260 		radeon_program_register_sequence(rdev,
1261 						 hainan_golden_registers2,
1262 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1263 		radeon_program_register_sequence(rdev,
1264 						 hainan_mgcg_cgcg_init,
1265 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1266 		break;
1267 	default:
1268 		break;
1269 	}
1270 }
1271 
1272 /**
1273  * si_get_allowed_info_register - fetch the register for the info ioctl
1274  *
1275  * @rdev: radeon_device pointer
1276  * @reg: register offset in bytes
1277  * @val: register value
1278  *
1279  * Returns 0 for success or -EINVAL for an invalid register
1280  *
1281  */
1282 int si_get_allowed_info_register(struct radeon_device *rdev,
1283 				 u32 reg, u32 *val)
1284 {
1285 	switch (reg) {
1286 	case GRBM_STATUS:
1287 	case GRBM_STATUS2:
1288 	case GRBM_STATUS_SE0:
1289 	case GRBM_STATUS_SE1:
1290 	case SRBM_STATUS:
1291 	case SRBM_STATUS2:
1292 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1293 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1294 	case UVD_STATUS:
1295 		*val = RREG32(reg);
1296 		return 0;
1297 	default:
1298 		return -EINVAL;
1299 	}
1300 }
1301 
/* PCIE bus reference clock and the derived TCLK fallback returned by
 * si_get_xclk() when MUX_TCLK_TO_XCLK is set.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1304 
1305 /**
1306  * si_get_xclk - get the xclk
1307  *
1308  * @rdev: radeon_device pointer
1309  *
1310  * Returns the reference clock used by the gfx engine
1311  * (SI).
1312  */
1313 u32 si_get_xclk(struct radeon_device *rdev)
1314 {
1315 	u32 reference_clock = rdev->clock.spll.reference_freq;
1316 	u32 tmp;
1317 
1318 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1319 	if (tmp & MUX_TCLK_TO_XCLK)
1320 		return TCLK;
1321 
1322 	tmp = RREG32(CG_CLKPIN_CNTL);
1323 	if (tmp & XTALIN_DIVIDE)
1324 		return reference_clock / 4;
1325 
1326 	return reference_clock;
1327 }
1328 
1329 /* get temperature in millidegrees */
1330 int si_get_temp(struct radeon_device *rdev)
1331 {
1332 	u32 temp;
1333 	int actual_temp = 0;
1334 
1335 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1336 		CTF_TEMP_SHIFT;
1337 
1338 	if (temp & 0x200)
1339 		actual_temp = 255;
1340 	else
1341 		actual_temp = temp & 0x1ff;
1342 
1343 	actual_temp = (actual_temp * 1000);
1344 
1345 	return actual_temp;
1346 }
1347 
/* number of {MC_SEQ_IO_DEBUG index, data} pairs in each io_mc_regs table */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Tahiti, written via
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by si_mc_load_microcode()
 * on the legacy (non-new_fw) firmware path.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1388 
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Pitcairn (legacy firmware
 * path of si_mc_load_microcode()); identical to the Tahiti table
 * except for the final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1427 
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Verde (legacy firmware path
 * of si_mc_load_microcode()); identical to the Tahiti table except for
 * the final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1466 
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Oland (legacy firmware path
 * of si_mc_load_microcode()); identical to the Tahiti table except for
 * the final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1505 
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Hainan (legacy firmware path
 * of si_mc_load_microcode()); identical to the Tahiti table except for
 * the final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1544 
1545 /* ucode loading */
1546 int si_mc_load_microcode(struct radeon_device *rdev)
1547 {
1548 	const __be32 *fw_data = NULL;
1549 	const __le32 *new_fw_data = NULL;
1550 	u32 running, blackout = 0;
1551 	u32 *io_mc_regs = NULL;
1552 	const __le32 *new_io_mc_regs = NULL;
1553 	int i, regs_size, ucode_size;
1554 
1555 	if (!rdev->mc_fw)
1556 		return -EINVAL;
1557 
1558 	if (rdev->new_fw) {
1559 		const struct mc_firmware_header_v1_0 *hdr =
1560 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1561 
1562 		radeon_ucode_print_mc_hdr(&hdr->header);
1563 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1564 		new_io_mc_regs = (const __le32 *)
1565 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1566 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1567 		new_fw_data = (const __le32 *)
1568 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1569 	} else {
1570 		ucode_size = rdev->mc_fw->size / 4;
1571 
1572 		switch (rdev->family) {
1573 		case CHIP_TAHITI:
1574 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1575 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1576 			break;
1577 		case CHIP_PITCAIRN:
1578 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1579 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1580 			break;
1581 		case CHIP_VERDE:
1582 		default:
1583 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1584 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1585 			break;
1586 		case CHIP_OLAND:
1587 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1588 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1589 			break;
1590 		case CHIP_HAINAN:
1591 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1592 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1593 			break;
1594 		}
1595 		fw_data = (const __be32 *)rdev->mc_fw->data;
1596 	}
1597 
1598 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1599 
1600 	if (running == 0) {
1601 		if (running) {
1602 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1603 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1604 		}
1605 
1606 		/* reset the engine and set to writable */
1607 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1608 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1609 
1610 		/* load mc io regs */
1611 		for (i = 0; i < regs_size; i++) {
1612 			if (rdev->new_fw) {
1613 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1614 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1615 			} else {
1616 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1617 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1618 			}
1619 		}
1620 		/* load the MC ucode */
1621 		for (i = 0; i < ucode_size; i++) {
1622 			if (rdev->new_fw)
1623 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1624 			else
1625 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1626 		}
1627 
1628 		/* put the engine back into the active state */
1629 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1630 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1631 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1632 
1633 		/* wait for training to complete */
1634 		for (i = 0; i < rdev->usec_timeout; i++) {
1635 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1636 				break;
1637 			udelay(1);
1638 		}
1639 		for (i = 0; i < rdev->usec_timeout; i++) {
1640 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1641 				break;
1642 			udelay(1);
1643 		}
1644 
1645 		if (running)
1646 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1647 	}
1648 
1649 	return 0;
1650 }
1651 
1652 static int si_init_microcode(struct radeon_device *rdev)
1653 {
1654 	const char *chip_name;
1655 	const char *new_chip_name;
1656 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1657 	size_t smc_req_size, mc2_req_size;
1658 	char fw_name[30];
1659 	int err;
1660 	int new_fw = 0;
1661 	bool new_smc = false;
1662 
1663 	DRM_DEBUG("\n");
1664 
1665 	switch (rdev->family) {
1666 	case CHIP_TAHITI:
1667 		chip_name = "TAHITI";
1668 		/* XXX: figure out which Tahitis need the new ucode */
1669 		if (0)
1670 			new_smc = true;
1671 		new_chip_name = "tahiti";
1672 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1673 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1674 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1675 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1676 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1677 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1678 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1679 		break;
1680 	case CHIP_PITCAIRN:
1681 		chip_name = "PITCAIRN";
1682 		if ((rdev->pdev->revision == 0x81) ||
1683 		    (rdev->pdev->device == 0x6810) ||
1684 		    (rdev->pdev->device == 0x6811) ||
1685 		    (rdev->pdev->device == 0x6816) ||
1686 		    (rdev->pdev->device == 0x6817) ||
1687 		    (rdev->pdev->device == 0x6806))
1688 			new_smc = true;
1689 		new_chip_name = "pitcairn";
1690 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1691 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1692 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1693 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1694 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1695 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1696 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1697 		break;
1698 	case CHIP_VERDE:
1699 		chip_name = "VERDE";
1700 		if ((rdev->pdev->revision == 0x81) ||
1701 		    (rdev->pdev->revision == 0x83) ||
1702 		    (rdev->pdev->revision == 0x87) ||
1703 		    (rdev->pdev->device == 0x6820) ||
1704 		    (rdev->pdev->device == 0x6821) ||
1705 		    (rdev->pdev->device == 0x6822) ||
1706 		    (rdev->pdev->device == 0x6823) ||
1707 		    (rdev->pdev->device == 0x682A) ||
1708 		    (rdev->pdev->device == 0x682B))
1709 			new_smc = true;
1710 		new_chip_name = "verde";
1711 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1712 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1713 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1714 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1715 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1716 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1717 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1718 		break;
1719 	case CHIP_OLAND:
1720 		chip_name = "OLAND";
1721 		if ((rdev->pdev->revision == 0xC7) ||
1722 		    (rdev->pdev->revision == 0x80) ||
1723 		    (rdev->pdev->revision == 0x81) ||
1724 		    (rdev->pdev->revision == 0x83) ||
1725 		    (rdev->pdev->device == 0x6604) ||
1726 		    (rdev->pdev->device == 0x6605))
1727 			new_smc = true;
1728 		new_chip_name = "oland";
1729 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1730 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1731 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1732 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1733 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1734 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1735 		break;
1736 	case CHIP_HAINAN:
1737 		chip_name = "HAINAN";
1738 		if ((rdev->pdev->revision == 0x81) ||
1739 		    (rdev->pdev->revision == 0x83) ||
1740 		    (rdev->pdev->revision == 0xC3) ||
1741 		    (rdev->pdev->device == 0x6664) ||
1742 		    (rdev->pdev->device == 0x6665) ||
1743 		    (rdev->pdev->device == 0x6667))
1744 			new_smc = true;
1745 		new_chip_name = "hainan";
1746 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1747 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1748 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1749 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1750 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1751 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1752 		break;
1753 	default: BUG();
1754 	}
1755 
1756 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1757 
1758 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1759 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1760 	if (err) {
1761 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1762 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1763 		if (err)
1764 			goto out;
1765 		if (rdev->pfp_fw->size != pfp_req_size) {
1766 			printk(KERN_ERR
1767 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1768 			       rdev->pfp_fw->size, fw_name);
1769 			err = -EINVAL;
1770 			goto out;
1771 		}
1772 	} else {
1773 		err = radeon_ucode_validate(rdev->pfp_fw);
1774 		if (err) {
1775 			printk(KERN_ERR
1776 			       "si_cp: validation failed for firmware \"%s\"\n",
1777 			       fw_name);
1778 			goto out;
1779 		} else {
1780 			new_fw++;
1781 		}
1782 	}
1783 
1784 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1785 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1786 	if (err) {
1787 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1788 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1789 		if (err)
1790 			goto out;
1791 		if (rdev->me_fw->size != me_req_size) {
1792 			printk(KERN_ERR
1793 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1794 			       rdev->me_fw->size, fw_name);
1795 			err = -EINVAL;
1796 		}
1797 	} else {
1798 		err = radeon_ucode_validate(rdev->me_fw);
1799 		if (err) {
1800 			printk(KERN_ERR
1801 			       "si_cp: validation failed for firmware \"%s\"\n",
1802 			       fw_name);
1803 			goto out;
1804 		} else {
1805 			new_fw++;
1806 		}
1807 	}
1808 
1809 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1810 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1811 	if (err) {
1812 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1813 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1814 		if (err)
1815 			goto out;
1816 		if (rdev->ce_fw->size != ce_req_size) {
1817 			printk(KERN_ERR
1818 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1819 			       rdev->ce_fw->size, fw_name);
1820 			err = -EINVAL;
1821 		}
1822 	} else {
1823 		err = radeon_ucode_validate(rdev->ce_fw);
1824 		if (err) {
1825 			printk(KERN_ERR
1826 			       "si_cp: validation failed for firmware \"%s\"\n",
1827 			       fw_name);
1828 			goto out;
1829 		} else {
1830 			new_fw++;
1831 		}
1832 	}
1833 
1834 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1835 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1836 	if (err) {
1837 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1838 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1839 		if (err)
1840 			goto out;
1841 		if (rdev->rlc_fw->size != rlc_req_size) {
1842 			printk(KERN_ERR
1843 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1844 			       rdev->rlc_fw->size, fw_name);
1845 			err = -EINVAL;
1846 		}
1847 	} else {
1848 		err = radeon_ucode_validate(rdev->rlc_fw);
1849 		if (err) {
1850 			printk(KERN_ERR
1851 			       "si_cp: validation failed for firmware \"%s\"\n",
1852 			       fw_name);
1853 			goto out;
1854 		} else {
1855 			new_fw++;
1856 		}
1857 	}
1858 
1859 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1860 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1861 	if (err) {
1862 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1863 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1864 		if (err) {
1865 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1866 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1867 			if (err)
1868 				goto out;
1869 		}
1870 		if ((rdev->mc_fw->size != mc_req_size) &&
1871 		    (rdev->mc_fw->size != mc2_req_size)) {
1872 			printk(KERN_ERR
1873 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1874 			       rdev->mc_fw->size, fw_name);
1875 			err = -EINVAL;
1876 		}
1877 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1878 	} else {
1879 		err = radeon_ucode_validate(rdev->mc_fw);
1880 		if (err) {
1881 			printk(KERN_ERR
1882 			       "si_cp: validation failed for firmware \"%s\"\n",
1883 			       fw_name);
1884 			goto out;
1885 		} else {
1886 			new_fw++;
1887 		}
1888 	}
1889 
1890 	if (new_smc)
1891 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1892 	else
1893 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1894 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1895 	if (err) {
1896 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1897 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1898 		if (err) {
1899 			printk(KERN_ERR
1900 			       "smc: error loading firmware \"%s\"\n",
1901 			       fw_name);
1902 			release_firmware(rdev->smc_fw);
1903 			rdev->smc_fw = NULL;
1904 			err = 0;
1905 		} else if (rdev->smc_fw->size != smc_req_size) {
1906 			printk(KERN_ERR
1907 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1908 			       rdev->smc_fw->size, fw_name);
1909 			err = -EINVAL;
1910 		}
1911 	} else {
1912 		err = radeon_ucode_validate(rdev->smc_fw);
1913 		if (err) {
1914 			printk(KERN_ERR
1915 			       "si_cp: validation failed for firmware \"%s\"\n",
1916 			       fw_name);
1917 			goto out;
1918 		} else {
1919 			new_fw++;
1920 		}
1921 	}
1922 
1923 	if (new_fw == 0) {
1924 		rdev->new_fw = false;
1925 	} else if (new_fw < 6) {
1926 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1927 		err = -EINVAL;
1928 	} else {
1929 		rdev->new_fw = true;
1930 	}
1931 out:
1932 	if (err) {
1933 		if (err != -EINVAL)
1934 			printk(KERN_ERR
1935 			       "si_cp: Failed to load firmware \"%s\"\n",
1936 			       fw_name);
1937 		release_firmware(rdev->pfp_fw);
1938 		rdev->pfp_fw = NULL;
1939 		release_firmware(rdev->me_fw);
1940 		rdev->me_fw = NULL;
1941 		release_firmware(rdev->ce_fw);
1942 		rdev->ce_fw = NULL;
1943 		release_firmware(rdev->rlc_fw);
1944 		rdev->rlc_fw = NULL;
1945 		release_firmware(rdev->mc_fw);
1946 		rdev->mc_fw = NULL;
1947 		release_firmware(rdev->smc_fw);
1948 		rdev->smc_fw = NULL;
1949 	}
1950 	return err;
1951 }
1952 
1953 /* watermark setup */
1954 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1955 				   struct radeon_crtc *radeon_crtc,
1956 				   struct drm_display_mode *mode,
1957 				   struct drm_display_mode *other_mode)
1958 {
1959 	u32 tmp, buffer_alloc, i;
1960 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1961 	/*
1962 	 * Line Buffer Setup
1963 	 * There are 3 line buffers, each one shared by 2 display controllers.
1964 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1965 	 * the display controllers.  The paritioning is done via one of four
1966 	 * preset allocations specified in bits 21:20:
1967 	 *  0 - half lb
1968 	 *  2 - whole lb, other crtc must be disabled
1969 	 */
1970 	/* this can get tricky if we have two large displays on a paired group
1971 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1972 	 * non-linked crtcs for maximum line buffer allocation.
1973 	 */
1974 	if (radeon_crtc->base.enabled && mode) {
1975 		if (other_mode) {
1976 			tmp = 0; /* 1/2 */
1977 			buffer_alloc = 1;
1978 		} else {
1979 			tmp = 2; /* whole */
1980 			buffer_alloc = 2;
1981 		}
1982 	} else {
1983 		tmp = 0;
1984 		buffer_alloc = 0;
1985 	}
1986 
1987 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1988 	       DC_LB_MEMORY_CONFIG(tmp));
1989 
1990 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1991 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1992 	for (i = 0; i < rdev->usec_timeout; i++) {
1993 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1994 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1995 			break;
1996 		udelay(1);
1997 	}
1998 
1999 	if (radeon_crtc->base.enabled && mode) {
2000 		switch (tmp) {
2001 		case 0:
2002 		default:
2003 			return 4096 * 2;
2004 		case 2:
2005 			return 8192 * 2;
2006 		}
2007 	}
2008 
2009 	/* controller not enabled, so no lb used */
2010 	return 0;
2011 }
2012 
2013 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2014 {
2015 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2016 
2017 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2018 	case 0:
2019 	default:
2020 		return 1;
2021 	case 1:
2022 		return 2;
2023 	case 2:
2024 		return 4;
2025 	case 3:
2026 		return 8;
2027 	case 4:
2028 		return 3;
2029 	case 5:
2030 		return 6;
2031 	case 6:
2032 		return 10;
2033 	case 7:
2034 		return 12;
2035 	case 8:
2036 		return 16;
2037 	}
2038 }
2039 
/* Per-head inputs for the DCE6 watermark calculations below.  Filled in by
 * dce6_program_watermarks() and consumed by the dce6_*_bandwidth(),
 * dce6_latency_watermark() and dce6_check_latency_hiding() helpers.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
2055 
2056 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2057 {
2058 	/* Calculate raw DRAM Bandwidth */
2059 	fixed20_12 dram_efficiency; /* 0.7 */
2060 	fixed20_12 yclk, dram_channels, bandwidth;
2061 	fixed20_12 a;
2062 
2063 	a.full = dfixed_const(1000);
2064 	yclk.full = dfixed_const(wm->yclk);
2065 	yclk.full = dfixed_div(yclk, a);
2066 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2067 	a.full = dfixed_const(10);
2068 	dram_efficiency.full = dfixed_const(7);
2069 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2070 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2071 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2072 
2073 	return dfixed_trunc(bandwidth);
2074 }
2075 
2076 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2077 {
2078 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2079 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2080 	fixed20_12 yclk, dram_channels, bandwidth;
2081 	fixed20_12 a;
2082 
2083 	a.full = dfixed_const(1000);
2084 	yclk.full = dfixed_const(wm->yclk);
2085 	yclk.full = dfixed_div(yclk, a);
2086 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2087 	a.full = dfixed_const(10);
2088 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2089 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2090 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2091 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2092 
2093 	return dfixed_trunc(bandwidth);
2094 }
2095 
2096 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2097 {
2098 	/* Calculate the display Data return Bandwidth */
2099 	fixed20_12 return_efficiency; /* 0.8 */
2100 	fixed20_12 sclk, bandwidth;
2101 	fixed20_12 a;
2102 
2103 	a.full = dfixed_const(1000);
2104 	sclk.full = dfixed_const(wm->sclk);
2105 	sclk.full = dfixed_div(sclk, a);
2106 	a.full = dfixed_const(10);
2107 	return_efficiency.full = dfixed_const(8);
2108 	return_efficiency.full = dfixed_div(return_efficiency, a);
2109 	a.full = dfixed_const(32);
2110 	bandwidth.full = dfixed_mul(a, sclk);
2111 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2112 
2113 	return dfixed_trunc(bandwidth);
2114 }
2115 
2116 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2117 {
2118 	return 32;
2119 }
2120 
2121 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2122 {
2123 	/* Calculate the DMIF Request Bandwidth */
2124 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2125 	fixed20_12 disp_clk, sclk, bandwidth;
2126 	fixed20_12 a, b1, b2;
2127 	u32 min_bandwidth;
2128 
2129 	a.full = dfixed_const(1000);
2130 	disp_clk.full = dfixed_const(wm->disp_clk);
2131 	disp_clk.full = dfixed_div(disp_clk, a);
2132 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2133 	b1.full = dfixed_mul(a, disp_clk);
2134 
2135 	a.full = dfixed_const(1000);
2136 	sclk.full = dfixed_const(wm->sclk);
2137 	sclk.full = dfixed_div(sclk, a);
2138 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2139 	b2.full = dfixed_mul(a, sclk);
2140 
2141 	a.full = dfixed_const(10);
2142 	disp_clk_request_efficiency.full = dfixed_const(8);
2143 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2144 
2145 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2146 
2147 	a.full = dfixed_const(min_bandwidth);
2148 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2149 
2150 	return dfixed_trunc(bandwidth);
2151 }
2152 
2153 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2154 {
2155 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2156 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2157 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2158 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2159 
2160 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2161 }
2162 
2163 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2164 {
2165 	/* Calculate the display mode Average Bandwidth
2166 	 * DisplayMode should contain the source and destination dimensions,
2167 	 * timing, etc.
2168 	 */
2169 	fixed20_12 bpp;
2170 	fixed20_12 line_time;
2171 	fixed20_12 src_width;
2172 	fixed20_12 bandwidth;
2173 	fixed20_12 a;
2174 
2175 	a.full = dfixed_const(1000);
2176 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2177 	line_time.full = dfixed_div(line_time, a);
2178 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2179 	src_width.full = dfixed_const(wm->src_width);
2180 	bandwidth.full = dfixed_mul(src_width, bpp);
2181 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2182 	bandwidth.full = dfixed_div(bandwidth, line_time);
2183 
2184 	return dfixed_trunc(bandwidth);
2185 }
2186 
/* Compute the latency watermark (in ns) for one head: the worst-case memory
 * latency this head must tolerate, extended by any extra time needed to
 * refill a line of the line buffer.  Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time (ns) for a worst-case 512-byte x8 chunk to return */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time (ns) for a cursor line pair (128 bytes x4) to return */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* data return time consumed by the other active heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps or interlace need up to 4
	 * source lines per destination line; otherwise 2 suffice
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk), i.e. the rate at
	 * which the DMIF buffer could be drained over the latency window
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk (MHz) * bytes_per_pixel: scanout consumption rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smaller of supply and demand */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time (ns) to refill the source lines backing one dest line */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if refilling takes longer than the active period, pad the latency */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2249 
2250 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2251 {
2252 	if (dce6_average_bandwidth(wm) <=
2253 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2254 		return true;
2255 	else
2256 		return false;
2257 };
2258 
2259 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2260 {
2261 	if (dce6_average_bandwidth(wm) <=
2262 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2263 		return true;
2264 	else
2265 		return false;
2266 };
2267 
2268 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2269 {
2270 	u32 lb_partitions = wm->lb_size / wm->src_width;
2271 	u32 line_time = wm->active_time + wm->blank_time;
2272 	u32 latency_tolerant_lines;
2273 	u32 latency_hiding;
2274 	fixed20_12 a;
2275 
2276 	a.full = dfixed_const(1);
2277 	if (wm->vsc.full > a.full)
2278 		latency_tolerant_lines = 1;
2279 	else {
2280 		if (lb_partitions <= (wm->vtaps + 1))
2281 			latency_tolerant_lines = 1;
2282 		else
2283 			latency_tolerant_lines = 2;
2284 	}
2285 
2286 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2287 
2288 	if (dce6_latency_watermark(wm) <= latency_hiding)
2289 		return true;
2290 	else
2291 		return false;
2292 }
2293 
/**
 * dce6_program_watermarks - program display watermarks for one crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc whose watermarks are programmed
 * @lb_size: line buffer size granted to this crtc
 * @num_heads: number of active crtcs
 *
 * Builds wm parameter sets for the high and low clock states, derives the
 * latency watermarks and priority marks from them, and writes the results
 * to the DPG arbitration/latency and PRIORITY_A/B registers for this crtc.
 * The computed line_time and watermarks are also saved on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line time in ns, capped at the 16-bit register maximum */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* ARUBA is a fusion part with evergreen-style memory config */
		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a (ns) converted to lines of 16
		 * pixels at the pixel clock, scaled by the horizontal ratio
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same conversion for the low-clock mark */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2456 
2457 void dce6_bandwidth_update(struct radeon_device *rdev)
2458 {
2459 	struct drm_display_mode *mode0 = NULL;
2460 	struct drm_display_mode *mode1 = NULL;
2461 	u32 num_heads = 0, lb_size;
2462 	int i;
2463 
2464 	if (!rdev->mode_info.mode_config_initialized)
2465 		return;
2466 
2467 	radeon_update_display_priority(rdev);
2468 
2469 	for (i = 0; i < rdev->num_crtc; i++) {
2470 		if (rdev->mode_info.crtcs[i]->base.enabled)
2471 			num_heads++;
2472 	}
2473 	for (i = 0; i < rdev->num_crtc; i += 2) {
2474 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2475 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2476 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2477 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2478 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2479 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2480 	}
2481 }
2482 
2483 /*
2484  * Core functions
2485  */
2486 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2487 {
2488 	u32 *tile = rdev->config.si.tile_mode_array;
2489 	const u32 num_tile_mode_states =
2490 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2491 	u32 reg_offset, split_equal_to_row_size;
2492 
2493 	switch (rdev->config.si.mem_row_size_in_kb) {
2494 	case 1:
2495 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2496 		break;
2497 	case 2:
2498 	default:
2499 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2500 		break;
2501 	case 4:
2502 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2503 		break;
2504 	}
2505 
2506 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507 		tile[reg_offset] = 0;
2508 
2509 	switch(rdev->family) {
2510 	case CHIP_TAHITI:
2511 	case CHIP_PITCAIRN:
2512 		/* non-AA compressed depth or any compressed stencil */
2513 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2517 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2518 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2520 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521 		/* 2xAA/4xAA compressed depth only */
2522 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2523 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2524 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2525 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2526 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2527 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2529 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2530 		/* 8xAA compressed depth only */
2531 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2534 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2535 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2536 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2539 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2540 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2544 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2545 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2548 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2549 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2550 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2553 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2554 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2556 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2557 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2558 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2560 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2561 			   TILE_SPLIT(split_equal_to_row_size) |
2562 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2563 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2565 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2566 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2567 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2569 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2570 			   TILE_SPLIT(split_equal_to_row_size) |
2571 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2572 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2574 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2575 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2576 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2579 			   TILE_SPLIT(split_equal_to_row_size) |
2580 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2581 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2583 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2584 		/* 1D and 1D Array Surfaces */
2585 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2588 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2589 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2590 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2592 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2593 		/* Displayable maps. */
2594 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2597 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2598 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2599 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2601 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602 		/* Display 8bpp. */
2603 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2605 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2607 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2608 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2610 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2611 		/* Display 16bpp. */
2612 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2615 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2617 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2619 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2620 		/* Display 32bpp. */
2621 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2624 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2625 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2626 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2629 		/* Thin. */
2630 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2631 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2632 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2633 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2634 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2635 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2637 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2638 		/* Thin 8 bpp. */
2639 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2641 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2642 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2643 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2644 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2646 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2647 		/* Thin 16 bpp. */
2648 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2650 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2651 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2652 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2653 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2655 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2656 		/* Thin 32 bpp. */
2657 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2659 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2661 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2662 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2665 		/* Thin 64 bpp. */
2666 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2668 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2669 			   TILE_SPLIT(split_equal_to_row_size) |
2670 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2671 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2674 		/* 8 bpp PRT. */
2675 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2677 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2678 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2679 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2680 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2681 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2683 		/* 16 bpp PRT */
2684 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2687 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2688 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2689 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2691 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2692 		/* 32 bpp PRT */
2693 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2695 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2696 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2697 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2698 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2700 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2701 		/* 64 bpp PRT */
2702 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2704 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2705 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2706 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2707 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2709 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2710 		/* 128 bpp PRT */
2711 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2715 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2716 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2718 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2719 
2720 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2722 		break;
2723 
2724 	case CHIP_VERDE:
2725 	case CHIP_OLAND:
2726 	case CHIP_HAINAN:
2727 		/* non-AA compressed depth or any compressed stencil */
2728 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2730 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2732 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2733 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2735 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2736 		/* 2xAA/4xAA compressed depth only */
2737 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2741 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2742 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2744 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2745 		/* 8xAA compressed depth only */
2746 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2747 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2748 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2749 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2750 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2751 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2753 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2754 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2755 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2757 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2759 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2760 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2763 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2764 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2768 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2769 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2770 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2771 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2772 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2773 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			   TILE_SPLIT(split_equal_to_row_size) |
2777 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2778 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2780 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2781 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2782 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2783 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2784 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2785 			   TILE_SPLIT(split_equal_to_row_size) |
2786 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2787 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2790 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2791 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2793 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794 			   TILE_SPLIT(split_equal_to_row_size) |
2795 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2796 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2798 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2799 		/* 1D and 1D Array Surfaces */
2800 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2801 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2803 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2804 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2805 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2807 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2808 		/* Displayable maps. */
2809 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2811 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2812 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2813 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2814 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2816 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2817 		/* Display 8bpp. */
2818 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2822 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2823 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2825 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2826 		/* Display 16bpp. */
2827 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2829 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2831 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2832 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2835 		/* Display 32bpp. */
2836 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2839 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2840 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2841 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2844 		/* Thin. */
2845 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2849 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2850 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2852 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853 		/* Thin 8 bpp. */
2854 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2855 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2856 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2857 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2858 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2859 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2860 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2861 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2862 		/* Thin 16 bpp. */
2863 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2865 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2866 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2867 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2868 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2870 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2871 		/* Thin 32 bpp. */
2872 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2874 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2875 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2876 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2877 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2880 		/* Thin 64 bpp. */
2881 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2883 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884 			   TILE_SPLIT(split_equal_to_row_size) |
2885 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2886 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2889 		/* 8 bpp PRT. */
2890 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2892 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2893 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2894 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2895 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2896 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2897 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2898 		/* 16 bpp PRT */
2899 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2901 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2902 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2903 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2904 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2905 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2906 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2907 		/* 32 bpp PRT */
2908 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2911 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2912 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2913 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2914 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2915 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2916 		/* 64 bpp PRT */
2917 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2919 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2920 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2921 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2922 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2923 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2924 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2925 		/* 128 bpp PRT */
2926 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2928 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2929 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2930 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2931 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2934 
2935 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2936 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2937 		break;
2938 
2939 	default:
2940 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2941 	}
2942 }
2943 
2944 static void si_select_se_sh(struct radeon_device *rdev,
2945 			    u32 se_num, u32 sh_num)
2946 {
2947 	u32 data = INSTANCE_BROADCAST_WRITES;
2948 
2949 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2950 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2951 	else if (se_num == 0xffffffff)
2952 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2953 	else if (sh_num == 0xffffffff)
2954 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2955 	else
2956 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2957 	WREG32(GRBM_GFX_INDEX, data);
2958 }
2959 
2960 static u32 si_create_bitmask(u32 bit_width)
2961 {
2962 	u32 i, mask = 0;
2963 
2964 	for (i = 0; i < bit_width; i++) {
2965 		mask <<= 1;
2966 		mask |= 1;
2967 	}
2968 	return mask;
2969 }
2970 
2971 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2972 {
2973 	u32 data, mask;
2974 
2975 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2976 	if (data & 1)
2977 		data &= INACTIVE_CUS_MASK;
2978 	else
2979 		data = 0;
2980 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2981 
2982 	data >>= INACTIVE_CUS_SHIFT;
2983 
2984 	mask = si_create_bitmask(cu_per_sh);
2985 
2986 	return ~data & mask;
2987 }
2988 
2989 static void si_setup_spi(struct radeon_device *rdev,
2990 			 u32 se_num, u32 sh_per_se,
2991 			 u32 cu_per_sh)
2992 {
2993 	int i, j, k;
2994 	u32 data, mask, active_cu;
2995 
2996 	for (i = 0; i < se_num; i++) {
2997 		for (j = 0; j < sh_per_se; j++) {
2998 			si_select_se_sh(rdev, i, j);
2999 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3000 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3001 
3002 			mask = 1;
3003 			for (k = 0; k < 16; k++) {
3004 				mask <<= k;
3005 				if (active_cu & mask) {
3006 					data &= ~mask;
3007 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3008 					break;
3009 				}
3010 			}
3011 		}
3012 	}
3013 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3014 }
3015 
3016 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3017 			      u32 max_rb_num_per_se,
3018 			      u32 sh_per_se)
3019 {
3020 	u32 data, mask;
3021 
3022 	data = RREG32(CC_RB_BACKEND_DISABLE);
3023 	if (data & 1)
3024 		data &= BACKEND_DISABLE_MASK;
3025 	else
3026 		data = 0;
3027 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3028 
3029 	data >>= BACKEND_DISABLE_SHIFT;
3030 
3031 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3032 
3033 	return data & mask;
3034 }
3035 
/**
 * si_setup_rb - gather the enabled render backends and program the
 * rasterizer mapping
 *
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num_per_se: maximum render backends per shader engine
 *
 * Builds a global bitmap of enabled RBs (stored in
 * rdev->config.si.backend_enable_mask for userspace queries) and
 * writes a PA_SC_RASTER_CONFIG RB mapping for each shader engine.
 * Leaves GRBM_GFX_INDEX in broadcast mode on return.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect per-SH disable masks into one bitmap, one
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH-bit field per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the disable bitmap over the full RB range. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* Pick a raster RB_MAP per shader array, consuming enabled_rbs
	 * two bits at a time across all SEs (the shift continues from
	 * one SE to the next, hence the i * sh_per_se + j offset). */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both enabled */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3085 
3086 static void si_gpu_init(struct radeon_device *rdev)
3087 {
3088 	u32 gb_addr_config = 0;
3089 	u32 mc_shared_chmap, mc_arb_ramcfg;
3090 	u32 sx_debug_1;
3091 	u32 hdp_host_path_cntl;
3092 	u32 tmp;
3093 	int i, j;
3094 
3095 	switch (rdev->family) {
3096 	case CHIP_TAHITI:
3097 		rdev->config.si.max_shader_engines = 2;
3098 		rdev->config.si.max_tile_pipes = 12;
3099 		rdev->config.si.max_cu_per_sh = 8;
3100 		rdev->config.si.max_sh_per_se = 2;
3101 		rdev->config.si.max_backends_per_se = 4;
3102 		rdev->config.si.max_texture_channel_caches = 12;
3103 		rdev->config.si.max_gprs = 256;
3104 		rdev->config.si.max_gs_threads = 32;
3105 		rdev->config.si.max_hw_contexts = 8;
3106 
3107 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3108 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3109 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3110 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3111 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3112 		break;
3113 	case CHIP_PITCAIRN:
3114 		rdev->config.si.max_shader_engines = 2;
3115 		rdev->config.si.max_tile_pipes = 8;
3116 		rdev->config.si.max_cu_per_sh = 5;
3117 		rdev->config.si.max_sh_per_se = 2;
3118 		rdev->config.si.max_backends_per_se = 4;
3119 		rdev->config.si.max_texture_channel_caches = 8;
3120 		rdev->config.si.max_gprs = 256;
3121 		rdev->config.si.max_gs_threads = 32;
3122 		rdev->config.si.max_hw_contexts = 8;
3123 
3124 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3125 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3126 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3127 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3128 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3129 		break;
3130 	case CHIP_VERDE:
3131 	default:
3132 		rdev->config.si.max_shader_engines = 1;
3133 		rdev->config.si.max_tile_pipes = 4;
3134 		rdev->config.si.max_cu_per_sh = 5;
3135 		rdev->config.si.max_sh_per_se = 2;
3136 		rdev->config.si.max_backends_per_se = 4;
3137 		rdev->config.si.max_texture_channel_caches = 4;
3138 		rdev->config.si.max_gprs = 256;
3139 		rdev->config.si.max_gs_threads = 32;
3140 		rdev->config.si.max_hw_contexts = 8;
3141 
3142 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3143 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3144 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3145 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3146 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3147 		break;
3148 	case CHIP_OLAND:
3149 		rdev->config.si.max_shader_engines = 1;
3150 		rdev->config.si.max_tile_pipes = 4;
3151 		rdev->config.si.max_cu_per_sh = 6;
3152 		rdev->config.si.max_sh_per_se = 1;
3153 		rdev->config.si.max_backends_per_se = 2;
3154 		rdev->config.si.max_texture_channel_caches = 4;
3155 		rdev->config.si.max_gprs = 256;
3156 		rdev->config.si.max_gs_threads = 16;
3157 		rdev->config.si.max_hw_contexts = 8;
3158 
3159 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3160 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3161 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3162 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3163 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3164 		break;
3165 	case CHIP_HAINAN:
3166 		rdev->config.si.max_shader_engines = 1;
3167 		rdev->config.si.max_tile_pipes = 4;
3168 		rdev->config.si.max_cu_per_sh = 5;
3169 		rdev->config.si.max_sh_per_se = 1;
3170 		rdev->config.si.max_backends_per_se = 1;
3171 		rdev->config.si.max_texture_channel_caches = 2;
3172 		rdev->config.si.max_gprs = 256;
3173 		rdev->config.si.max_gs_threads = 16;
3174 		rdev->config.si.max_hw_contexts = 8;
3175 
3176 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3177 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3178 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3179 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3180 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3181 		break;
3182 	}
3183 
3184 	/* Initialize HDP */
3185 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3186 		WREG32((0x2c14 + j), 0x00000000);
3187 		WREG32((0x2c18 + j), 0x00000000);
3188 		WREG32((0x2c1c + j), 0x00000000);
3189 		WREG32((0x2c20 + j), 0x00000000);
3190 		WREG32((0x2c24 + j), 0x00000000);
3191 	}
3192 
3193 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3194 	WREG32(SRBM_INT_CNTL, 1);
3195 	WREG32(SRBM_INT_ACK, 1);
3196 
3197 	evergreen_fix_pci_max_read_req_size(rdev);
3198 
3199 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3200 
3201 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3202 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3203 
3204 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3205 	rdev->config.si.mem_max_burst_length_bytes = 256;
3206 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3207 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3208 	if (rdev->config.si.mem_row_size_in_kb > 4)
3209 		rdev->config.si.mem_row_size_in_kb = 4;
3210 	/* XXX use MC settings? */
3211 	rdev->config.si.shader_engine_tile_size = 32;
3212 	rdev->config.si.num_gpus = 1;
3213 	rdev->config.si.multi_gpu_tile_size = 64;
3214 
3215 	/* fix up row size */
3216 	gb_addr_config &= ~ROW_SIZE_MASK;
3217 	switch (rdev->config.si.mem_row_size_in_kb) {
3218 	case 1:
3219 	default:
3220 		gb_addr_config |= ROW_SIZE(0);
3221 		break;
3222 	case 2:
3223 		gb_addr_config |= ROW_SIZE(1);
3224 		break;
3225 	case 4:
3226 		gb_addr_config |= ROW_SIZE(2);
3227 		break;
3228 	}
3229 
3230 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3231 	 * not have bank info, so create a custom tiling dword.
3232 	 * bits 3:0   num_pipes
3233 	 * bits 7:4   num_banks
3234 	 * bits 11:8  group_size
3235 	 * bits 15:12 row_size
3236 	 */
3237 	rdev->config.si.tile_config = 0;
3238 	switch (rdev->config.si.num_tile_pipes) {
3239 	case 1:
3240 		rdev->config.si.tile_config |= (0 << 0);
3241 		break;
3242 	case 2:
3243 		rdev->config.si.tile_config |= (1 << 0);
3244 		break;
3245 	case 4:
3246 		rdev->config.si.tile_config |= (2 << 0);
3247 		break;
3248 	case 8:
3249 	default:
3250 		/* XXX what about 12? */
3251 		rdev->config.si.tile_config |= (3 << 0);
3252 		break;
3253 	}
3254 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3255 	case 0: /* four banks */
3256 		rdev->config.si.tile_config |= 0 << 4;
3257 		break;
3258 	case 1: /* eight banks */
3259 		rdev->config.si.tile_config |= 1 << 4;
3260 		break;
3261 	case 2: /* sixteen banks */
3262 	default:
3263 		rdev->config.si.tile_config |= 2 << 4;
3264 		break;
3265 	}
3266 	rdev->config.si.tile_config |=
3267 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3268 	rdev->config.si.tile_config |=
3269 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3270 
3271 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3272 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3273 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3274 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3275 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3276 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3277 	if (rdev->has_uvd) {
3278 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3279 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3280 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3281 	}
3282 
3283 	si_tiling_mode_table_init(rdev);
3284 
3285 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3286 		    rdev->config.si.max_sh_per_se,
3287 		    rdev->config.si.max_backends_per_se);
3288 
3289 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3290 		     rdev->config.si.max_sh_per_se,
3291 		     rdev->config.si.max_cu_per_sh);
3292 
3293 	rdev->config.si.active_cus = 0;
3294 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3295 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3296 			rdev->config.si.active_cus +=
3297 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3298 		}
3299 	}
3300 
3301 	/* set HW defaults for 3D engine */
3302 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3303 				     ROQ_IB2_START(0x2b)));
3304 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3305 
3306 	sx_debug_1 = RREG32(SX_DEBUG_1);
3307 	WREG32(SX_DEBUG_1, sx_debug_1);
3308 
3309 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3310 
3311 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3312 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3313 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3314 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3315 
3316 	WREG32(VGT_NUM_INSTANCES, 1);
3317 
3318 	WREG32(CP_PERFMON_CNTL, 0);
3319 
3320 	WREG32(SQ_CONFIG, 0);
3321 
3322 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3323 					  FORCE_EOV_MAX_REZ_CNT(255)));
3324 
3325 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3326 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3327 
3328 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3329 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3330 
3331 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3332 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3333 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3334 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3335 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3336 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3337 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3338 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3339 
3340 	tmp = RREG32(HDP_MISC_CNTL);
3341 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3342 	WREG32(HDP_MISC_CNTL, tmp);
3343 
3344 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3345 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3346 
3347 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3348 
3349 	udelay(50);
3350 }
3351 
3352 /*
3353  * GPU scratch registers helpers function.
3354  */
3355 static void si_scratch_init(struct radeon_device *rdev)
3356 {
3357 	int i;
3358 
3359 	rdev->scratch.num_reg = 7;
3360 	rdev->scratch.reg_base = SCRATCH_REG0;
3361 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3362 		rdev->scratch.free[i] = true;
3363 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3364 	}
3365 }
3366 
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the read caches over the GART, then uses an EVENT_WRITE_EOP
 * packet to write the fence sequence number to the fence GPU address
 * and raise an interrupt once the pipeline has drained.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 = 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	/* invalidate texture L1, TC, and the shader K$/I$ */
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* full range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3393 
3394 /*
3395  * IB stuff
3396  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on the ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at the IB, tagged with the IB's VM id.  For normal
 * IBs it first saves the expected read pointer after this submission
 * (so a hung IB can be identified) and afterwards flushes the read
 * caches for the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet
			 * + 8 for the cache flush below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet
			 * + 8 for the cache flush below */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* full range */
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3453 
3454 /*
3455  * CP.
3456  */
3457 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3458 {
3459 	if (enable)
3460 		WREG32(CP_ME_CNTL, 0);
3461 	else {
3462 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3463 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3464 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3465 		WREG32(SCRATCH_UMSK, 0);
3466 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3467 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3468 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3469 	}
3470 	udelay(50);
3471 }
3472 
3473 static int si_cp_load_microcode(struct radeon_device *rdev)
3474 {
3475 	int i;
3476 
3477 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3478 		return -EINVAL;
3479 
3480 	si_cp_enable(rdev, false);
3481 
3482 	if (rdev->new_fw) {
3483 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3484 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3485 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3486 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3487 		const struct gfx_firmware_header_v1_0 *me_hdr =
3488 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3489 		const __le32 *fw_data;
3490 		u32 fw_size;
3491 
3492 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3493 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3494 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3495 
3496 		/* PFP */
3497 		fw_data = (const __le32 *)
3498 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3499 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3500 		WREG32(CP_PFP_UCODE_ADDR, 0);
3501 		for (i = 0; i < fw_size; i++)
3502 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3503 		WREG32(CP_PFP_UCODE_ADDR, 0);
3504 
3505 		/* CE */
3506 		fw_data = (const __le32 *)
3507 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3508 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3509 		WREG32(CP_CE_UCODE_ADDR, 0);
3510 		for (i = 0; i < fw_size; i++)
3511 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3512 		WREG32(CP_CE_UCODE_ADDR, 0);
3513 
3514 		/* ME */
3515 		fw_data = (const __be32 *)
3516 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3517 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3518 		WREG32(CP_ME_RAM_WADDR, 0);
3519 		for (i = 0; i < fw_size; i++)
3520 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3521 		WREG32(CP_ME_RAM_WADDR, 0);
3522 	} else {
3523 		const __be32 *fw_data;
3524 
3525 		/* PFP */
3526 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3527 		WREG32(CP_PFP_UCODE_ADDR, 0);
3528 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3529 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3530 		WREG32(CP_PFP_UCODE_ADDR, 0);
3531 
3532 		/* CE */
3533 		fw_data = (const __be32 *)rdev->ce_fw->data;
3534 		WREG32(CP_CE_UCODE_ADDR, 0);
3535 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3536 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3537 		WREG32(CP_CE_UCODE_ADDR, 0);
3538 
3539 		/* ME */
3540 		fw_data = (const __be32 *)rdev->me_fw->data;
3541 		WREG32(CP_ME_RAM_WADDR, 0);
3542 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3543 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3544 		WREG32(CP_ME_RAM_WADDR, 0);
3545 	}
3546 
3547 	WREG32(CP_PFP_UCODE_ADDR, 0);
3548 	WREG32(CP_CE_UCODE_ADDR, 0);
3549 	WREG32(CP_ME_RAM_WADDR, 0);
3550 	WREG32(CP_ME_RAM_RADDR, 0);
3551 	return 0;
3552 }
3553 
3554 static int si_cp_start(struct radeon_device *rdev)
3555 {
3556 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3557 	int r, i;
3558 
3559 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3560 	if (r) {
3561 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3562 		return r;
3563 	}
3564 	/* init the CP */
3565 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3566 	radeon_ring_write(ring, 0x1);
3567 	radeon_ring_write(ring, 0x0);
3568 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3569 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3570 	radeon_ring_write(ring, 0);
3571 	radeon_ring_write(ring, 0);
3572 
3573 	/* init the CE partitions */
3574 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3575 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3576 	radeon_ring_write(ring, 0xc000);
3577 	radeon_ring_write(ring, 0xe000);
3578 	radeon_ring_unlock_commit(rdev, ring, false);
3579 
3580 	si_cp_enable(rdev, true);
3581 
3582 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3583 	if (r) {
3584 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3585 		return r;
3586 	}
3587 
3588 	/* setup clear context state */
3589 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3590 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3591 
3592 	for (i = 0; i < si_default_size; i++)
3593 		radeon_ring_write(ring, si_default_state[i]);
3594 
3595 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3596 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3597 
3598 	/* set clear context state */
3599 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3600 	radeon_ring_write(ring, 0);
3601 
3602 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3603 	radeon_ring_write(ring, 0x00000316);
3604 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3605 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3606 
3607 	radeon_ring_unlock_commit(rdev, ring, false);
3608 
3609 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3610 		ring = &rdev->ring[i];
3611 		r = radeon_ring_lock(rdev, ring, 2);
3612 
3613 		/* clear the compute context state */
3614 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3615 		radeon_ring_write(ring, 0);
3616 
3617 		radeon_ring_unlock_commit(rdev, ring, false);
3618 	}
3619 
3620 	return 0;
3621 }
3622 
3623 static void si_cp_fini(struct radeon_device *rdev)
3624 {
3625 	struct radeon_ring *ring;
3626 	si_cp_enable(rdev, false);
3627 
3628 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3629 	radeon_ring_fini(rdev, ring);
3630 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3631 
3632 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3633 	radeon_ring_fini(rdev, ring);
3634 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3635 
3636 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3637 	radeon_ring_fini(rdev, ring);
3638 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3639 }
3640 
/*
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the gfx ring (RB0) and both compute rings (RB1/RB2): buffer
 * size, read/write pointers, writeback rptr addresses and ring base,
 * then starts the CP via si_cp_start() and ring-tests each ring.
 * Returns 0 on success, or the gfx ring test's error code (the compute
 * rings are only marked not-ready if their own tests fail).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* keep the GUI idle interrupt off while the CP is reprogrammed;
	 * it is re-enabled at the end of this function */
	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	/* scratch register writeback base (256-byte aligned) */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: stop the CP updating the rptr copy */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	/* latch the final CNTL value (this also clears RB_RPTR_WR_ENA) */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* a gfx ring failure is fatal; compute ring failures are not */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3766 
3767 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3768 {
3769 	u32 reset_mask = 0;
3770 	u32 tmp;
3771 
3772 	/* GRBM_STATUS */
3773 	tmp = RREG32(GRBM_STATUS);
3774 	if (tmp & (PA_BUSY | SC_BUSY |
3775 		   BCI_BUSY | SX_BUSY |
3776 		   TA_BUSY | VGT_BUSY |
3777 		   DB_BUSY | CB_BUSY |
3778 		   GDS_BUSY | SPI_BUSY |
3779 		   IA_BUSY | IA_BUSY_NO_DMA))
3780 		reset_mask |= RADEON_RESET_GFX;
3781 
3782 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3783 		   CP_BUSY | CP_COHERENCY_BUSY))
3784 		reset_mask |= RADEON_RESET_CP;
3785 
3786 	if (tmp & GRBM_EE_BUSY)
3787 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3788 
3789 	/* GRBM_STATUS2 */
3790 	tmp = RREG32(GRBM_STATUS2);
3791 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3792 		reset_mask |= RADEON_RESET_RLC;
3793 
3794 	/* DMA_STATUS_REG 0 */
3795 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3796 	if (!(tmp & DMA_IDLE))
3797 		reset_mask |= RADEON_RESET_DMA;
3798 
3799 	/* DMA_STATUS_REG 1 */
3800 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3801 	if (!(tmp & DMA_IDLE))
3802 		reset_mask |= RADEON_RESET_DMA1;
3803 
3804 	/* SRBM_STATUS2 */
3805 	tmp = RREG32(SRBM_STATUS2);
3806 	if (tmp & DMA_BUSY)
3807 		reset_mask |= RADEON_RESET_DMA;
3808 
3809 	if (tmp & DMA1_BUSY)
3810 		reset_mask |= RADEON_RESET_DMA1;
3811 
3812 	/* SRBM_STATUS */
3813 	tmp = RREG32(SRBM_STATUS);
3814 
3815 	if (tmp & IH_BUSY)
3816 		reset_mask |= RADEON_RESET_IH;
3817 
3818 	if (tmp & SEM_BUSY)
3819 		reset_mask |= RADEON_RESET_SEM;
3820 
3821 	if (tmp & GRBM_RQ_PENDING)
3822 		reset_mask |= RADEON_RESET_GRBM;
3823 
3824 	if (tmp & VMC_BUSY)
3825 		reset_mask |= RADEON_RESET_VMC;
3826 
3827 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3828 		   MCC_BUSY | MCD_BUSY))
3829 		reset_mask |= RADEON_RESET_MC;
3830 
3831 	if (evergreen_is_display_hung(rdev))
3832 		reset_mask |= RADEON_RESET_DISPLAY;
3833 
3834 	/* VM_L2_STATUS */
3835 	tmp = RREG32(VM_L2_STATUS);
3836 	if (tmp & L2_BUSY)
3837 		reset_mask |= RADEON_RESET_VMC;
3838 
3839 	/* Skip MC reset as it's mostly likely not hung, just busy */
3840 	if (reset_mask & RADEON_RESET_MC) {
3841 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3842 		reset_mask &= ~RADEON_RESET_MC;
3843 	}
3844 
3845 	return reset_mask;
3846 }
3847 
/*
 * si_gpu_soft_reset - soft reset the blocks named in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of the blocks to reset
 *
 * Disables PG/CG, halts the RLC, the CP and (if requested) both DMA
 * engines, stops MC clients, translates @reset_mask into GRBM and SRBM
 * soft-reset bits, pulses those bits, and finally restores MC client
 * state.  A no-op when @reset_mask is empty.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status for debugging before anything is touched */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* let in-flight work drain before stopping the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, delay, clear.  The extra reads
	 * after each write presumably post/flush the write - NOTE(review):
	 * not documented here, pattern kept as-is */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3979 
/*
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass clocks
 *
 * @rdev: radeon_device pointer
 *
 * Enables the SPLL bypass, requests the clock mux change and polls
 * (up to rdev->usec_timeout iterations) for the change to be
 * acknowledged, then deselects the MPLL as the mclk source.  Used
 * before a PCI config reset so the ASIC comes back on bypass clocks.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	/* engine clock: enable the SPLL bypass */
	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* request the clock mux change */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the hardware to acknowledge the change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	/* drop the request and the mux-update bit */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* memory clock: deselect the MPLL */
	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
4006 
/*
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, puts it into reset and sleep,
 * then returns control to the hardware.  Called after switching to
 * bypass clocks, just before a PCI config reset.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	/* take software control of the SPLL */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* hand control back to the hardware */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
4027 
/*
 * si_gpu_pci_config_reset - hard reset the ASIC via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Full-chip fallback reset: quiesces the CP, both DMA engines and the
 * RLC, stops MC clients, switches sclk/mclk to bypass and powers down
 * the SPLL, disables bus mastering, triggers the PCI config reset,
 * then polls CONFIG_MEMSIZE (up to rdev->usec_timeout iterations)
 * until the ASIC responds to register reads again.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads 0xffffffff while the chip is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4079 
/*
 * si_asic_reset - attempt to reset the GPU
 *
 * @rdev: radeon_device pointer
 * @hard: if true, go straight to a PCI config reset
 *
 * Escalating reset: first a soft reset of whatever blocks
 * si_gpu_check_soft_reset() reports hung; if something is still hung
 * afterwards and the radeon_hard_reset option is set, falls back to a
 * PCI config reset.  The BIOS scratch "engine hung" flag is set while
 * a reset is pending and cleared once the GPU reports idle.  Always
 * returns 0.
 */
int si_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		si_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	/* re-check: anything still hung? */
	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4110 
4111 /**
4112  * si_gfx_is_lockup - Check if the GFX engine is locked up
4113  *
4114  * @rdev: radeon_device pointer
4115  * @ring: radeon_ring structure holding ring information
4116  *
4117  * Check if the GFX engine is locked up.
4118  * Returns true if the engine appears to be locked up, false if not.
4119  */
4120 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4121 {
4122 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4123 
4124 	if (!(reset_mask & (RADEON_RESET_GFX |
4125 			    RADEON_RESET_COMPUTE |
4126 			    RADEON_RESET_CP))) {
4127 		radeon_ring_lockup_update(rdev, ring);
4128 		return false;
4129 	}
4130 	return radeon_ring_test_lockup(rdev, ring);
4131 }
4132 
4133 /* MC */
/*
 * si_mc_program - program the memory controller's VRAM layout
 *
 * @rdev: radeon_device pointer
 *
 * Stops MC clients, programs the system/VRAM apertures and the FB
 * location from rdev->mc, points faulting accesses at the VRAM scratch
 * page, disables the AGP aperture, and restores MC client state.  On
 * chips with display, also locks out VGA aperture access and disables
 * the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* faulting system aperture accesses go to the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, low 16 bits = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4184 
4185 void si_vram_gtt_location(struct radeon_device *rdev,
4186 			  struct radeon_mc *mc)
4187 {
4188 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4189 		/* leave room for at least 1024M GTT */
4190 		dev_warn(rdev->dev, "limiting VRAM\n");
4191 		mc->real_vram_size = 0xFFC0000000ULL;
4192 		mc->mc_vram_size = 0xFFC0000000ULL;
4193 	}
4194 	radeon_vram_location(rdev, &rdev->mc, 0);
4195 	rdev->mc.gtt_base_align = 0;
4196 	radeon_gtt_location(rdev, mc);
4197 }
4198 
4199 static int si_mc_init(struct radeon_device *rdev)
4200 {
4201 	u32 tmp;
4202 	int chansize, numchan;
4203 
4204 	/* Get VRAM informations */
4205 	rdev->mc.vram_is_ddr = true;
4206 	tmp = RREG32(MC_ARB_RAMCFG);
4207 	if (tmp & CHANSIZE_OVERRIDE) {
4208 		chansize = 16;
4209 	} else if (tmp & CHANSIZE_MASK) {
4210 		chansize = 64;
4211 	} else {
4212 		chansize = 32;
4213 	}
4214 	tmp = RREG32(MC_SHARED_CHMAP);
4215 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4216 	case 0:
4217 	default:
4218 		numchan = 1;
4219 		break;
4220 	case 1:
4221 		numchan = 2;
4222 		break;
4223 	case 2:
4224 		numchan = 4;
4225 		break;
4226 	case 3:
4227 		numchan = 8;
4228 		break;
4229 	case 4:
4230 		numchan = 3;
4231 		break;
4232 	case 5:
4233 		numchan = 6;
4234 		break;
4235 	case 6:
4236 		numchan = 10;
4237 		break;
4238 	case 7:
4239 		numchan = 12;
4240 		break;
4241 	case 8:
4242 		numchan = 16;
4243 		break;
4244 	}
4245 	rdev->mc.vram_width = numchan * chansize;
4246 	/* Could aper size report 0 ? */
4247 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4248 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4249 	/* size in MB on si */
4250 	tmp = RREG32(CONFIG_MEMSIZE);
4251 	/* some boards may have garbage in the upper 16 bits */
4252 	if (tmp & 0xffff0000) {
4253 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4254 		if (tmp & 0xffff)
4255 			tmp &= 0xffff;
4256 	}
4257 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4258 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4259 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4260 	si_vram_gtt_location(rdev, &rdev->mc);
4261 	radeon_update_bandwidth_info(rdev);
4262 
4263 	return 0;
4264 }
4265 
4266 /*
4267  * GART
4268  */
/*
 * si_pcie_gart_tlb_flush - flush the VM TLB for context 0
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache so pending page table writes reach memory,
 * then requests a TLB invalidate for VM context 0.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4277 
/*
 * si_pcie_gart_enable - set up the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, sets up VM context 0 to cover the GTT aperture (faults go
 * to the dummy page), restores the saved page table base addresses for
 * contexts 1-15 and enables them with full protection-fault reporting,
 * then flushes the TLBs.  Returns 0 on success or a negative error
 * code if the page table could not be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: covers the GTT aperture, system (kernel) mappings */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers cleared here; purpose not
	 * visible in this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4365 
/*
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Saves the per-VM page table base addresses for contexts 1-15 (so
 * si_pcie_gart_enable() can restore them on resume), disables all VM
 * contexts and the L1 TLB / L2 cache, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* stash the context1-15 page table bases before disabling */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4395 
/*
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the page table VRAM object
 * and the GART bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4402 
4403 /* vm parser */
/*
 * si_vm_reg_valid - check whether a VM command stream may touch a register
 *
 * @reg: register offset in bytes
 *
 * Whitelist used by the VM IB parser: context registers (>= 0x28000)
 * and shader registers (0xB000-0xBFFF) are always allowed; any other
 * register must be one of the config registers listed below.  Returns
 * true if access is allowed, false otherwise (and logs the offender).
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* shader regs are also fine */
	if (reg >= 0xB000 && reg < 0xC000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4448 
/*
 * si_vm_packet3_ce_check - validate a PACKET3 on the constant engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB buffer (unused here; opcode check only)
 * @pkt: parsed packet
 *
 * Whitelist of PACKET3 opcodes allowed on the CE from a VM context.
 * Returns 0 if the opcode is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4470 
4471 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4472 {
4473 	u32 start_reg, reg, i;
4474 	u32 command = ib[idx + 4];
4475 	u32 info = ib[idx + 1];
4476 	u32 idx_value = ib[idx];
4477 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4478 		/* src address space is register */
4479 		if (((info & 0x60000000) >> 29) == 0) {
4480 			start_reg = idx_value << 2;
4481 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4482 				reg = start_reg;
4483 				if (!si_vm_reg_valid(reg)) {
4484 					DRM_ERROR("CP DMA Bad SRC register\n");
4485 					return -EINVAL;
4486 				}
4487 			} else {
4488 				for (i = 0; i < (command & 0x1fffff); i++) {
4489 					reg = start_reg + (4 * i);
4490 					if (!si_vm_reg_valid(reg)) {
4491 						DRM_ERROR("CP DMA Bad SRC register\n");
4492 						return -EINVAL;
4493 					}
4494 				}
4495 			}
4496 		}
4497 	}
4498 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4499 		/* dst address space is register */
4500 		if (((info & 0x00300000) >> 20) == 0) {
4501 			start_reg = ib[idx + 2];
4502 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4503 				reg = start_reg;
4504 				if (!si_vm_reg_valid(reg)) {
4505 					DRM_ERROR("CP DMA Bad DST register\n");
4506 					return -EINVAL;
4507 				}
4508 			} else {
4509 				for (i = 0; i < (command & 0x1fffff); i++) {
4510 					reg = start_reg + (4 * i);
4511 				if (!si_vm_reg_valid(reg)) {
4512 						DRM_ERROR("CP DMA Bad DST register\n");
4513 						return -EINVAL;
4514 					}
4515 				}
4516 			}
4517 		}
4518 	}
4519 	return 0;
4520 }
4521 
/**
 * si_vm_packet3_gfx_check - validate a PM4 type-3 packet for the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword array
 * @pkt: decoded packet header (idx/opcode/count filled in by the caller)
 *
 * Returns 0 if the packet may be submitted from a VM IB, -EINVAL if it
 * would write a register rejected by si_vm_reg_valid() or uses an
 * opcode that is not on the allow list.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes are allowed without further checking */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* bits 8-11 == 0: destination is a register; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* bits 8-11 == 0: destination is register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write mode: only one reg touched */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: check every register in the range */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: write goes to a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole register window, then each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4639 
/**
 * si_vm_packet3_compute_check - validate a PM4 type-3 packet for compute rings
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword array
 * @pkt: decoded packet header (idx/opcode/count filled in by the caller)
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller opcode
 * allow list (no draw packets), with the same register validation for the
 * packets that can write registers.  Returns 0 or -EINVAL.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes are allowed without further checking */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* bits 8-11 == 0: destination is a register; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* bits 8-11 == 0: destination is register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write mode: only one reg touched */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: check every register in the range */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: write goes to a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4727 
/**
 * si_ib_parse - validate an indirect buffer before VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet.  Type-0 packets (raw register writes)
 * are rejected; type-2 packets are skipped; type-3 packets are handed to
 * the per-ring validator (CE, GFX or compute).  On the first failure the
 * whole IB is dumped with the offending dword marked.
 *
 * Returns 0 on success, -EINVAL on any invalid packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are single-dword filler */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* advance past header dword + (count + 1) body dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the whole IB, marking the dword that failed */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4786 
4787 /*
4788  * vm
4789  */
4790 int si_vm_init(struct radeon_device *rdev)
4791 {
4792 	/* number of VMs */
4793 	rdev->vm_manager.nvm = 16;
4794 	/* base offset of vram pages */
4795 	rdev->vm_manager.vram_base_offset = 0;
4796 
4797 	return 0;
4798 }
4799 
/* Nothing to tear down for the SI VM manager; kept for ASIC-function symmetry. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4803 
4804 /**
4805  * si_vm_decode_fault - print human readable fault info
4806  *
4807  * @rdev: radeon_device pointer
4808  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4809  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4810  *
4811  * Print human readable fault information (SI).
4812  */
4813 static void si_vm_decode_fault(struct radeon_device *rdev,
4814 			       u32 status, u32 addr)
4815 {
4816 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4817 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4818 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4819 	char *block;
4820 
4821 	if (rdev->family == CHIP_TAHITI) {
4822 		switch (mc_id) {
4823 		case 160:
4824 		case 144:
4825 		case 96:
4826 		case 80:
4827 		case 224:
4828 		case 208:
4829 		case 32:
4830 		case 16:
4831 			block = "CB";
4832 			break;
4833 		case 161:
4834 		case 145:
4835 		case 97:
4836 		case 81:
4837 		case 225:
4838 		case 209:
4839 		case 33:
4840 		case 17:
4841 			block = "CB_FMASK";
4842 			break;
4843 		case 162:
4844 		case 146:
4845 		case 98:
4846 		case 82:
4847 		case 226:
4848 		case 210:
4849 		case 34:
4850 		case 18:
4851 			block = "CB_CMASK";
4852 			break;
4853 		case 163:
4854 		case 147:
4855 		case 99:
4856 		case 83:
4857 		case 227:
4858 		case 211:
4859 		case 35:
4860 		case 19:
4861 			block = "CB_IMMED";
4862 			break;
4863 		case 164:
4864 		case 148:
4865 		case 100:
4866 		case 84:
4867 		case 228:
4868 		case 212:
4869 		case 36:
4870 		case 20:
4871 			block = "DB";
4872 			break;
4873 		case 165:
4874 		case 149:
4875 		case 101:
4876 		case 85:
4877 		case 229:
4878 		case 213:
4879 		case 37:
4880 		case 21:
4881 			block = "DB_HTILE";
4882 			break;
4883 		case 167:
4884 		case 151:
4885 		case 103:
4886 		case 87:
4887 		case 231:
4888 		case 215:
4889 		case 39:
4890 		case 23:
4891 			block = "DB_STEN";
4892 			break;
4893 		case 72:
4894 		case 68:
4895 		case 64:
4896 		case 8:
4897 		case 4:
4898 		case 0:
4899 		case 136:
4900 		case 132:
4901 		case 128:
4902 		case 200:
4903 		case 196:
4904 		case 192:
4905 			block = "TC";
4906 			break;
4907 		case 112:
4908 		case 48:
4909 			block = "CP";
4910 			break;
4911 		case 49:
4912 		case 177:
4913 		case 50:
4914 		case 178:
4915 			block = "SH";
4916 			break;
4917 		case 53:
4918 		case 190:
4919 			block = "VGT";
4920 			break;
4921 		case 117:
4922 			block = "IH";
4923 			break;
4924 		case 51:
4925 		case 115:
4926 			block = "RLC";
4927 			break;
4928 		case 119:
4929 		case 183:
4930 			block = "DMA0";
4931 			break;
4932 		case 61:
4933 			block = "DMA1";
4934 			break;
4935 		case 248:
4936 		case 120:
4937 			block = "HDP";
4938 			break;
4939 		default:
4940 			block = "unknown";
4941 			break;
4942 		}
4943 	} else {
4944 		switch (mc_id) {
4945 		case 32:
4946 		case 16:
4947 		case 96:
4948 		case 80:
4949 		case 160:
4950 		case 144:
4951 		case 224:
4952 		case 208:
4953 			block = "CB";
4954 			break;
4955 		case 33:
4956 		case 17:
4957 		case 97:
4958 		case 81:
4959 		case 161:
4960 		case 145:
4961 		case 225:
4962 		case 209:
4963 			block = "CB_FMASK";
4964 			break;
4965 		case 34:
4966 		case 18:
4967 		case 98:
4968 		case 82:
4969 		case 162:
4970 		case 146:
4971 		case 226:
4972 		case 210:
4973 			block = "CB_CMASK";
4974 			break;
4975 		case 35:
4976 		case 19:
4977 		case 99:
4978 		case 83:
4979 		case 163:
4980 		case 147:
4981 		case 227:
4982 		case 211:
4983 			block = "CB_IMMED";
4984 			break;
4985 		case 36:
4986 		case 20:
4987 		case 100:
4988 		case 84:
4989 		case 164:
4990 		case 148:
4991 		case 228:
4992 		case 212:
4993 			block = "DB";
4994 			break;
4995 		case 37:
4996 		case 21:
4997 		case 101:
4998 		case 85:
4999 		case 165:
5000 		case 149:
5001 		case 229:
5002 		case 213:
5003 			block = "DB_HTILE";
5004 			break;
5005 		case 39:
5006 		case 23:
5007 		case 103:
5008 		case 87:
5009 		case 167:
5010 		case 151:
5011 		case 231:
5012 		case 215:
5013 			block = "DB_STEN";
5014 			break;
5015 		case 72:
5016 		case 68:
5017 		case 8:
5018 		case 4:
5019 		case 136:
5020 		case 132:
5021 		case 200:
5022 		case 196:
5023 			block = "TC";
5024 			break;
5025 		case 112:
5026 		case 48:
5027 			block = "CP";
5028 			break;
5029 		case 49:
5030 		case 177:
5031 		case 50:
5032 		case 178:
5033 			block = "SH";
5034 			break;
5035 		case 53:
5036 			block = "VGT";
5037 			break;
5038 		case 117:
5039 			block = "IH";
5040 			break;
5041 		case 51:
5042 		case 115:
5043 			block = "RLC";
5044 			break;
5045 		case 119:
5046 		case 183:
5047 			block = "DMA0";
5048 			break;
5049 		case 61:
5050 			block = "DMA1";
5051 			break;
5052 		case 248:
5053 		case 120:
5054 			block = "HDP";
5055 			break;
5056 		default:
5057 			block = "unknown";
5058 			break;
5059 		}
5060 	}
5061 
5062 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5063 	       protections, vmid, addr,
5064 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5065 	       block, mc_id);
5066 }
5067 
/**
 * si_vm_flush - flush the TLB for a VM context via ring packets
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id (0-15); ids 0-7 and 8-15 live in separate
 *         register banks
 * @pd_addr: page directory base address (shifted into page units below)
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5116 
5117 /*
5118  *  Power and clock gating
5119  */
5120 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5121 {
5122 	int i;
5123 
5124 	for (i = 0; i < rdev->usec_timeout; i++) {
5125 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5126 			break;
5127 		udelay(1);
5128 	}
5129 
5130 	for (i = 0; i < rdev->usec_timeout; i++) {
5131 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5132 			break;
5133 		udelay(1);
5134 	}
5135 }
5136 
/**
 * si_enable_gui_idle_interrupt - toggle the CP context busy/empty interrupts
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable the interrupts, false to disable
 *
 * When disabling, additionally polls RLC_STAT until the GFX block reports
 * clocked and powered (and not RLC-busy or in light sleep), or the usec
 * timeout expires.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register (result unused; presumably forces the
		 * GFX block awake before polling — TODO confirm) */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5162 
/**
 * si_set_uvd_dcm - configure UVD dynamic clock mode
 *
 * @rdev: radeon_device pointer
 * @sw_mode: true for software-controlled clock dividers (DYN_OR/DYN_RR
 *           enabled in UVD_CGC_CTRL2), false for hardware mode
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
			   bool sw_mode)
{
	u32 tmp, tmp2;

	tmp = RREG32(UVD_CGC_CTRL);
	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
	tmp |= DCM | CG_DT(1) | CLK_OD(4);

	if (sw_mode) {
		/* clear the per-block gating bits; divider comes from CTRL2 */
		tmp &= ~0x7ffff800;
		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
	} else {
		/* set all per-block gating bits; CTRL2 unused */
		tmp |= 0x7ffff800;
		tmp2 = 0;
	}

	WREG32(UVD_CGC_CTRL, tmp);
	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
5183 
/**
 * si_init_uvd_internal_cg - initialize UVD internal clock gating
 *
 * @rdev: radeon_device pointer
 *
 * hw_mode is hard-coded true, so this always selects hardware DCM via
 * si_set_uvd_dcm(); the else branch (clearing DCM) is intentionally
 * kept as dead code documenting the alternative.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5196 
5197 static u32 si_halt_rlc(struct radeon_device *rdev)
5198 {
5199 	u32 data, orig;
5200 
5201 	orig = data = RREG32(RLC_CNTL);
5202 
5203 	if (data & RLC_ENABLE) {
5204 		data &= ~RLC_ENABLE;
5205 		WREG32(RLC_CNTL, data);
5206 
5207 		si_wait_for_rlc_serdes(rdev);
5208 	}
5209 
5210 	return orig;
5211 }
5212 
5213 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5214 {
5215 	u32 tmp;
5216 
5217 	tmp = RREG32(RLC_CNTL);
5218 	if (tmp != rlc)
5219 		WREG32(RLC_CNTL, rlc);
5220 }
5221 
5222 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5223 {
5224 	u32 data, orig;
5225 
5226 	orig = data = RREG32(DMA_PG);
5227 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5228 		data |= PG_CNTL_ENABLE;
5229 	else
5230 		data &= ~PG_CNTL_ENABLE;
5231 	if (orig != data)
5232 		WREG32(DMA_PG, data);
5233 }
5234 
5235 static void si_init_dma_pg(struct radeon_device *rdev)
5236 {
5237 	u32 tmp;
5238 
5239 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5240 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5241 
5242 	for (tmp = 0; tmp < 5; tmp++)
5243 		WREG32(DMA_PGFSM_WRITE, 0);
5244 }
5245 
/**
 * si_enable_gfx_cgpg - toggle GFX coarse-grain powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable (enable also requires RADEON_PG_SUPPORT_GFX_PG)
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* powergating delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* result discarded — presumably a wake-up/posting read of a
		 * gfx register after disabling auto PG; TODO confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5270 
/**
 * si_init_gfx_cgpg - initialize GFX powergating state
 *
 * @rdev: radeon_device pointer
 *
 * Points the RLC at the save/restore and clear-state buffers and
 * programs the auto-powergating idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* take register state from the save/restore buffer */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM sample-gap idle threshold */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5290 
5291 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5292 {
5293 	u32 mask = 0, tmp, tmp1;
5294 	int i;
5295 
5296 	si_select_se_sh(rdev, se, sh);
5297 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5298 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5299 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5300 
5301 	tmp &= 0xffff0000;
5302 
5303 	tmp |= tmp1;
5304 	tmp >>= 16;
5305 
5306 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5307 		mask <<= 1;
5308 		mask |= 1;
5309 	}
5310 
5311 	return (~tmp) & mask;
5312 }
5313 
5314 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5315 {
5316 	u32 i, j, k, active_cu_number = 0;
5317 	u32 mask, counter, cu_bitmap;
5318 	u32 tmp = 0;
5319 
5320 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5321 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5322 			mask = 1;
5323 			cu_bitmap = 0;
5324 			counter  = 0;
5325 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5326 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5327 					if (counter < 2)
5328 						cu_bitmap |= mask;
5329 					counter++;
5330 				}
5331 				mask <<= 1;
5332 			}
5333 
5334 			active_cu_number += counter;
5335 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5336 		}
5337 	}
5338 
5339 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5340 
5341 	tmp = RREG32(RLC_MAX_PG_CU);
5342 	tmp &= ~MAX_PU_CU_MASK;
5343 	tmp |= MAX_PU_CU(active_cu_number);
5344 	WREG32(RLC_MAX_PG_CU, tmp);
5345 }
5346 
/**
 * si_enable_cgcg - toggle coarse-grain clock gating for GFX
 *
 * @rdev: radeon_device pointer
 * @enable: enable (requires RADEON_CG_SUPPORT_GFX_CGCG) or disable
 *
 * The enable path halts the RLC, broadcasts a serdes write to all
 * masters, waits for completion and then restores the RLC before
 * setting the CGCG/CGLS bits.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all masters */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads — likely flushing/settling the CB clock
		 * gating state before clearing the enable bits */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5386 
/**
 * si_enable_mgcg - toggle medium-grain clock gating for GFX
 *
 * @rdev: radeon_device pointer
 * @enable: enable (requires RADEON_CG_SUPPORT_GFX_MGCG) or disable
 *
 * Also handles CP memory light sleep (RADEON_CG_SUPPORT_GFX_CP_LS) and
 * broadcasts the new gating state to the RLC serdes with the RLC halted.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to let MGCG take effect */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5442 
/**
 * si_enable_uvd_mgcg - toggle UVD medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable (requires RADEON_CG_SUPPORT_UVD_MGCG) or disable
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* clear the SMC-side local gating overrides */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* force all SMC-side local gating overrides on */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5474 
/* MC/VM clock-gating control registers shared by si_enable_mc_ls()
 * and si_enable_mc_mgcg(); each holds MC_LS_ENABLE / MC_CG_ENABLE bits.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5487 
5488 static void si_enable_mc_ls(struct radeon_device *rdev,
5489 			    bool enable)
5490 {
5491 	int i;
5492 	u32 orig, data;
5493 
5494 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5495 		orig = data = RREG32(mc_cg_registers[i]);
5496 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5497 			data |= MC_LS_ENABLE;
5498 		else
5499 			data &= ~MC_LS_ENABLE;
5500 		if (data != orig)
5501 			WREG32(mc_cg_registers[i], data);
5502 	}
5503 }
5504 
5505 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5506 			       bool enable)
5507 {
5508 	int i;
5509 	u32 orig, data;
5510 
5511 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5512 		orig = data = RREG32(mc_cg_registers[i]);
5513 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5514 			data |= MC_CG_ENABLE;
5515 		else
5516 			data &= ~MC_CG_ENABLE;
5517 		if (data != orig)
5518 			WREG32(mc_cg_registers[i], data);
5519 	}
5520 }
5521 
5522 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5523 			       bool enable)
5524 {
5525 	u32 orig, data, offset;
5526 	int i;
5527 
5528 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5529 		for (i = 0; i < 2; i++) {
5530 			if (i == 0)
5531 				offset = DMA0_REGISTER_OFFSET;
5532 			else
5533 				offset = DMA1_REGISTER_OFFSET;
5534 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5535 			data &= ~MEM_POWER_OVERRIDE;
5536 			if (data != orig)
5537 				WREG32(DMA_POWER_CNTL + offset, data);
5538 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5539 		}
5540 	} else {
5541 		for (i = 0; i < 2; i++) {
5542 			if (i == 0)
5543 				offset = DMA0_REGISTER_OFFSET;
5544 			else
5545 				offset = DMA1_REGISTER_OFFSET;
5546 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5547 			data |= MEM_POWER_OVERRIDE;
5548 			if (data != orig)
5549 				WREG32(DMA_POWER_CNTL + offset, data);
5550 
5551 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5552 			data = 0xff000000;
5553 			if (data != orig)
5554 				WREG32(DMA_CLK_CTRL + offset, data);
5555 		}
5556 	}
5557 }
5558 
5559 static void si_enable_bif_mgls(struct radeon_device *rdev,
5560 			       bool enable)
5561 {
5562 	u32 orig, data;
5563 
5564 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5565 
5566 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5567 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5568 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5569 	else
5570 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5571 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5572 
5573 	if (orig != data)
5574 		WREG32_PCIE(PCIE_CNTL2, data);
5575 }
5576 
5577 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5578 			       bool enable)
5579 {
5580 	u32 orig, data;
5581 
5582 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5583 
5584 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5585 		data &= ~CLOCK_GATING_DIS;
5586 	else
5587 		data |= CLOCK_GATING_DIS;
5588 
5589 	if (orig != data)
5590 		WREG32(HDP_HOST_PATH_CNTL, data);
5591 }
5592 
5593 static void si_enable_hdp_ls(struct radeon_device *rdev,
5594 			     bool enable)
5595 {
5596 	u32 orig, data;
5597 
5598 	orig = data = RREG32(HDP_MEM_POWER_LS);
5599 
5600 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5601 		data |= HDP_LS_ENABLE;
5602 	else
5603 		data &= ~HDP_LS_ENABLE;
5604 
5605 	if (orig != data)
5606 		WREG32(HDP_MEM_POWER_LS, data);
5607 }
5608 
/**
 * si_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* flags selecting which blocks to touch
 * @enable: true to enable gating, false to disable
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable, reverse on disable */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5649 
/* Enable clock gating on all supported blocks; UVD gating and its
 * internal CG are only set up when the asic has a UVD block.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5662 
/* Disable clock gating in the reverse order of si_init_cg(): UVD first,
 * then the remaining blocks.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5674 
5675 u32 si_get_csb_size(struct radeon_device *rdev)
5676 {
5677 	u32 count = 0;
5678 	const struct cs_section_def *sect = NULL;
5679 	const struct cs_extent_def *ext = NULL;
5680 
5681 	if (rdev->rlc.cs_data == NULL)
5682 		return 0;
5683 
5684 	/* begin clear state */
5685 	count += 2;
5686 	/* context control state */
5687 	count += 3;
5688 
5689 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5690 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5691 			if (sect->id == SECT_CONTEXT)
5692 				count += 2 + ext->reg_count;
5693 			else
5694 				return 0;
5695 		}
5696 	}
5697 	/* pa_sc_raster_config */
5698 	count += 3;
5699 	/* end clear state */
5700 	count += 2;
5701 	/* clear state */
5702 	count += 2;
5703 
5704 	return count;
5705 }
5706 
/**
 * si_get_csb_buffer - fill the clear-state buffer with PM4 packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (must hold si_get_csb_size() dwords)
 *
 * Layout must stay in sync with si_get_csb_size(): preamble begin,
 * context control, one SET_CONTEXT_REG per cs_data extent, the per-asic
 * PA_SC_RASTER_CONFIG value, preamble end, and a CLEAR_STATE packet.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context register index is relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-asic raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5766 
5767 static void si_init_pg(struct radeon_device *rdev)
5768 {
5769 	if (rdev->pg_flags) {
5770 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5771 			si_init_dma_pg(rdev);
5772 		}
5773 		si_init_ao_cu_mask(rdev);
5774 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5775 			si_init_gfx_cgpg(rdev);
5776 		} else {
5777 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5778 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5779 		}
5780 		si_enable_dma_pg(rdev, true);
5781 		si_enable_gfx_cgpg(rdev, true);
5782 	} else {
5783 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5784 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5785 	}
5786 }
5787 
5788 static void si_fini_pg(struct radeon_device *rdev)
5789 {
5790 	if (rdev->pg_flags) {
5791 		si_enable_dma_pg(rdev, false);
5792 		si_enable_gfx_cgpg(rdev, false);
5793 	}
5794 }
5795 
5796 /*
5797  * RLC
5798  */
5799 void si_rlc_reset(struct radeon_device *rdev)
5800 {
5801 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5802 
5803 	tmp |= SOFT_RESET_RLC;
5804 	WREG32(GRBM_SOFT_RESET, tmp);
5805 	udelay(50);
5806 	tmp &= ~SOFT_RESET_RLC;
5807 	WREG32(GRBM_SOFT_RESET, tmp);
5808 	udelay(50);
5809 }
5810 
/**
 * si_rlc_stop - halt the RLC
 * @rdev: radeon_device pointer
 *
 * Disables the RLC engine, masks the GUI idle interrupt, and waits for the
 * RLC serdes to go idle.  The order here is fixed by the hardware sequence.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5819 
/**
 * si_rlc_start - start the RLC
 * @rdev: radeon_device pointer
 *
 * Enables the RLC engine, unmasks the GUI idle interrupt, and gives the
 * block a short delay to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5828 
5829 static bool si_lbpw_supported(struct radeon_device *rdev)
5830 {
5831 	u32 tmp;
5832 
5833 	/* Enable LBPW only for DDR3 */
5834 	tmp = RREG32(MC_SEQ_MISC0);
5835 	if ((tmp & 0xF0000000) == 0xB0000000)
5836 		return true;
5837 	return false;
5838 }
5839 
5840 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5841 {
5842 	u32 tmp;
5843 
5844 	tmp = RREG32(RLC_LB_CNTL);
5845 	if (enable)
5846 		tmp |= LOAD_BALANCE_ENABLE;
5847 	else
5848 		tmp &= ~LOAD_BALANCE_ENABLE;
5849 	WREG32(RLC_LB_CNTL, tmp);
5850 
5851 	if (!enable) {
5852 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5853 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5854 	}
5855 }
5856 
/**
 * si_rlc_resume - stop, reset, program, and restart the RLC
 * @rdev: radeon_device pointer
 *
 * Full RLC bring-up: halts and soft-resets the block, (re)initializes power
 * and clock gating, clears the run-list and load-balance state, uploads the
 * RLC microcode (new-style headered firmware or legacy raw firmware), and
 * starts the RLC.  The register/upload ordering is fixed by the hardware
 * init sequence.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear run-list and load-balance state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* headered firmware: size and payload offset come from the header;
		 * the payload is stored little-endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: fixed size, stored big-endian */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5911 
5912 static void si_enable_interrupts(struct radeon_device *rdev)
5913 {
5914 	u32 ih_cntl = RREG32(IH_CNTL);
5915 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5916 
5917 	ih_cntl |= ENABLE_INTR;
5918 	ih_rb_cntl |= IH_RB_ENABLE;
5919 	WREG32(IH_CNTL, ih_cntl);
5920 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5921 	rdev->ih.enabled = true;
5922 }
5923 
5924 static void si_disable_interrupts(struct radeon_device *rdev)
5925 {
5926 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5927 	u32 ih_cntl = RREG32(IH_CNTL);
5928 
5929 	ih_rb_cntl &= ~IH_RB_ENABLE;
5930 	ih_cntl &= ~ENABLE_INTR;
5931 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5932 	WREG32(IH_CNTL, ih_cntl);
5933 	/* set rptr, wptr to 0 */
5934 	WREG32(IH_RB_RPTR, 0);
5935 	WREG32(IH_RB_WPTR, 0);
5936 	rdev->ih.enabled = false;
5937 	rdev->ih.rptr = 0;
5938 }
5939 
/**
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon_device pointer
 *
 * Forces all interrupt sources to disabled: CP rings, both DMA engines,
 * GRBM/SRBM, all populated CRTC vblank/pageflip sources, and (on parts
 * with display) hot-plug detect.  HPD polarity bits are preserved while
 * the enable bits are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* CP ring0: keep only the context busy/empty bits, mask the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	/* vblank/vline masks for each populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt controls for each populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD int enables but keep the configured polarity */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5998 
/**
 * si_irq_init - initialize the interrupt ring (IH) and RLC
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, resumes the RLC (which services the IH), programs
 * the interrupt controller and ring buffer registers, masks all sources,
 * enables bus mastering, and finally enables the IH.
 *
 * Returns 0 on success, negative error code if ring allocation or RLC
 * resume fails (the ring is freed again in the latter case).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6069 
/**
 * si_irq_set - program the interrupt mask registers from driver state
 * @rdev: radeon_device pointer
 *
 * Builds the enable masks for CP rings, DMA engines, CRTC vblank/pageflip,
 * HPD, and thermal interrupts from rdev->irq state, then writes them all
 * out.  If the IH is not enabled, everything is forced off instead.
 * Finishes with a posting read to flush the register writes.
 *
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register contents with the enable bits cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank: enabled if either the vblank irq or a pending pageflip needs it */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* write out all the masks we computed above */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always left enabled on populated CRTCs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6249 
6250 static inline void si_irq_ack(struct radeon_device *rdev)
6251 {
6252 	u32 tmp;
6253 
6254 	if (ASIC_IS_NODCE(rdev))
6255 		return;
6256 
6257 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6258 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6259 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6260 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6261 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6262 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6263 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6264 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6265 	if (rdev->num_crtc >= 4) {
6266 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6267 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6268 	}
6269 	if (rdev->num_crtc >= 6) {
6270 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6271 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6272 	}
6273 
6274 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6275 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6276 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6277 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6278 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6279 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6280 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6281 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6282 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6283 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6284 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6285 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6286 
6287 	if (rdev->num_crtc >= 4) {
6288 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6289 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6290 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6291 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6292 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6293 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6294 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6295 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6296 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6297 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6298 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6299 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6300 	}
6301 
6302 	if (rdev->num_crtc >= 6) {
6303 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6304 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6305 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6306 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6307 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6308 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6309 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6310 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6311 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6312 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6313 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6314 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6315 	}
6316 
6317 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6318 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6319 		tmp |= DC_HPDx_INT_ACK;
6320 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6321 	}
6322 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6323 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6324 		tmp |= DC_HPDx_INT_ACK;
6325 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6326 	}
6327 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6328 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6329 		tmp |= DC_HPDx_INT_ACK;
6330 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6331 	}
6332 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6333 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6334 		tmp |= DC_HPDx_INT_ACK;
6335 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6336 	}
6337 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6338 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6339 		tmp |= DC_HPDx_INT_ACK;
6340 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6341 	}
6342 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6343 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6344 		tmp |= DC_HPDx_INT_ACK;
6345 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6346 	}
6347 
6348 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6349 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6350 		tmp |= DC_HPDx_RX_INT_ACK;
6351 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6352 	}
6353 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6354 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6355 		tmp |= DC_HPDx_RX_INT_ACK;
6356 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6357 	}
6358 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6359 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6360 		tmp |= DC_HPDx_RX_INT_ACK;
6361 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6362 	}
6363 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6364 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6365 		tmp |= DC_HPDx_RX_INT_ACK;
6366 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6367 	}
6368 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6369 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6370 		tmp |= DC_HPDx_RX_INT_ACK;
6371 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6372 	}
6373 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6374 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6375 		tmp |= DC_HPDx_RX_INT_ACK;
6376 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6377 	}
6378 }
6379 
/**
 * si_irq_disable - fully quiesce interrupts
 * @rdev: radeon_device pointer
 *
 * Disables the IH, waits briefly for in-flight interrupts, acks anything
 * still pending, then forces every interrupt source off.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6388 
/**
 * si_irq_suspend - disable interrupts for suspend
 * @rdev: radeon_device pointer
 *
 * Quiesces all interrupt sources and stops the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6394 
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt handling and frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6400 
6401 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6402 {
6403 	u32 wptr, tmp;
6404 
6405 	if (rdev->wb.enabled)
6406 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6407 	else
6408 		wptr = RREG32(IH_RB_WPTR);
6409 
6410 	if (wptr & RB_OVERFLOW) {
6411 		wptr &= ~RB_OVERFLOW;
6412 		/* When a ring buffer overflow happen start parsing interrupt
6413 		 * from the last not overwritten vector (wptr + 16). Hopefully
6414 		 * this should allow us to catchup.
6415 		 */
6416 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6417 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6418 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6419 		tmp = RREG32(IH_RB_CNTL);
6420 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6421 		WREG32(IH_RB_CNTL, tmp);
6422 	}
6423 	return (wptr & rdev->ih.ptr_mask);
6424 }
6425 
6426 /*        SI IV Ring
6427  * Each IV ring entry is 128 bits:
6428  * [7:0]    - interrupt source id
6429  * [31:8]   - reserved
6430  * [59:32]  - interrupt source data
6431  * [63:60]  - reserved
6432  * [71:64]  - RINGID
6433  * [79:72]  - VMID
6434  * [127:80] - reserved
6435  */
6436 int si_irq_process(struct radeon_device *rdev)
6437 {
6438 	u32 wptr;
6439 	u32 rptr;
6440 	u32 src_id, src_data, ring_id;
6441 	u32 ring_index;
6442 	bool queue_hotplug = false;
6443 	bool queue_dp = false;
6444 	bool queue_thermal = false;
6445 	u32 status, addr;
6446 
6447 	if (!rdev->ih.enabled || rdev->shutdown)
6448 		return IRQ_NONE;
6449 
6450 	wptr = si_get_ih_wptr(rdev);
6451 
6452 restart_ih:
6453 	/* is somebody else already processing irqs? */
6454 	if (atomic_xchg(&rdev->ih.lock, 1))
6455 		return IRQ_NONE;
6456 
6457 	rptr = rdev->ih.rptr;
6458 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6459 
6460 	/* Order reading of wptr vs. reading of IH ring data */
6461 	rmb();
6462 
6463 	/* display interrupts */
6464 	si_irq_ack(rdev);
6465 
6466 	while (rptr != wptr) {
6467 		/* wptr/rptr are in bytes! */
6468 		ring_index = rptr / 4;
6469 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6470 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6471 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6472 
6473 		switch (src_id) {
6474 		case 1: /* D1 vblank/vline */
6475 			switch (src_data) {
6476 			case 0: /* D1 vblank */
6477 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6478 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6479 
6480 				if (rdev->irq.crtc_vblank_int[0]) {
6481 					drm_handle_vblank(rdev->ddev, 0);
6482 					rdev->pm.vblank_sync = true;
6483 					wake_up(&rdev->irq.vblank_queue);
6484 				}
6485 				if (atomic_read(&rdev->irq.pflip[0]))
6486 					radeon_crtc_handle_vblank(rdev, 0);
6487 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6488 				DRM_DEBUG("IH: D1 vblank\n");
6489 
6490 				break;
6491 			case 1: /* D1 vline */
6492 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6493 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6494 
6495 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6496 				DRM_DEBUG("IH: D1 vline\n");
6497 
6498 				break;
6499 			default:
6500 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6501 				break;
6502 			}
6503 			break;
6504 		case 2: /* D2 vblank/vline */
6505 			switch (src_data) {
6506 			case 0: /* D2 vblank */
6507 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6508 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6509 
6510 				if (rdev->irq.crtc_vblank_int[1]) {
6511 					drm_handle_vblank(rdev->ddev, 1);
6512 					rdev->pm.vblank_sync = true;
6513 					wake_up(&rdev->irq.vblank_queue);
6514 				}
6515 				if (atomic_read(&rdev->irq.pflip[1]))
6516 					radeon_crtc_handle_vblank(rdev, 1);
6517 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6518 				DRM_DEBUG("IH: D2 vblank\n");
6519 
6520 				break;
6521 			case 1: /* D2 vline */
6522 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6523 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6524 
6525 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6526 				DRM_DEBUG("IH: D2 vline\n");
6527 
6528 				break;
6529 			default:
6530 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6531 				break;
6532 			}
6533 			break;
6534 		case 3: /* D3 vblank/vline */
6535 			switch (src_data) {
6536 			case 0: /* D3 vblank */
6537 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6538 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6539 
6540 				if (rdev->irq.crtc_vblank_int[2]) {
6541 					drm_handle_vblank(rdev->ddev, 2);
6542 					rdev->pm.vblank_sync = true;
6543 					wake_up(&rdev->irq.vblank_queue);
6544 				}
6545 				if (atomic_read(&rdev->irq.pflip[2]))
6546 					radeon_crtc_handle_vblank(rdev, 2);
6547 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6548 				DRM_DEBUG("IH: D3 vblank\n");
6549 
6550 				break;
6551 			case 1: /* D3 vline */
6552 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6553 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6554 
6555 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6556 				DRM_DEBUG("IH: D3 vline\n");
6557 
6558 				break;
6559 			default:
6560 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6561 				break;
6562 			}
6563 			break;
6564 		case 4: /* D4 vblank/vline */
6565 			switch (src_data) {
6566 			case 0: /* D4 vblank */
6567 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6568 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6569 
6570 				if (rdev->irq.crtc_vblank_int[3]) {
6571 					drm_handle_vblank(rdev->ddev, 3);
6572 					rdev->pm.vblank_sync = true;
6573 					wake_up(&rdev->irq.vblank_queue);
6574 				}
6575 				if (atomic_read(&rdev->irq.pflip[3]))
6576 					radeon_crtc_handle_vblank(rdev, 3);
6577 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6578 				DRM_DEBUG("IH: D4 vblank\n");
6579 
6580 				break;
6581 			case 1: /* D4 vline */
6582 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6583 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6584 
6585 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6586 				DRM_DEBUG("IH: D4 vline\n");
6587 
6588 				break;
6589 			default:
6590 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6591 				break;
6592 			}
6593 			break;
6594 		case 5: /* D5 vblank/vline */
6595 			switch (src_data) {
6596 			case 0: /* D5 vblank */
6597 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6598 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6599 
6600 				if (rdev->irq.crtc_vblank_int[4]) {
6601 					drm_handle_vblank(rdev->ddev, 4);
6602 					rdev->pm.vblank_sync = true;
6603 					wake_up(&rdev->irq.vblank_queue);
6604 				}
6605 				if (atomic_read(&rdev->irq.pflip[4]))
6606 					radeon_crtc_handle_vblank(rdev, 4);
6607 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6608 				DRM_DEBUG("IH: D5 vblank\n");
6609 
6610 				break;
6611 			case 1: /* D5 vline */
6612 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6613 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6614 
6615 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6616 				DRM_DEBUG("IH: D5 vline\n");
6617 
6618 				break;
6619 			default:
6620 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6621 				break;
6622 			}
6623 			break;
6624 		case 6: /* D6 vblank/vline */
6625 			switch (src_data) {
6626 			case 0: /* D6 vblank */
6627 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6628 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6629 
6630 				if (rdev->irq.crtc_vblank_int[5]) {
6631 					drm_handle_vblank(rdev->ddev, 5);
6632 					rdev->pm.vblank_sync = true;
6633 					wake_up(&rdev->irq.vblank_queue);
6634 				}
6635 				if (atomic_read(&rdev->irq.pflip[5]))
6636 					radeon_crtc_handle_vblank(rdev, 5);
6637 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6638 				DRM_DEBUG("IH: D6 vblank\n");
6639 
6640 				break;
6641 			case 1: /* D6 vline */
6642 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6643 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6644 
6645 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6646 				DRM_DEBUG("IH: D6 vline\n");
6647 
6648 				break;
6649 			default:
6650 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6651 				break;
6652 			}
6653 			break;
6654 		case 8: /* D1 page flip */
6655 		case 10: /* D2 page flip */
6656 		case 12: /* D3 page flip */
6657 		case 14: /* D4 page flip */
6658 		case 16: /* D5 page flip */
6659 		case 18: /* D6 page flip */
6660 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6661 			if (radeon_use_pflipirq > 0)
6662 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6663 			break;
6664 		case 42: /* HPD hotplug */
6665 			switch (src_data) {
6666 			case 0:
6667 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6668 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6669 
6670 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6671 				queue_hotplug = true;
6672 				DRM_DEBUG("IH: HPD1\n");
6673 
6674 				break;
6675 			case 1:
6676 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6677 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6678 
6679 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6680 				queue_hotplug = true;
6681 				DRM_DEBUG("IH: HPD2\n");
6682 
6683 				break;
6684 			case 2:
6685 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6686 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6687 
6688 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6689 				queue_hotplug = true;
6690 				DRM_DEBUG("IH: HPD3\n");
6691 
6692 				break;
6693 			case 3:
6694 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6695 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6696 
6697 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6698 				queue_hotplug = true;
6699 				DRM_DEBUG("IH: HPD4\n");
6700 
6701 				break;
6702 			case 4:
6703 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6704 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6705 
6706 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6707 				queue_hotplug = true;
6708 				DRM_DEBUG("IH: HPD5\n");
6709 
6710 				break;
6711 			case 5:
6712 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6713 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6714 
6715 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6716 				queue_hotplug = true;
6717 				DRM_DEBUG("IH: HPD6\n");
6718 
6719 				break;
6720 			case 6:
6721 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6722 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6723 
6724 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6725 				queue_dp = true;
6726 				DRM_DEBUG("IH: HPD_RX 1\n");
6727 
6728 				break;
6729 			case 7:
6730 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6731 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6732 
6733 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6734 				queue_dp = true;
6735 				DRM_DEBUG("IH: HPD_RX 2\n");
6736 
6737 				break;
6738 			case 8:
6739 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6740 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6741 
6742 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6743 				queue_dp = true;
6744 				DRM_DEBUG("IH: HPD_RX 3\n");
6745 
6746 				break;
6747 			case 9:
6748 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6749 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6750 
6751 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6752 				queue_dp = true;
6753 				DRM_DEBUG("IH: HPD_RX 4\n");
6754 
6755 				break;
6756 			case 10:
6757 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6758 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6759 
6760 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6761 				queue_dp = true;
6762 				DRM_DEBUG("IH: HPD_RX 5\n");
6763 
6764 				break;
6765 			case 11:
6766 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6767 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6768 
6769 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6770 				queue_dp = true;
6771 				DRM_DEBUG("IH: HPD_RX 6\n");
6772 
6773 				break;
6774 			default:
6775 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6776 				break;
6777 			}
6778 			break;
6779 		case 96:
6780 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6781 			WREG32(SRBM_INT_ACK, 0x1);
6782 			break;
6783 		case 124: /* UVD */
6784 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6785 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6786 			break;
6787 		case 146:
6788 		case 147:
6789 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6790 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6791 			/* reset addr and status */
6792 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6793 			if (addr == 0x0 && status == 0x0)
6794 				break;
6795 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6796 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6797 				addr);
6798 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6799 				status);
6800 			si_vm_decode_fault(rdev, status, addr);
6801 			break;
6802 		case 176: /* RINGID0 CP_INT */
6803 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6804 			break;
6805 		case 177: /* RINGID1 CP_INT */
6806 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6807 			break;
6808 		case 178: /* RINGID2 CP_INT */
6809 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6810 			break;
6811 		case 181: /* CP EOP event */
6812 			DRM_DEBUG("IH: CP EOP\n");
6813 			switch (ring_id) {
6814 			case 0:
6815 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6816 				break;
6817 			case 1:
6818 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6819 				break;
6820 			case 2:
6821 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6822 				break;
6823 			}
6824 			break;
6825 		case 224: /* DMA trap event */
6826 			DRM_DEBUG("IH: DMA trap\n");
6827 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6828 			break;
6829 		case 230: /* thermal low to high */
6830 			DRM_DEBUG("IH: thermal low to high\n");
6831 			rdev->pm.dpm.thermal.high_to_low = false;
6832 			queue_thermal = true;
6833 			break;
6834 		case 231: /* thermal high to low */
6835 			DRM_DEBUG("IH: thermal high to low\n");
6836 			rdev->pm.dpm.thermal.high_to_low = true;
6837 			queue_thermal = true;
6838 			break;
6839 		case 233: /* GUI IDLE */
6840 			DRM_DEBUG("IH: GUI idle\n");
6841 			break;
6842 		case 244: /* DMA trap event */
6843 			DRM_DEBUG("IH: DMA1 trap\n");
6844 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6845 			break;
6846 		default:
6847 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6848 			break;
6849 		}
6850 
6851 		/* wptr/rptr are in bytes! */
6852 		rptr += 16;
6853 		rptr &= rdev->ih.ptr_mask;
6854 		WREG32(IH_RB_RPTR, rptr);
6855 	}
6856 	if (queue_dp)
6857 		schedule_work(&rdev->dp_work);
6858 	if (queue_hotplug)
6859 		schedule_delayed_work(&rdev->hotplug_work, 0);
6860 	if (queue_thermal && rdev->pm.dpm_enabled)
6861 		schedule_work(&rdev->pm.dpm.thermal.work);
6862 	rdev->ih.rptr = rptr;
6863 	atomic_set(&rdev->ih.lock, 0);
6864 
6865 	/* make sure wptr hasn't changed while processing */
6866 	wptr = si_get_ih_wptr(rdev);
6867 	if (wptr != rptr)
6868 		goto restart_ih;
6869 
6870 	return IRQ_HANDLED;
6871 }
6872 
6873 /*
6874  * startup/shutdown callbacks
6875  */
6876 static void si_uvd_init(struct radeon_device *rdev)
6877 {
6878 	int r;
6879 
6880 	if (!rdev->has_uvd)
6881 		return;
6882 
6883 	r = radeon_uvd_init(rdev);
6884 	if (r) {
6885 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6886 		/*
6887 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6888 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6889 		 * there. So it is pointless to try to go through that code
6890 		 * hence why we disable uvd here.
6891 		 */
6892 		rdev->has_uvd = 0;
6893 		return;
6894 	}
6895 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6896 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6897 }
6898 
6899 static void si_uvd_start(struct radeon_device *rdev)
6900 {
6901 	int r;
6902 
6903 	if (!rdev->has_uvd)
6904 		return;
6905 
6906 	r = uvd_v2_2_resume(rdev);
6907 	if (r) {
6908 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6909 		goto error;
6910 	}
6911 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6912 	if (r) {
6913 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6914 		goto error;
6915 	}
6916 	return;
6917 
6918 error:
6919 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6920 }
6921 
6922 static void si_uvd_resume(struct radeon_device *rdev)
6923 {
6924 	struct radeon_ring *ring;
6925 	int r;
6926 
6927 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6928 		return;
6929 
6930 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6931 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
6932 	if (r) {
6933 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6934 		return;
6935 	}
6936 	r = uvd_v1_0_init(rdev);
6937 	if (r) {
6938 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6939 		return;
6940 	}
6941 }
6942 
6943 static void si_vce_init(struct radeon_device *rdev)
6944 {
6945 	int r;
6946 
6947 	if (!rdev->has_vce)
6948 		return;
6949 
6950 	r = radeon_vce_init(rdev);
6951 	if (r) {
6952 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6953 		/*
6954 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6955 		 * to early fails si_vce_start() and thus nothing happens
6956 		 * there. So it is pointless to try to go through that code
6957 		 * hence why we disable vce here.
6958 		 */
6959 		rdev->has_vce = 0;
6960 		return;
6961 	}
6962 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6963 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6964 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6965 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6966 }
6967 
6968 static void si_vce_start(struct radeon_device *rdev)
6969 {
6970 	int r;
6971 
6972 	if (!rdev->has_vce)
6973 		return;
6974 
6975 	r = radeon_vce_resume(rdev);
6976 	if (r) {
6977 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6978 		goto error;
6979 	}
6980 	r = vce_v1_0_resume(rdev);
6981 	if (r) {
6982 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6983 		goto error;
6984 	}
6985 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6986 	if (r) {
6987 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6988 		goto error;
6989 	}
6990 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6991 	if (r) {
6992 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6993 		goto error;
6994 	}
6995 	return;
6996 
6997 error:
6998 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6999 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7000 }
7001 
7002 static void si_vce_resume(struct radeon_device *rdev)
7003 {
7004 	struct radeon_ring *ring;
7005 	int r;
7006 
7007 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7008 		return;
7009 
7010 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7011 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7012 	if (r) {
7013 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7014 		return;
7015 	}
7016 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7017 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7018 	if (r) {
7019 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7020 		return;
7021 	}
7022 	r = vce_v1_0_init(rdev);
7023 	if (r) {
7024 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7025 		return;
7026 	}
7027 }
7028 
/*
 * si_startup - common hw bring-up path shared by si_init() and si_resume()
 *
 * Programs the MC, enables the GART, sets up the RLC and writeback
 * buffers, starts the fence driver on every ring used by SI, installs
 * IRQs, brings up the CP/DMA/UVD/VCE rings, and finally initializes the
 * IB pool, VM manager and audio.  The ordering below is significant;
 * do not reorder steps.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* NOTE(review): MC microcode is only loaded here when DPM is off —
	 * presumably the dpm code handles it otherwise; confirm in si_dpm. */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		/* Verde has a chip-specific save/restore register list */
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on each ring used by SI */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they disable their own rings */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* bring up the GFX, two compute and two DMA rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7187 
7188 int si_resume(struct radeon_device *rdev)
7189 {
7190 	int r;
7191 
7192 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
7193 	 * posting will perform necessary task to bring back GPU into good
7194 	 * shape.
7195 	 */
7196 	/* post card */
7197 	atom_asic_init(rdev->mode_info.atom_context);
7198 
7199 	/* init golden registers */
7200 	si_init_golden_registers(rdev);
7201 
7202 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7203 		radeon_pm_resume(rdev);
7204 
7205 	rdev->accel_working = true;
7206 	r = si_startup(rdev);
7207 	if (r) {
7208 		DRM_ERROR("si startup failed on resume\n");
7209 		rdev->accel_working = false;
7210 		return r;
7211 	}
7212 
7213 	return r;
7214 
7215 }
7216 
/*
 * si_suspend - quiesce the hw in preparation for suspend
 *
 * Stops power management, audio and the VM manager, halts the CP and
 * DMA engines, suspends UVD/VCE if present, tears down powergating,
 * clockgating and IRQs, and finally disables writeback and the GART.
 * Teardown order matters; engines are stopped before the structures
 * they use are disabled.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
7237 
7238 /* Plan is to move initialization in that function and use
7239  * helper function so that radeon_device_init pretty much
7240  * do nothing more than calling asic specific function. This
7241  * should also allow to remove a bunch of callback function
7242  * like vram_info.
7243  */
7244 int si_init(struct radeon_device *rdev)
7245 {
7246 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7247 	int r;
7248 
7249 	/* Read BIOS */
7250 	if (!radeon_get_bios(rdev)) {
7251 		if (ASIC_IS_AVIVO(rdev))
7252 			return -EINVAL;
7253 	}
7254 	/* Must be an ATOMBIOS */
7255 	if (!rdev->is_atom_bios) {
7256 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7257 		return -EINVAL;
7258 	}
7259 	r = radeon_atombios_init(rdev);
7260 	if (r)
7261 		return r;
7262 
7263 	/* Post card if necessary */
7264 	if (!radeon_card_posted(rdev)) {
7265 		if (!rdev->bios) {
7266 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7267 			return -EINVAL;
7268 		}
7269 		DRM_INFO("GPU not posted. posting now...\n");
7270 		atom_asic_init(rdev->mode_info.atom_context);
7271 	}
7272 	/* init golden registers */
7273 	si_init_golden_registers(rdev);
7274 	/* Initialize scratch registers */
7275 	si_scratch_init(rdev);
7276 	/* Initialize surface registers */
7277 	radeon_surface_init(rdev);
7278 	/* Initialize clocks */
7279 	radeon_get_clock_info(rdev->ddev);
7280 
7281 	/* Fence driver */
7282 	r = radeon_fence_driver_init(rdev);
7283 	if (r)
7284 		return r;
7285 
7286 	/* initialize memory controller */
7287 	r = si_mc_init(rdev);
7288 	if (r)
7289 		return r;
7290 	/* Memory manager */
7291 	r = radeon_bo_init(rdev);
7292 	if (r)
7293 		return r;
7294 
7295 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7296 	    !rdev->rlc_fw || !rdev->mc_fw) {
7297 		r = si_init_microcode(rdev);
7298 		if (r) {
7299 			DRM_ERROR("Failed to load firmware!\n");
7300 			return r;
7301 		}
7302 	}
7303 
7304 	/* Initialize power management */
7305 	radeon_pm_init(rdev);
7306 
7307 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7308 	ring->ring_obj = NULL;
7309 	r600_ring_init(rdev, ring, 1024 * 1024);
7310 
7311 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7312 	ring->ring_obj = NULL;
7313 	r600_ring_init(rdev, ring, 1024 * 1024);
7314 
7315 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7316 	ring->ring_obj = NULL;
7317 	r600_ring_init(rdev, ring, 1024 * 1024);
7318 
7319 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7320 	ring->ring_obj = NULL;
7321 	r600_ring_init(rdev, ring, 64 * 1024);
7322 
7323 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7324 	ring->ring_obj = NULL;
7325 	r600_ring_init(rdev, ring, 64 * 1024);
7326 
7327 	si_uvd_init(rdev);
7328 	si_vce_init(rdev);
7329 
7330 	rdev->ih.ring_obj = NULL;
7331 	r600_ih_ring_init(rdev, 64 * 1024);
7332 
7333 	r = r600_pcie_gart_init(rdev);
7334 	if (r)
7335 		return r;
7336 
7337 	rdev->accel_working = true;
7338 	r = si_startup(rdev);
7339 	if (r) {
7340 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7341 		si_cp_fini(rdev);
7342 		cayman_dma_fini(rdev);
7343 		si_irq_fini(rdev);
7344 		sumo_rlc_fini(rdev);
7345 		radeon_wb_fini(rdev);
7346 		radeon_ib_pool_fini(rdev);
7347 		radeon_vm_manager_fini(rdev);
7348 		radeon_irq_kms_fini(rdev);
7349 		si_pcie_gart_fini(rdev);
7350 		rdev->accel_working = false;
7351 	}
7352 
7353 	/* Don't start up if the MC ucode is missing.
7354 	 * The default clocks and voltages before the MC ucode
7355 	 * is loaded are not suffient for advanced operations.
7356 	 */
7357 	if (!rdev->mc_fw) {
7358 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7359 		return -EINVAL;
7360 	}
7361 
7362 	return 0;
7363 }
7364 
/*
 * si_fini - final driver teardown
 *
 * Reverses si_init()/si_startup(): stops power management and the
 * engines, releases the RLC/writeback/VM/IB/IRQ state, shuts down
 * UVD/VCE if present, tears down the GART, scratch memory, GEM, the
 * fence driver, the buffer manager and the atombios state, and frees
 * the cached BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7393 
7394 /**
7395  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7396  *
7397  * @rdev: radeon_device pointer
7398  *
7399  * Fetches a GPU clock counter snapshot (SI).
7400  * Returns the 64 bit clock counter snapshot.
7401  */
7402 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7403 {
7404 	uint64_t clock;
7405 
7406 	mutex_lock(&rdev->gpu_clock_mutex);
7407 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7408 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7409 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7410 	mutex_unlock(&rdev->gpu_clock_mutex);
7411 	return clock;
7412 }
7413 
/*
 * si_set_uvd_clocks - program the UPLL to generate the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD vclk; 0 leaves the PLL in bypass mode
 * @dclk: requested UVD dclk; 0 leaves the PLL in bypass mode
 *
 * Switches vclk/dclk to the bypass clock, reprograms the UPLL dividers
 * for the requested frequencies, brings the PLL out of reset and then
 * switches vclk/dclk back to the PLL outputs.  The register write order
 * and the delays below follow the required PLL programming sequence; do
 * not reorder.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	/* pick feedback and post dividers for the requested frequencies */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): ISPARE9 selection by fb_div threshold — hw tuning
	 * value, meaning not documented here */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7502 
/*
 * si_pcie_gen3_enable - raise the PCIe link to the fastest supported speed
 *
 * If both the platform and the GPU support gen2/gen3 rates, requests a
 * link speed change through the PCIE_LC_SPEED_CNTL port register and the
 * Link Control 2 target-link-speed field.  For gen3 an equalization
 * retry loop is run first, toggling LC_REDO_EQ while temporarily forcing
 * hardware autonomous width disable on both ends of the link.  Bails out
 * early on IGP parts, non-PCIE parts, root-bus devices, or when the
 * desired rate is already active.  Disabled with radeon.pcie_gen2=0.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* config-space offsets of the PCIe capability on both link ends */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save link control on both ends, then force HAWD
			 * (hw autonomous width disable) during the retrain */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the detected width if we are
			 * running narrower and the hw supports it */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop (up to 10 attempts) */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance and transmit-margin
				 * fields ((1 << 4) | (7 << 9) — see PCIe spec LNKCTL2) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit (speed change done) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7662 
/*
 * si_program_aspm - program PCIe Active State Power Management for SI
 * @rdev: radeon_device pointer
 *
 * Read-modify-writes the PCIe port, PIF PHY and clock registers to enable
 * L0s/L1 ASPM, PLL power-down in L1, and (when the upstream bridge
 * advertises clock power management) CLKREQ#-based clocking.  Every
 * register is written back only when its value actually changed.  Bails
 * out early when ASPM is disabled via the radeon_aspm module parameter
 * or when the device is not on a PCIe bus.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* feature knobs; all ASPM features are enabled by default on SI */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon_aspm == 0 means ASPM was disabled on the kernel command line */
	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted on exit
	 * from L0s (0x24 per AMD init sequence) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity timers; the L1 branch below also clears
	 * LC_PMI_TO_L1_DIS again when L1 is left enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down while in L1/TXS2
			 * (state value 7 on both PIF PHY blocks) */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything except
			 * Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time: 5 on Oland/Hainan, 0 elsewhere */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ# is only usable when the upstream bridge
			 * advertises clock power management in its link caps */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch thermal-monitor and misc clocks off the
				 * reference clock so it can be gated by CLKREQ# */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: still commit the L0s/PMI settings computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the link reports the maximum N_FTS value and the lanes
		 * are reversed on both sides, drop the L0s inactivity timer
		 * again (L0s exit is unreliable in that configuration -
		 * presumably a hardware erratum; inherited from AMD init code) */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7867 
7868 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7869 {
7870 	unsigned i;
7871 
7872 	/* make sure VCEPLL_CTLREQ is deasserted */
7873 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7874 
7875 	mdelay(10);
7876 
7877 	/* assert UPLL_CTLREQ */
7878 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7879 
7880 	/* wait for CTLACK and CTLACK2 to get asserted */
7881 	for (i = 0; i < 100; ++i) {
7882 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7883 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7884 			break;
7885 		mdelay(10);
7886 	}
7887 
7888 	/* deassert UPLL_CTLREQ */
7889 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7890 
7891 	if (i == 100) {
7892 		DRM_ERROR("Timeout setting UVD clocks!\n");
7893 		return -ETIMEDOUT;
7894 	}
7895 
7896 	return 0;
7897 }
7898 
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 * @rdev: radeon_device pointer
 * @evclk: requested VCE encode clock (0 to bypass/sleep the PLL)
 * @ecclk: requested VCE core clock (0 to bypass/sleep the PLL)
 *
 * Puts the VCE PLL into bypass, computes dividers for the requested
 * frequencies, then walks the PLL through the documented reset/settle
 * sequence before switching evclk/ecclk back onto the PLL output.
 * When either requested clock is zero the PLL is simply left in bypass
 * and put to sleep.
 *
 * Returns 0 on success, negative error code on divider-calculation or
 * PLL handshake failure.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* VCO range 125-250 MHz (x1000), fb_div up to 0x03FFFFFF,
	 * post dividers up to 128, lock time 5 */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	/* latch the state via the control-request handshake */
	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	/* latch the new dividers */
	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7988