xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision de2bdb3d)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
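/*
 * Two generations of firmware images are declared here: the
 * upper-case names (TAHITI_pfp.bin, ...) are the original unwrapped
 * big-endian blobs, while the lower-case names (tahiti_pfp.bin, ...)
 * are the newer packaged little-endian images with validation
 * headers.  si_init_microcode() requests the new names first and
 * falls back to the legacy ones, recording which kind it got in
 * rdev->new_fw.
 */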
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53 MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
54 
55 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
61 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
62 
63 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
69 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
70 
71 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
72 MODULE_FIRMWARE("radeon/VERDE_me.bin");
73 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
76 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
77 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
78 
79 MODULE_FIRMWARE("radeon/verde_pfp.bin");
80 MODULE_FIRMWARE("radeon/verde_me.bin");
81 MODULE_FIRMWARE("radeon/verde_ce.bin");
82 MODULE_FIRMWARE("radeon/verde_mc.bin");
83 MODULE_FIRMWARE("radeon/verde_rlc.bin");
84 MODULE_FIRMWARE("radeon/verde_smc.bin");
85 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
86 
87 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
88 MODULE_FIRMWARE("radeon/OLAND_me.bin");
89 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
91 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
92 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
93 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
94 
95 MODULE_FIRMWARE("radeon/oland_pfp.bin");
96 MODULE_FIRMWARE("radeon/oland_me.bin");
97 MODULE_FIRMWARE("radeon/oland_ce.bin");
98 MODULE_FIRMWARE("radeon/oland_mc.bin");
99 MODULE_FIRMWARE("radeon/oland_rlc.bin");
100 MODULE_FIRMWARE("radeon/oland_smc.bin");
101 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
102 
103 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
109 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
110 
111 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
112 MODULE_FIRMWARE("radeon/hainan_me.bin");
113 MODULE_FIRMWARE("radeon/hainan_ce.bin");
114 MODULE_FIRMWARE("radeon/hainan_mc.bin");
115 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
116 MODULE_FIRMWARE("radeon/hainan_smc.bin");
117 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
118 
119 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
120 static void si_pcie_gen3_enable(struct radeon_device *rdev);
121 static void si_program_aspm(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
127 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
129 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
130 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
133 					 bool enable);
134 static void si_init_pg(struct radeon_device *rdev);
135 static void si_init_cg(struct radeon_device *rdev);
136 static void si_fini_pg(struct radeon_device *rdev);
137 static void si_fini_cg(struct radeon_device *rdev);
138 static void si_rlc_stop(struct radeon_device *rdev);
139 
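/*
 * Save/restore list for the RLC.  Each entry appears to pair a
 * GRBM_GFX_INDEX selector in the high 16 bits (0x8000/0x8040 address
 * the per-SE instances, 0x9c00 broadcasts) with a register dword
 * offset in the low 16 bits, followed by a data word; the list is
 * zero-terminated.  The selector semantics are the RLC's own, so
 * treat this decoding as a best-effort reading.
 */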
140 static const u32 verde_rlc_save_restore_register_list[] =
141 {
142 	(0x8000 << 16) | (0x98f4 >> 2),
143 	0x00000000,
144 	(0x8040 << 16) | (0x98f4 >> 2),
145 	0x00000000,
146 	(0x8000 << 16) | (0xe80 >> 2),
147 	0x00000000,
148 	(0x8040 << 16) | (0xe80 >> 2),
149 	0x00000000,
150 	(0x8000 << 16) | (0x89bc >> 2),
151 	0x00000000,
152 	(0x8040 << 16) | (0x89bc >> 2),
153 	0x00000000,
154 	(0x8000 << 16) | (0x8c1c >> 2),
155 	0x00000000,
156 	(0x8040 << 16) | (0x8c1c >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x98f0 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0xe7c >> 2),
161 	0x00000000,
162 	(0x8000 << 16) | (0x9148 >> 2),
163 	0x00000000,
164 	(0x8040 << 16) | (0x9148 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9150 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x897c >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x8d8c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0xac54 >> 2),
173 	0x00000000,
174 	0x3,
175 	(0x9c00 << 16) | (0x98f8 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9910 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9914 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9918 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x991c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x9920 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9924 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9928 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x992c >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x9930 >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9934 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9938 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x993c >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x9940 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9944 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9948 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x994c >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9950 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9954 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9958 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x995c >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9960 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9964 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9968 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x996c >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9970 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9974 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9978 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x997c >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x9980 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9984 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x9988 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x998c >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c00 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8c14 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8c04 >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8c08 >> 2),
248 	0x00000000,
249 	(0x8000 << 16) | (0x9b7c >> 2),
250 	0x00000000,
251 	(0x8040 << 16) | (0x9b7c >> 2),
252 	0x00000000,
253 	(0x8000 << 16) | (0xe84 >> 2),
254 	0x00000000,
255 	(0x8040 << 16) | (0xe84 >> 2),
256 	0x00000000,
257 	(0x8000 << 16) | (0x89c0 >> 2),
258 	0x00000000,
259 	(0x8040 << 16) | (0x89c0 >> 2),
260 	0x00000000,
261 	(0x8000 << 16) | (0x914c >> 2),
262 	0x00000000,
263 	(0x8040 << 16) | (0x914c >> 2),
264 	0x00000000,
265 	(0x8000 << 16) | (0x8c20 >> 2),
266 	0x00000000,
267 	(0x8040 << 16) | (0x8c20 >> 2),
268 	0x00000000,
269 	(0x8000 << 16) | (0x9354 >> 2),
270 	0x00000000,
271 	(0x8040 << 16) | (0x9354 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x9060 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9364 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9100 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x913c >> 2),
280 	0x00000000,
281 	(0x8000 << 16) | (0x90e0 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x90e4 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x90e8 >> 2),
286 	0x00000000,
287 	(0x8040 << 16) | (0x90e0 >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x90e4 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x90e8 >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x8bcc >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x8b24 >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x88c4 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x8e50 >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x8c0c >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x8e58 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x8e5c >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0x9508 >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0x950c >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0x9494 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0xac0c >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0xac10 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0xac14 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0xae00 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0xac08 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x88d4 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x88c8 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x88cc >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x89b0 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x8b10 >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x8a14 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x9830 >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0x9834 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0x9838 >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0x9a10 >> 2),
342 	0x00000000,
343 	(0x8000 << 16) | (0x9870 >> 2),
344 	0x00000000,
345 	(0x8000 << 16) | (0x9874 >> 2),
346 	0x00000000,
347 	(0x8001 << 16) | (0x9870 >> 2),
348 	0x00000000,
349 	(0x8001 << 16) | (0x9874 >> 2),
350 	0x00000000,
351 	(0x8040 << 16) | (0x9870 >> 2),
352 	0x00000000,
353 	(0x8040 << 16) | (0x9874 >> 2),
354 	0x00000000,
355 	(0x8041 << 16) | (0x9870 >> 2),
356 	0x00000000,
357 	(0x8041 << 16) | (0x9874 >> 2),
358 	0x00000000,
359 	0x00000000
360 };
361 
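/*
 * The "golden register" tables below are consumed by
 * radeon_program_register_sequence() as {offset, and_mask, or_value}
 * triples: the register is read, the and_mask bits are cleared,
 * or_value is OR'd in and the result written back (an and_mask of
 * 0xffffffff simply writes or_value directly).
 */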
362 static const u32 tahiti_golden_rlc_registers[] =
363 {
364 	0xc424, 0xffffffff, 0x00601005,
365 	0xc47c, 0xffffffff, 0x10104040,
366 	0xc488, 0xffffffff, 0x0100000a,
367 	0xc314, 0xffffffff, 0x00000800,
368 	0xc30c, 0xffffffff, 0x800000f4,
369 	0xf4a8, 0xffffffff, 0x00000000
370 };
371 
372 static const u32 tahiti_golden_registers[] =
373 {
374 	0x9a10, 0x00010000, 0x00018208,
375 	0x9830, 0xffffffff, 0x00000000,
376 	0x9834, 0xf00fffff, 0x00000400,
377 	0x9838, 0x0002021c, 0x00020200,
378 	0xc78, 0x00000080, 0x00000000,
379 	0xd030, 0x000300c0, 0x00800040,
380 	0xd830, 0x000300c0, 0x00800040,
381 	0x5bb0, 0x000000f0, 0x00000070,
382 	0x5bc0, 0x00200000, 0x50100000,
383 	0x7030, 0x31000311, 0x00000011,
384 	0x277c, 0x00000003, 0x000007ff,
385 	0x240c, 0x000007ff, 0x00000000,
386 	0x8a14, 0xf000001f, 0x00000007,
387 	0x8b24, 0xffffffff, 0x00ffffff,
388 	0x8b10, 0x0000ff0f, 0x00000000,
389 	0x28a4c, 0x07ffffff, 0x4e000000,
390 	0x28350, 0x3f3f3fff, 0x2a00126a,
391 	0x30, 0x000000ff, 0x0040,
392 	0x34, 0x00000040, 0x00004040,
393 	0x9100, 0x07ffffff, 0x03000000,
394 	0x8e88, 0x01ff1f3f, 0x00000000,
395 	0x8e84, 0x01ff1f3f, 0x00000000,
396 	0x9060, 0x0000007f, 0x00000020,
397 	0x9508, 0x00010000, 0x00010000,
398 	0xac14, 0x00000200, 0x000002fb,
399 	0xac10, 0xffffffff, 0x0000543b,
400 	0xac0c, 0xffffffff, 0xa9210876,
401 	0x88d0, 0xffffffff, 0x000fff40,
402 	0x88d4, 0x0000001f, 0x00000010,
403 	0x1410, 0x20000000, 0x20fffed8,
404 	0x15c0, 0x000c0fc0, 0x000c0400
405 };
406 
407 static const u32 tahiti_golden_registers2[] =
408 {
409 	0xc64, 0x00000001, 0x00000001
410 };
411 
412 static const u32 pitcairn_golden_rlc_registers[] =
413 {
414 	0xc424, 0xffffffff, 0x00601004,
415 	0xc47c, 0xffffffff, 0x10102020,
416 	0xc488, 0xffffffff, 0x01000020,
417 	0xc314, 0xffffffff, 0x00000800,
418 	0xc30c, 0xffffffff, 0x800000a4
419 };
420 
421 static const u32 pitcairn_golden_registers[] =
422 {
423 	0x9a10, 0x00010000, 0x00018208,
424 	0x9830, 0xffffffff, 0x00000000,
425 	0x9834, 0xf00fffff, 0x00000400,
426 	0x9838, 0x0002021c, 0x00020200,
427 	0xc78, 0x00000080, 0x00000000,
428 	0xd030, 0x000300c0, 0x00800040,
429 	0xd830, 0x000300c0, 0x00800040,
430 	0x5bb0, 0x000000f0, 0x00000070,
431 	0x5bc0, 0x00200000, 0x50100000,
432 	0x7030, 0x31000311, 0x00000011,
433 	0x2ae4, 0x00073ffe, 0x000022a2,
434 	0x240c, 0x000007ff, 0x00000000,
435 	0x8a14, 0xf000001f, 0x00000007,
436 	0x8b24, 0xffffffff, 0x00ffffff,
437 	0x8b10, 0x0000ff0f, 0x00000000,
438 	0x28a4c, 0x07ffffff, 0x4e000000,
439 	0x28350, 0x3f3f3fff, 0x2a00126a,
440 	0x30, 0x000000ff, 0x0040,
441 	0x34, 0x00000040, 0x00004040,
442 	0x9100, 0x07ffffff, 0x03000000,
443 	0x9060, 0x0000007f, 0x00000020,
444 	0x9508, 0x00010000, 0x00010000,
445 	0xac14, 0x000003ff, 0x000000f7,
446 	0xac10, 0xffffffff, 0x00000000,
447 	0xac0c, 0xffffffff, 0x32761054,
448 	0x88d4, 0x0000001f, 0x00000010,
449 	0x15c0, 0x000c0fc0, 0x000c0400
450 };
451 
452 static const u32 verde_golden_rlc_registers[] =
453 {
454 	0xc424, 0xffffffff, 0x033f1005,
455 	0xc47c, 0xffffffff, 0x10808020,
456 	0xc488, 0xffffffff, 0x00800008,
457 	0xc314, 0xffffffff, 0x00001000,
458 	0xc30c, 0xffffffff, 0x80010014
459 };
460 
461 static const u32 verde_golden_registers[] =
462 {
463 	0x9a10, 0x00010000, 0x00018208,
464 	0x9830, 0xffffffff, 0x00000000,
465 	0x9834, 0xf00fffff, 0x00000400,
466 	0x9838, 0x0002021c, 0x00020200,
467 	0xc78, 0x00000080, 0x00000000,
468 	0xd030, 0x000300c0, 0x00800040,
469 	0xd030, 0x000300c0, 0x00800040,
470 	0xd830, 0x000300c0, 0x00800040,
471 	0xd830, 0x000300c0, 0x00800040,
472 	0x5bb0, 0x000000f0, 0x00000070,
473 	0x5bc0, 0x00200000, 0x50100000,
474 	0x7030, 0x31000311, 0x00000011,
475 	0x2ae4, 0x00073ffe, 0x000022a2,
476 	0x2ae4, 0x00073ffe, 0x000022a2,
477 	0x2ae4, 0x00073ffe, 0x000022a2,
478 	0x240c, 0x000007ff, 0x00000000,
479 	0x240c, 0x000007ff, 0x00000000,
480 	0x240c, 0x000007ff, 0x00000000,
481 	0x8a14, 0xf000001f, 0x00000007,
482 	0x8a14, 0xf000001f, 0x00000007,
483 	0x8a14, 0xf000001f, 0x00000007,
484 	0x8b24, 0xffffffff, 0x00ffffff,
485 	0x8b10, 0x0000ff0f, 0x00000000,
486 	0x28a4c, 0x07ffffff, 0x4e000000,
487 	0x28350, 0x3f3f3fff, 0x0000124a,
488 	0x28350, 0x3f3f3fff, 0x0000124a,
489 	0x28350, 0x3f3f3fff, 0x0000124a,
490 	0x30, 0x000000ff, 0x0040,
491 	0x34, 0x00000040, 0x00004040,
492 	0x9100, 0x07ffffff, 0x03000000,
493 	0x9100, 0x07ffffff, 0x03000000,
494 	0x8e88, 0x01ff1f3f, 0x00000000,
495 	0x8e88, 0x01ff1f3f, 0x00000000,
496 	0x8e88, 0x01ff1f3f, 0x00000000,
497 	0x8e84, 0x01ff1f3f, 0x00000000,
498 	0x8e84, 0x01ff1f3f, 0x00000000,
499 	0x8e84, 0x01ff1f3f, 0x00000000,
500 	0x9060, 0x0000007f, 0x00000020,
501 	0x9508, 0x00010000, 0x00010000,
502 	0xac14, 0x000003ff, 0x00000003,
503 	0xac14, 0x000003ff, 0x00000003,
504 	0xac14, 0x000003ff, 0x00000003,
505 	0xac10, 0xffffffff, 0x00000000,
506 	0xac10, 0xffffffff, 0x00000000,
507 	0xac10, 0xffffffff, 0x00000000,
508 	0xac0c, 0xffffffff, 0x00001032,
509 	0xac0c, 0xffffffff, 0x00001032,
510 	0xac0c, 0xffffffff, 0x00001032,
511 	0x88d4, 0x0000001f, 0x00000010,
512 	0x88d4, 0x0000001f, 0x00000010,
513 	0x88d4, 0x0000001f, 0x00000010,
514 	0x15c0, 0x000c0fc0, 0x000c0400
515 };
516 
517 static const u32 oland_golden_rlc_registers[] =
518 {
519 	0xc424, 0xffffffff, 0x00601005,
520 	0xc47c, 0xffffffff, 0x10104040,
521 	0xc488, 0xffffffff, 0x0100000a,
522 	0xc314, 0xffffffff, 0x00000800,
523 	0xc30c, 0xffffffff, 0x800000f4
524 };
525 
526 static const u32 oland_golden_registers[] =
527 {
528 	0x9a10, 0x00010000, 0x00018208,
529 	0x9830, 0xffffffff, 0x00000000,
530 	0x9834, 0xf00fffff, 0x00000400,
531 	0x9838, 0x0002021c, 0x00020200,
532 	0xc78, 0x00000080, 0x00000000,
533 	0xd030, 0x000300c0, 0x00800040,
534 	0xd830, 0x000300c0, 0x00800040,
535 	0x5bb0, 0x000000f0, 0x00000070,
536 	0x5bc0, 0x00200000, 0x50100000,
537 	0x7030, 0x31000311, 0x00000011,
538 	0x2ae4, 0x00073ffe, 0x000022a2,
539 	0x240c, 0x000007ff, 0x00000000,
540 	0x8a14, 0xf000001f, 0x00000007,
541 	0x8b24, 0xffffffff, 0x00ffffff,
542 	0x8b10, 0x0000ff0f, 0x00000000,
543 	0x28a4c, 0x07ffffff, 0x4e000000,
544 	0x28350, 0x3f3f3fff, 0x00000082,
545 	0x30, 0x000000ff, 0x0040,
546 	0x34, 0x00000040, 0x00004040,
547 	0x9100, 0x07ffffff, 0x03000000,
548 	0x9060, 0x0000007f, 0x00000020,
549 	0x9508, 0x00010000, 0x00010000,
550 	0xac14, 0x000003ff, 0x000000f3,
551 	0xac10, 0xffffffff, 0x00000000,
552 	0xac0c, 0xffffffff, 0x00003210,
553 	0x88d4, 0x0000001f, 0x00000010,
554 	0x15c0, 0x000c0fc0, 0x000c0400
555 };
556 
557 static const u32 hainan_golden_registers[] =
558 {
559 	0x9a10, 0x00010000, 0x00018208,
560 	0x9830, 0xffffffff, 0x00000000,
561 	0x9834, 0xf00fffff, 0x00000400,
562 	0x9838, 0x0002021c, 0x00020200,
563 	0xd0c0, 0xff000fff, 0x00000100,
564 	0xd030, 0x000300c0, 0x00800040,
565 	0xd8c0, 0xff000fff, 0x00000100,
566 	0xd830, 0x000300c0, 0x00800040,
567 	0x2ae4, 0x00073ffe, 0x000022a2,
568 	0x240c, 0x000007ff, 0x00000000,
569 	0x8a14, 0xf000001f, 0x00000007,
570 	0x8b24, 0xffffffff, 0x00ffffff,
571 	0x8b10, 0x0000ff0f, 0x00000000,
572 	0x28a4c, 0x07ffffff, 0x4e000000,
573 	0x28350, 0x3f3f3fff, 0x00000000,
574 	0x30, 0x000000ff, 0x0040,
575 	0x34, 0x00000040, 0x00004040,
576 	0x9100, 0x03e00000, 0x03600000,
577 	0x9060, 0x0000007f, 0x00000020,
578 	0x9508, 0x00010000, 0x00010000,
579 	0xac14, 0x000003ff, 0x000000f1,
580 	0xac10, 0xffffffff, 0x00000000,
581 	0xac0c, 0xffffffff, 0x00003210,
582 	0x88d4, 0x0000001f, 0x00000010,
583 	0x15c0, 0x000c0fc0, 0x000c0400
584 };
585 
586 static const u32 hainan_golden_registers2[] =
587 {
588 	0x98f8, 0xffffffff, 0x02010001
589 };
590 
591 static const u32 tahiti_mgcg_cgcg_init[] =
592 {
593 	0xc400, 0xffffffff, 0xfffffffc,
594 	0x802c, 0xffffffff, 0xe0000000,
595 	0x9a60, 0xffffffff, 0x00000100,
596 	0x92a4, 0xffffffff, 0x00000100,
597 	0xc164, 0xffffffff, 0x00000100,
598 	0x9774, 0xffffffff, 0x00000100,
599 	0x8984, 0xffffffff, 0x06000100,
600 	0x8a18, 0xffffffff, 0x00000100,
601 	0x92a0, 0xffffffff, 0x00000100,
602 	0xc380, 0xffffffff, 0x00000100,
603 	0x8b28, 0xffffffff, 0x00000100,
604 	0x9144, 0xffffffff, 0x00000100,
605 	0x8d88, 0xffffffff, 0x00000100,
606 	0x8d8c, 0xffffffff, 0x00000100,
607 	0x9030, 0xffffffff, 0x00000100,
608 	0x9034, 0xffffffff, 0x00000100,
609 	0x9038, 0xffffffff, 0x00000100,
610 	0x903c, 0xffffffff, 0x00000100,
611 	0xad80, 0xffffffff, 0x00000100,
612 	0xac54, 0xffffffff, 0x00000100,
613 	0x897c, 0xffffffff, 0x06000100,
614 	0x9868, 0xffffffff, 0x00000100,
615 	0x9510, 0xffffffff, 0x00000100,
616 	0xaf04, 0xffffffff, 0x00000100,
617 	0xae04, 0xffffffff, 0x00000100,
618 	0x949c, 0xffffffff, 0x00000100,
619 	0x802c, 0xffffffff, 0xe0000000,
620 	0x9160, 0xffffffff, 0x00010000,
621 	0x9164, 0xffffffff, 0x00030002,
622 	0x9168, 0xffffffff, 0x00040007,
623 	0x916c, 0xffffffff, 0x00060005,
624 	0x9170, 0xffffffff, 0x00090008,
625 	0x9174, 0xffffffff, 0x00020001,
626 	0x9178, 0xffffffff, 0x00040003,
627 	0x917c, 0xffffffff, 0x00000007,
628 	0x9180, 0xffffffff, 0x00060005,
629 	0x9184, 0xffffffff, 0x00090008,
630 	0x9188, 0xffffffff, 0x00030002,
631 	0x918c, 0xffffffff, 0x00050004,
632 	0x9190, 0xffffffff, 0x00000008,
633 	0x9194, 0xffffffff, 0x00070006,
634 	0x9198, 0xffffffff, 0x000a0009,
635 	0x919c, 0xffffffff, 0x00040003,
636 	0x91a0, 0xffffffff, 0x00060005,
637 	0x91a4, 0xffffffff, 0x00000009,
638 	0x91a8, 0xffffffff, 0x00080007,
639 	0x91ac, 0xffffffff, 0x000b000a,
640 	0x91b0, 0xffffffff, 0x00050004,
641 	0x91b4, 0xffffffff, 0x00070006,
642 	0x91b8, 0xffffffff, 0x0008000b,
643 	0x91bc, 0xffffffff, 0x000a0009,
644 	0x91c0, 0xffffffff, 0x000d000c,
645 	0x91c4, 0xffffffff, 0x00060005,
646 	0x91c8, 0xffffffff, 0x00080007,
647 	0x91cc, 0xffffffff, 0x0000000b,
648 	0x91d0, 0xffffffff, 0x000a0009,
649 	0x91d4, 0xffffffff, 0x000d000c,
650 	0x91d8, 0xffffffff, 0x00070006,
651 	0x91dc, 0xffffffff, 0x00090008,
652 	0x91e0, 0xffffffff, 0x0000000c,
653 	0x91e4, 0xffffffff, 0x000b000a,
654 	0x91e8, 0xffffffff, 0x000e000d,
655 	0x91ec, 0xffffffff, 0x00080007,
656 	0x91f0, 0xffffffff, 0x000a0009,
657 	0x91f4, 0xffffffff, 0x0000000d,
658 	0x91f8, 0xffffffff, 0x000c000b,
659 	0x91fc, 0xffffffff, 0x000f000e,
660 	0x9200, 0xffffffff, 0x00090008,
661 	0x9204, 0xffffffff, 0x000b000a,
662 	0x9208, 0xffffffff, 0x000c000f,
663 	0x920c, 0xffffffff, 0x000e000d,
664 	0x9210, 0xffffffff, 0x00110010,
665 	0x9214, 0xffffffff, 0x000a0009,
666 	0x9218, 0xffffffff, 0x000c000b,
667 	0x921c, 0xffffffff, 0x0000000f,
668 	0x9220, 0xffffffff, 0x000e000d,
669 	0x9224, 0xffffffff, 0x00110010,
670 	0x9228, 0xffffffff, 0x000b000a,
671 	0x922c, 0xffffffff, 0x000d000c,
672 	0x9230, 0xffffffff, 0x00000010,
673 	0x9234, 0xffffffff, 0x000f000e,
674 	0x9238, 0xffffffff, 0x00120011,
675 	0x923c, 0xffffffff, 0x000c000b,
676 	0x9240, 0xffffffff, 0x000e000d,
677 	0x9244, 0xffffffff, 0x00000011,
678 	0x9248, 0xffffffff, 0x0010000f,
679 	0x924c, 0xffffffff, 0x00130012,
680 	0x9250, 0xffffffff, 0x000d000c,
681 	0x9254, 0xffffffff, 0x000f000e,
682 	0x9258, 0xffffffff, 0x00100013,
683 	0x925c, 0xffffffff, 0x00120011,
684 	0x9260, 0xffffffff, 0x00150014,
685 	0x9264, 0xffffffff, 0x000e000d,
686 	0x9268, 0xffffffff, 0x0010000f,
687 	0x926c, 0xffffffff, 0x00000013,
688 	0x9270, 0xffffffff, 0x00120011,
689 	0x9274, 0xffffffff, 0x00150014,
690 	0x9278, 0xffffffff, 0x000f000e,
691 	0x927c, 0xffffffff, 0x00110010,
692 	0x9280, 0xffffffff, 0x00000014,
693 	0x9284, 0xffffffff, 0x00130012,
694 	0x9288, 0xffffffff, 0x00160015,
695 	0x928c, 0xffffffff, 0x0010000f,
696 	0x9290, 0xffffffff, 0x00120011,
697 	0x9294, 0xffffffff, 0x00000015,
698 	0x9298, 0xffffffff, 0x00140013,
699 	0x929c, 0xffffffff, 0x00170016,
700 	0x9150, 0xffffffff, 0x96940200,
701 	0x8708, 0xffffffff, 0x00900100,
702 	0xc478, 0xffffffff, 0x00000080,
703 	0xc404, 0xffffffff, 0x0020003f,
704 	0x30, 0xffffffff, 0x0000001c,
705 	0x34, 0x000f0000, 0x000f0000,
706 	0x160c, 0xffffffff, 0x00000100,
707 	0x1024, 0xffffffff, 0x00000100,
708 	0x102c, 0x00000101, 0x00000000,
709 	0x20a8, 0xffffffff, 0x00000104,
710 	0x264c, 0x000c0000, 0x000c0000,
711 	0x2648, 0x000c0000, 0x000c0000,
712 	0x55e4, 0xff000fff, 0x00000100,
713 	0x55e8, 0x00000001, 0x00000001,
714 	0x2f50, 0x00000001, 0x00000001,
715 	0x30cc, 0xc0000fff, 0x00000104,
716 	0xc1e4, 0x00000001, 0x00000001,
717 	0xd0c0, 0xfffffff0, 0x00000100,
718 	0xd8c0, 0xfffffff0, 0x00000100
719 };
720 
721 static const u32 pitcairn_mgcg_cgcg_init[] =
722 {
723 	0xc400, 0xffffffff, 0xfffffffc,
724 	0x802c, 0xffffffff, 0xe0000000,
725 	0x9a60, 0xffffffff, 0x00000100,
726 	0x92a4, 0xffffffff, 0x00000100,
727 	0xc164, 0xffffffff, 0x00000100,
728 	0x9774, 0xffffffff, 0x00000100,
729 	0x8984, 0xffffffff, 0x06000100,
730 	0x8a18, 0xffffffff, 0x00000100,
731 	0x92a0, 0xffffffff, 0x00000100,
732 	0xc380, 0xffffffff, 0x00000100,
733 	0x8b28, 0xffffffff, 0x00000100,
734 	0x9144, 0xffffffff, 0x00000100,
735 	0x8d88, 0xffffffff, 0x00000100,
736 	0x8d8c, 0xffffffff, 0x00000100,
737 	0x9030, 0xffffffff, 0x00000100,
738 	0x9034, 0xffffffff, 0x00000100,
739 	0x9038, 0xffffffff, 0x00000100,
740 	0x903c, 0xffffffff, 0x00000100,
741 	0xad80, 0xffffffff, 0x00000100,
742 	0xac54, 0xffffffff, 0x00000100,
743 	0x897c, 0xffffffff, 0x06000100,
744 	0x9868, 0xffffffff, 0x00000100,
745 	0x9510, 0xffffffff, 0x00000100,
746 	0xaf04, 0xffffffff, 0x00000100,
747 	0xae04, 0xffffffff, 0x00000100,
748 	0x949c, 0xffffffff, 0x00000100,
749 	0x802c, 0xffffffff, 0xe0000000,
750 	0x9160, 0xffffffff, 0x00010000,
751 	0x9164, 0xffffffff, 0x00030002,
752 	0x9168, 0xffffffff, 0x00040007,
753 	0x916c, 0xffffffff, 0x00060005,
754 	0x9170, 0xffffffff, 0x00090008,
755 	0x9174, 0xffffffff, 0x00020001,
756 	0x9178, 0xffffffff, 0x00040003,
757 	0x917c, 0xffffffff, 0x00000007,
758 	0x9180, 0xffffffff, 0x00060005,
759 	0x9184, 0xffffffff, 0x00090008,
760 	0x9188, 0xffffffff, 0x00030002,
761 	0x918c, 0xffffffff, 0x00050004,
762 	0x9190, 0xffffffff, 0x00000008,
763 	0x9194, 0xffffffff, 0x00070006,
764 	0x9198, 0xffffffff, 0x000a0009,
765 	0x919c, 0xffffffff, 0x00040003,
766 	0x91a0, 0xffffffff, 0x00060005,
767 	0x91a4, 0xffffffff, 0x00000009,
768 	0x91a8, 0xffffffff, 0x00080007,
769 	0x91ac, 0xffffffff, 0x000b000a,
770 	0x91b0, 0xffffffff, 0x00050004,
771 	0x91b4, 0xffffffff, 0x00070006,
772 	0x91b8, 0xffffffff, 0x0008000b,
773 	0x91bc, 0xffffffff, 0x000a0009,
774 	0x91c0, 0xffffffff, 0x000d000c,
775 	0x9200, 0xffffffff, 0x00090008,
776 	0x9204, 0xffffffff, 0x000b000a,
777 	0x9208, 0xffffffff, 0x000c000f,
778 	0x920c, 0xffffffff, 0x000e000d,
779 	0x9210, 0xffffffff, 0x00110010,
780 	0x9214, 0xffffffff, 0x000a0009,
781 	0x9218, 0xffffffff, 0x000c000b,
782 	0x921c, 0xffffffff, 0x0000000f,
783 	0x9220, 0xffffffff, 0x000e000d,
784 	0x9224, 0xffffffff, 0x00110010,
785 	0x9228, 0xffffffff, 0x000b000a,
786 	0x922c, 0xffffffff, 0x000d000c,
787 	0x9230, 0xffffffff, 0x00000010,
788 	0x9234, 0xffffffff, 0x000f000e,
789 	0x9238, 0xffffffff, 0x00120011,
790 	0x923c, 0xffffffff, 0x000c000b,
791 	0x9240, 0xffffffff, 0x000e000d,
792 	0x9244, 0xffffffff, 0x00000011,
793 	0x9248, 0xffffffff, 0x0010000f,
794 	0x924c, 0xffffffff, 0x00130012,
795 	0x9250, 0xffffffff, 0x000d000c,
796 	0x9254, 0xffffffff, 0x000f000e,
797 	0x9258, 0xffffffff, 0x00100013,
798 	0x925c, 0xffffffff, 0x00120011,
799 	0x9260, 0xffffffff, 0x00150014,
800 	0x9150, 0xffffffff, 0x96940200,
801 	0x8708, 0xffffffff, 0x00900100,
802 	0xc478, 0xffffffff, 0x00000080,
803 	0xc404, 0xffffffff, 0x0020003f,
804 	0x30, 0xffffffff, 0x0000001c,
805 	0x34, 0x000f0000, 0x000f0000,
806 	0x160c, 0xffffffff, 0x00000100,
807 	0x1024, 0xffffffff, 0x00000100,
808 	0x102c, 0x00000101, 0x00000000,
809 	0x20a8, 0xffffffff, 0x00000104,
810 	0x55e4, 0xff000fff, 0x00000100,
811 	0x55e8, 0x00000001, 0x00000001,
812 	0x2f50, 0x00000001, 0x00000001,
813 	0x30cc, 0xc0000fff, 0x00000104,
814 	0xc1e4, 0x00000001, 0x00000001,
815 	0xd0c0, 0xfffffff0, 0x00000100,
816 	0xd8c0, 0xfffffff0, 0x00000100
817 };
818 
819 static const u32 verde_mgcg_cgcg_init[] =
820 {
821 	0xc400, 0xffffffff, 0xfffffffc,
822 	0x802c, 0xffffffff, 0xe0000000,
823 	0x9a60, 0xffffffff, 0x00000100,
824 	0x92a4, 0xffffffff, 0x00000100,
825 	0xc164, 0xffffffff, 0x00000100,
826 	0x9774, 0xffffffff, 0x00000100,
827 	0x8984, 0xffffffff, 0x06000100,
828 	0x8a18, 0xffffffff, 0x00000100,
829 	0x92a0, 0xffffffff, 0x00000100,
830 	0xc380, 0xffffffff, 0x00000100,
831 	0x8b28, 0xffffffff, 0x00000100,
832 	0x9144, 0xffffffff, 0x00000100,
833 	0x8d88, 0xffffffff, 0x00000100,
834 	0x8d8c, 0xffffffff, 0x00000100,
835 	0x9030, 0xffffffff, 0x00000100,
836 	0x9034, 0xffffffff, 0x00000100,
837 	0x9038, 0xffffffff, 0x00000100,
838 	0x903c, 0xffffffff, 0x00000100,
839 	0xad80, 0xffffffff, 0x00000100,
840 	0xac54, 0xffffffff, 0x00000100,
841 	0x897c, 0xffffffff, 0x06000100,
842 	0x9868, 0xffffffff, 0x00000100,
843 	0x9510, 0xffffffff, 0x00000100,
844 	0xaf04, 0xffffffff, 0x00000100,
845 	0xae04, 0xffffffff, 0x00000100,
846 	0x949c, 0xffffffff, 0x00000100,
847 	0x802c, 0xffffffff, 0xe0000000,
848 	0x9160, 0xffffffff, 0x00010000,
849 	0x9164, 0xffffffff, 0x00030002,
850 	0x9168, 0xffffffff, 0x00040007,
851 	0x916c, 0xffffffff, 0x00060005,
852 	0x9170, 0xffffffff, 0x00090008,
853 	0x9174, 0xffffffff, 0x00020001,
854 	0x9178, 0xffffffff, 0x00040003,
855 	0x917c, 0xffffffff, 0x00000007,
856 	0x9180, 0xffffffff, 0x00060005,
857 	0x9184, 0xffffffff, 0x00090008,
858 	0x9188, 0xffffffff, 0x00030002,
859 	0x918c, 0xffffffff, 0x00050004,
860 	0x9190, 0xffffffff, 0x00000008,
861 	0x9194, 0xffffffff, 0x00070006,
862 	0x9198, 0xffffffff, 0x000a0009,
863 	0x919c, 0xffffffff, 0x00040003,
864 	0x91a0, 0xffffffff, 0x00060005,
865 	0x91a4, 0xffffffff, 0x00000009,
866 	0x91a8, 0xffffffff, 0x00080007,
867 	0x91ac, 0xffffffff, 0x000b000a,
868 	0x91b0, 0xffffffff, 0x00050004,
869 	0x91b4, 0xffffffff, 0x00070006,
870 	0x91b8, 0xffffffff, 0x0008000b,
871 	0x91bc, 0xffffffff, 0x000a0009,
872 	0x91c0, 0xffffffff, 0x000d000c,
873 	0x9200, 0xffffffff, 0x00090008,
874 	0x9204, 0xffffffff, 0x000b000a,
875 	0x9208, 0xffffffff, 0x000c000f,
876 	0x920c, 0xffffffff, 0x000e000d,
877 	0x9210, 0xffffffff, 0x00110010,
878 	0x9214, 0xffffffff, 0x000a0009,
879 	0x9218, 0xffffffff, 0x000c000b,
880 	0x921c, 0xffffffff, 0x0000000f,
881 	0x9220, 0xffffffff, 0x000e000d,
882 	0x9224, 0xffffffff, 0x00110010,
883 	0x9228, 0xffffffff, 0x000b000a,
884 	0x922c, 0xffffffff, 0x000d000c,
885 	0x9230, 0xffffffff, 0x00000010,
886 	0x9234, 0xffffffff, 0x000f000e,
887 	0x9238, 0xffffffff, 0x00120011,
888 	0x923c, 0xffffffff, 0x000c000b,
889 	0x9240, 0xffffffff, 0x000e000d,
890 	0x9244, 0xffffffff, 0x00000011,
891 	0x9248, 0xffffffff, 0x0010000f,
892 	0x924c, 0xffffffff, 0x00130012,
893 	0x9250, 0xffffffff, 0x000d000c,
894 	0x9254, 0xffffffff, 0x000f000e,
895 	0x9258, 0xffffffff, 0x00100013,
896 	0x925c, 0xffffffff, 0x00120011,
897 	0x9260, 0xffffffff, 0x00150014,
898 	0x9150, 0xffffffff, 0x96940200,
899 	0x8708, 0xffffffff, 0x00900100,
900 	0xc478, 0xffffffff, 0x00000080,
901 	0xc404, 0xffffffff, 0x0020003f,
902 	0x30, 0xffffffff, 0x0000001c,
903 	0x34, 0x000f0000, 0x000f0000,
904 	0x160c, 0xffffffff, 0x00000100,
905 	0x1024, 0xffffffff, 0x00000100,
906 	0x102c, 0x00000101, 0x00000000,
907 	0x20a8, 0xffffffff, 0x00000104,
908 	0x264c, 0x000c0000, 0x000c0000,
909 	0x2648, 0x000c0000, 0x000c0000,
910 	0x55e4, 0xff000fff, 0x00000100,
911 	0x55e8, 0x00000001, 0x00000001,
912 	0x2f50, 0x00000001, 0x00000001,
913 	0x30cc, 0xc0000fff, 0x00000104,
914 	0xc1e4, 0x00000001, 0x00000001,
915 	0xd0c0, 0xfffffff0, 0x00000100,
916 	0xd8c0, 0xfffffff0, 0x00000100
917 };
918 
919 static const u32 oland_mgcg_cgcg_init[] =
920 {
921 	0xc400, 0xffffffff, 0xfffffffc,
922 	0x802c, 0xffffffff, 0xe0000000,
923 	0x9a60, 0xffffffff, 0x00000100,
924 	0x92a4, 0xffffffff, 0x00000100,
925 	0xc164, 0xffffffff, 0x00000100,
926 	0x9774, 0xffffffff, 0x00000100,
927 	0x8984, 0xffffffff, 0x06000100,
928 	0x8a18, 0xffffffff, 0x00000100,
929 	0x92a0, 0xffffffff, 0x00000100,
930 	0xc380, 0xffffffff, 0x00000100,
931 	0x8b28, 0xffffffff, 0x00000100,
932 	0x9144, 0xffffffff, 0x00000100,
933 	0x8d88, 0xffffffff, 0x00000100,
934 	0x8d8c, 0xffffffff, 0x00000100,
935 	0x9030, 0xffffffff, 0x00000100,
936 	0x9034, 0xffffffff, 0x00000100,
937 	0x9038, 0xffffffff, 0x00000100,
938 	0x903c, 0xffffffff, 0x00000100,
939 	0xad80, 0xffffffff, 0x00000100,
940 	0xac54, 0xffffffff, 0x00000100,
941 	0x897c, 0xffffffff, 0x06000100,
942 	0x9868, 0xffffffff, 0x00000100,
943 	0x9510, 0xffffffff, 0x00000100,
944 	0xaf04, 0xffffffff, 0x00000100,
945 	0xae04, 0xffffffff, 0x00000100,
946 	0x949c, 0xffffffff, 0x00000100,
947 	0x802c, 0xffffffff, 0xe0000000,
948 	0x9160, 0xffffffff, 0x00010000,
949 	0x9164, 0xffffffff, 0x00030002,
950 	0x9168, 0xffffffff, 0x00040007,
951 	0x916c, 0xffffffff, 0x00060005,
952 	0x9170, 0xffffffff, 0x00090008,
953 	0x9174, 0xffffffff, 0x00020001,
954 	0x9178, 0xffffffff, 0x00040003,
955 	0x917c, 0xffffffff, 0x00000007,
956 	0x9180, 0xffffffff, 0x00060005,
957 	0x9184, 0xffffffff, 0x00090008,
958 	0x9188, 0xffffffff, 0x00030002,
959 	0x918c, 0xffffffff, 0x00050004,
960 	0x9190, 0xffffffff, 0x00000008,
961 	0x9194, 0xffffffff, 0x00070006,
962 	0x9198, 0xffffffff, 0x000a0009,
963 	0x919c, 0xffffffff, 0x00040003,
964 	0x91a0, 0xffffffff, 0x00060005,
965 	0x91a4, 0xffffffff, 0x00000009,
966 	0x91a8, 0xffffffff, 0x00080007,
967 	0x91ac, 0xffffffff, 0x000b000a,
968 	0x91b0, 0xffffffff, 0x00050004,
969 	0x91b4, 0xffffffff, 0x00070006,
970 	0x91b8, 0xffffffff, 0x0008000b,
971 	0x91bc, 0xffffffff, 0x000a0009,
972 	0x91c0, 0xffffffff, 0x000d000c,
973 	0x91c4, 0xffffffff, 0x00060005,
974 	0x91c8, 0xffffffff, 0x00080007,
975 	0x91cc, 0xffffffff, 0x0000000b,
976 	0x91d0, 0xffffffff, 0x000a0009,
977 	0x91d4, 0xffffffff, 0x000d000c,
978 	0x9150, 0xffffffff, 0x96940200,
979 	0x8708, 0xffffffff, 0x00900100,
980 	0xc478, 0xffffffff, 0x00000080,
981 	0xc404, 0xffffffff, 0x0020003f,
982 	0x30, 0xffffffff, 0x0000001c,
983 	0x34, 0x000f0000, 0x000f0000,
984 	0x160c, 0xffffffff, 0x00000100,
985 	0x1024, 0xffffffff, 0x00000100,
986 	0x102c, 0x00000101, 0x00000000,
987 	0x20a8, 0xffffffff, 0x00000104,
988 	0x264c, 0x000c0000, 0x000c0000,
989 	0x2648, 0x000c0000, 0x000c0000,
990 	0x55e4, 0xff000fff, 0x00000100,
991 	0x55e8, 0x00000001, 0x00000001,
992 	0x2f50, 0x00000001, 0x00000001,
993 	0x30cc, 0xc0000fff, 0x00000104,
994 	0xc1e4, 0x00000001, 0x00000001,
995 	0xd0c0, 0xfffffff0, 0x00000100,
996 	0xd8c0, 0xfffffff0, 0x00000100
997 };
998 
999 static const u32 hainan_mgcg_cgcg_init[] =
1000 {
1001 	0xc400, 0xffffffff, 0xfffffffc,
1002 	0x802c, 0xffffffff, 0xe0000000,
1003 	0x9a60, 0xffffffff, 0x00000100,
1004 	0x92a4, 0xffffffff, 0x00000100,
1005 	0xc164, 0xffffffff, 0x00000100,
1006 	0x9774, 0xffffffff, 0x00000100,
1007 	0x8984, 0xffffffff, 0x06000100,
1008 	0x8a18, 0xffffffff, 0x00000100,
1009 	0x92a0, 0xffffffff, 0x00000100,
1010 	0xc380, 0xffffffff, 0x00000100,
1011 	0x8b28, 0xffffffff, 0x00000100,
1012 	0x9144, 0xffffffff, 0x00000100,
1013 	0x8d88, 0xffffffff, 0x00000100,
1014 	0x8d8c, 0xffffffff, 0x00000100,
1015 	0x9030, 0xffffffff, 0x00000100,
1016 	0x9034, 0xffffffff, 0x00000100,
1017 	0x9038, 0xffffffff, 0x00000100,
1018 	0x903c, 0xffffffff, 0x00000100,
1019 	0xad80, 0xffffffff, 0x00000100,
1020 	0xac54, 0xffffffff, 0x00000100,
1021 	0x897c, 0xffffffff, 0x06000100,
1022 	0x9868, 0xffffffff, 0x00000100,
1023 	0x9510, 0xffffffff, 0x00000100,
1024 	0xaf04, 0xffffffff, 0x00000100,
1025 	0xae04, 0xffffffff, 0x00000100,
1026 	0x949c, 0xffffffff, 0x00000100,
1027 	0x802c, 0xffffffff, 0xe0000000,
1028 	0x9160, 0xffffffff, 0x00010000,
1029 	0x9164, 0xffffffff, 0x00030002,
1030 	0x9168, 0xffffffff, 0x00040007,
1031 	0x916c, 0xffffffff, 0x00060005,
1032 	0x9170, 0xffffffff, 0x00090008,
1033 	0x9174, 0xffffffff, 0x00020001,
1034 	0x9178, 0xffffffff, 0x00040003,
1035 	0x917c, 0xffffffff, 0x00000007,
1036 	0x9180, 0xffffffff, 0x00060005,
1037 	0x9184, 0xffffffff, 0x00090008,
1038 	0x9188, 0xffffffff, 0x00030002,
1039 	0x918c, 0xffffffff, 0x00050004,
1040 	0x9190, 0xffffffff, 0x00000008,
1041 	0x9194, 0xffffffff, 0x00070006,
1042 	0x9198, 0xffffffff, 0x000a0009,
1043 	0x919c, 0xffffffff, 0x00040003,
1044 	0x91a0, 0xffffffff, 0x00060005,
1045 	0x91a4, 0xffffffff, 0x00000009,
1046 	0x91a8, 0xffffffff, 0x00080007,
1047 	0x91ac, 0xffffffff, 0x000b000a,
1048 	0x91b0, 0xffffffff, 0x00050004,
1049 	0x91b4, 0xffffffff, 0x00070006,
1050 	0x91b8, 0xffffffff, 0x0008000b,
1051 	0x91bc, 0xffffffff, 0x000a0009,
1052 	0x91c0, 0xffffffff, 0x000d000c,
1053 	0x91c4, 0xffffffff, 0x00060005,
1054 	0x91c8, 0xffffffff, 0x00080007,
1055 	0x91cc, 0xffffffff, 0x0000000b,
1056 	0x91d0, 0xffffffff, 0x000a0009,
1057 	0x91d4, 0xffffffff, 0x000d000c,
1058 	0x9150, 0xffffffff, 0x96940200,
1059 	0x8708, 0xffffffff, 0x00900100,
1060 	0xc478, 0xffffffff, 0x00000080,
1061 	0xc404, 0xffffffff, 0x0020003f,
1062 	0x30, 0xffffffff, 0x0000001c,
1063 	0x34, 0x000f0000, 0x000f0000,
1064 	0x160c, 0xffffffff, 0x00000100,
1065 	0x1024, 0xffffffff, 0x00000100,
1066 	0x20a8, 0xffffffff, 0x00000104,
1067 	0x264c, 0x000c0000, 0x000c0000,
1068 	0x2648, 0x000c0000, 0x000c0000,
1069 	0x2f50, 0x00000001, 0x00000001,
1070 	0x30cc, 0xc0000fff, 0x00000104,
1071 	0xc1e4, 0x00000001, 0x00000001,
1072 	0xd0c0, 0xfffffff0, 0x00000100,
1073 	0xd8c0, 0xfffffff0, 0x00000100
1074 };
1075 
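/*
 * verde_pg_init uses the same triple encoding; the repeated writes to
 * the 0x3500/0x3504 and 0x3538/0x353c pairs look like index/data
 * style uploads seeding the RLC power-gating sequence, though the
 * payload itself is opaque hardware state.
 */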
1076 static const u32 verde_pg_init[] =
1077 {
1078 	0x353c, 0xffffffff, 0x40000,
1079 	0x3538, 0xffffffff, 0x200010ff,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x7007,
1086 	0x3538, 0xffffffff, 0x300010ff,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x400000,
1093 	0x3538, 0xffffffff, 0x100010ff,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x120200,
1100 	0x3538, 0xffffffff, 0x500010ff,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x1e1e16,
1107 	0x3538, 0xffffffff, 0x600010ff,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x171f1e,
1114 	0x3538, 0xffffffff, 0x700010ff,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x0,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x3538, 0xffffffff, 0x9ff,
1122 	0x3500, 0xffffffff, 0x0,
1123 	0x3504, 0xffffffff, 0x10000800,
1124 	0x3504, 0xffffffff, 0xf,
1125 	0x3504, 0xffffffff, 0xf,
1126 	0x3500, 0xffffffff, 0x4,
1127 	0x3504, 0xffffffff, 0x1000051e,
1128 	0x3504, 0xffffffff, 0xffff,
1129 	0x3504, 0xffffffff, 0xffff,
1130 	0x3500, 0xffffffff, 0x8,
1131 	0x3504, 0xffffffff, 0x80500,
1132 	0x3500, 0xffffffff, 0x12,
1133 	0x3504, 0xffffffff, 0x9050c,
1134 	0x3500, 0xffffffff, 0x1d,
1135 	0x3504, 0xffffffff, 0xb052c,
1136 	0x3500, 0xffffffff, 0x2a,
1137 	0x3504, 0xffffffff, 0x1053e,
1138 	0x3500, 0xffffffff, 0x2d,
1139 	0x3504, 0xffffffff, 0x10546,
1140 	0x3500, 0xffffffff, 0x30,
1141 	0x3504, 0xffffffff, 0xa054e,
1142 	0x3500, 0xffffffff, 0x3c,
1143 	0x3504, 0xffffffff, 0x1055f,
1144 	0x3500, 0xffffffff, 0x3f,
1145 	0x3504, 0xffffffff, 0x10567,
1146 	0x3500, 0xffffffff, 0x42,
1147 	0x3504, 0xffffffff, 0x1056f,
1148 	0x3500, 0xffffffff, 0x45,
1149 	0x3504, 0xffffffff, 0x10572,
1150 	0x3500, 0xffffffff, 0x48,
1151 	0x3504, 0xffffffff, 0x20575,
1152 	0x3500, 0xffffffff, 0x4c,
1153 	0x3504, 0xffffffff, 0x190801,
1154 	0x3500, 0xffffffff, 0x67,
1155 	0x3504, 0xffffffff, 0x1082a,
1156 	0x3500, 0xffffffff, 0x6a,
1157 	0x3504, 0xffffffff, 0x1b082d,
1158 	0x3500, 0xffffffff, 0x87,
1159 	0x3504, 0xffffffff, 0x310851,
1160 	0x3500, 0xffffffff, 0xba,
1161 	0x3504, 0xffffffff, 0x891,
1162 	0x3500, 0xffffffff, 0xbc,
1163 	0x3504, 0xffffffff, 0x893,
1164 	0x3500, 0xffffffff, 0xbe,
1165 	0x3504, 0xffffffff, 0x20895,
1166 	0x3500, 0xffffffff, 0xc2,
1167 	0x3504, 0xffffffff, 0x20899,
1168 	0x3500, 0xffffffff, 0xc6,
1169 	0x3504, 0xffffffff, 0x2089d,
1170 	0x3500, 0xffffffff, 0xca,
1171 	0x3504, 0xffffffff, 0x8a1,
1172 	0x3500, 0xffffffff, 0xcc,
1173 	0x3504, 0xffffffff, 0x8a3,
1174 	0x3500, 0xffffffff, 0xce,
1175 	0x3504, 0xffffffff, 0x308a5,
1176 	0x3500, 0xffffffff, 0xd3,
1177 	0x3504, 0xffffffff, 0x6d08cd,
1178 	0x3500, 0xffffffff, 0x142,
1179 	0x3504, 0xffffffff, 0x2000095a,
1180 	0x3504, 0xffffffff, 0x1,
1181 	0x3500, 0xffffffff, 0x144,
1182 	0x3504, 0xffffffff, 0x301f095b,
1183 	0x3500, 0xffffffff, 0x165,
1184 	0x3504, 0xffffffff, 0xc094d,
1185 	0x3500, 0xffffffff, 0x173,
1186 	0x3504, 0xffffffff, 0xf096d,
1187 	0x3500, 0xffffffff, 0x184,
1188 	0x3504, 0xffffffff, 0x15097f,
1189 	0x3500, 0xffffffff, 0x19b,
1190 	0x3504, 0xffffffff, 0xc0998,
1191 	0x3500, 0xffffffff, 0x1a9,
1192 	0x3504, 0xffffffff, 0x409a7,
1193 	0x3500, 0xffffffff, 0x1af,
1194 	0x3504, 0xffffffff, 0xcdc,
1195 	0x3500, 0xffffffff, 0x1b1,
1196 	0x3504, 0xffffffff, 0x800,
1197 	0x3508, 0xffffffff, 0x6c9b2000,
1198 	0x3510, 0xfc00, 0x2000,
1199 	0x3544, 0xffffffff, 0xfc0,
1200 	0x28d4, 0x00000100, 0x100
1201 };
1202 
1203 static void si_init_golden_registers(struct radeon_device *rdev)
1204 {
1205 	switch (rdev->family) {
1206 	case CHIP_TAHITI:
1207 		radeon_program_register_sequence(rdev,
1208 						 tahiti_golden_registers,
1209 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1210 		radeon_program_register_sequence(rdev,
1211 						 tahiti_golden_rlc_registers,
1212 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1213 		radeon_program_register_sequence(rdev,
1214 						 tahiti_mgcg_cgcg_init,
1215 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1216 		radeon_program_register_sequence(rdev,
1217 						 tahiti_golden_registers2,
1218 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1219 		break;
1220 	case CHIP_PITCAIRN:
1221 		radeon_program_register_sequence(rdev,
1222 						 pitcairn_golden_registers,
1223 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1224 		radeon_program_register_sequence(rdev,
1225 						 pitcairn_golden_rlc_registers,
1226 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1227 		radeon_program_register_sequence(rdev,
1228 						 pitcairn_mgcg_cgcg_init,
1229 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1230 		break;
1231 	case CHIP_VERDE:
1232 		radeon_program_register_sequence(rdev,
1233 						 verde_golden_registers,
1234 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 verde_golden_rlc_registers,
1237 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1238 		radeon_program_register_sequence(rdev,
1239 						 verde_mgcg_cgcg_init,
1240 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1241 		radeon_program_register_sequence(rdev,
1242 						 verde_pg_init,
1243 						 (const u32)ARRAY_SIZE(verde_pg_init));
1244 		break;
1245 	case CHIP_OLAND:
1246 		radeon_program_register_sequence(rdev,
1247 						 oland_golden_registers,
1248 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1249 		radeon_program_register_sequence(rdev,
1250 						 oland_golden_rlc_registers,
1251 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1252 		radeon_program_register_sequence(rdev,
1253 						 oland_mgcg_cgcg_init,
1254 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1255 		break;
1256 	case CHIP_HAINAN:
1257 		radeon_program_register_sequence(rdev,
1258 						 hainan_golden_registers,
1259 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1260 		radeon_program_register_sequence(rdev,
1261 						 hainan_golden_registers2,
1262 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1263 		radeon_program_register_sequence(rdev,
1264 						 hainan_mgcg_cgcg_init,
1265 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1266 		break;
1267 	default:
1268 		break;
1269 	}
1270 }
1271 
1272 /**
1273  * si_get_allowed_info_register - fetch the register for the info ioctl
1274  *
1275  * @rdev: radeon_device pointer
1276  * @reg: register offset in bytes
1277  * @val: register value
1278  *
1279  * Returns 0 for success or -EINVAL for an invalid register
1280  *
1281  */
1282 int si_get_allowed_info_register(struct radeon_device *rdev,
1283 				 u32 reg, u32 *val)
1284 {
1285 	switch (reg) {
1286 	case GRBM_STATUS:
1287 	case GRBM_STATUS2:
1288 	case GRBM_STATUS_SE0:
1289 	case GRBM_STATUS_SE1:
1290 	case SRBM_STATUS:
1291 	case SRBM_STATUS2:
1292 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1293 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1294 	case UVD_STATUS:
1295 		*val = RREG32(reg);
1296 		return 0;
1297 	default:
1298 		return -EINVAL;
1299 	}
1300 }
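/*
 * Hypothetical usage sketch (the real caller is the RADEON_INFO
 * ioctl path): user-supplied offsets must go through this whitelist
 * rather than straight to RREG32():
 *
 *	u32 val;
 *
 *	if (!si_get_allowed_info_register(rdev, GRBM_STATUS, &val))
 *		DRM_DEBUG("GRBM_STATUS = 0x%08x\n", val);
 */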
1301 
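/*
 * As elsewhere in radeon, these clock constants are in 10 kHz units:
 * PCIE_BUS_CLK is the 100 MHz PCIe reference clock and TCLK the
 * 10 MHz test clock derived from it.
 */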
1302 #define PCIE_BUS_CLK                10000
1303 #define TCLK                        (PCIE_BUS_CLK / 10)
1304 
1305 /**
1306  * si_get_xclk - get the xclk
1307  *
1308  * @rdev: radeon_device pointer
1309  *
1310  * Returns the reference clock used by the gfx engine
1311  * (SI).
1312  */
1313 u32 si_get_xclk(struct radeon_device *rdev)
1314 {
1315 	u32 reference_clock = rdev->clock.spll.reference_freq;
1316 	u32 tmp;
1317 
1318 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1319 	if (tmp & MUX_TCLK_TO_XCLK)
1320 		return TCLK;
1321 
1322 	tmp = RREG32(CG_CLKPIN_CNTL);
1323 	if (tmp & XTALIN_DIVIDE)
1324 		return reference_clock / 4;
1325 
1326 	return reference_clock;
1327 }
1328 
1329 /* get temperature in millidegrees */
1330 int si_get_temp(struct radeon_device *rdev)
1331 {
1332 	u32 temp;
1333 	int actual_temp = 0;
1334 
1335 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1336 		CTF_TEMP_SHIFT;
1337 
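	/* a reading with bit 9 set is treated as saturated and reported
	 * as the 255 C maximum rather than being sign-extended */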
1338 	if (temp & 0x200)
1339 		actual_temp = 255;
1340 	else
1341 		actual_temp = temp & 0x1ff;
1342 
1343 	actual_temp *= 1000;
1344 
1345 	return actual_temp;
1346 }
1347 
1348 #define TAHITI_IO_MC_REGS_SIZE 36
1349 
1350 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1351 	{0x0000006f, 0x03044000},
1352 	{0x00000070, 0x0480c018},
1353 	{0x00000071, 0x00000040},
1354 	{0x00000072, 0x01000000},
1355 	{0x00000074, 0x000000ff},
1356 	{0x00000075, 0x00143400},
1357 	{0x00000076, 0x08ec0800},
1358 	{0x00000077, 0x040000cc},
1359 	{0x00000079, 0x00000000},
1360 	{0x0000007a, 0x21000409},
1361 	{0x0000007c, 0x00000000},
1362 	{0x0000007d, 0xe8000000},
1363 	{0x0000007e, 0x044408a8},
1364 	{0x0000007f, 0x00000003},
1365 	{0x00000080, 0x00000000},
1366 	{0x00000081, 0x01000000},
1367 	{0x00000082, 0x02000000},
1368 	{0x00000083, 0x00000000},
1369 	{0x00000084, 0xe3f3e4f4},
1370 	{0x00000085, 0x00052024},
1371 	{0x00000087, 0x00000000},
1372 	{0x00000088, 0x66036603},
1373 	{0x00000089, 0x01000000},
1374 	{0x0000008b, 0x1c0a0000},
1375 	{0x0000008c, 0xff010000},
1376 	{0x0000008e, 0xffffefff},
1377 	{0x0000008f, 0xfff3efff},
1378 	{0x00000090, 0xfff3efbf},
1379 	{0x00000094, 0x00101101},
1380 	{0x00000095, 0x00000fff},
1381 	{0x00000096, 0x00116fff},
1382 	{0x00000097, 0x60010000},
1383 	{0x00000098, 0x10010000},
1384 	{0x00000099, 0x00006000},
1385 	{0x0000009a, 0x00001000},
1386 	{0x0000009f, 0x00a77400}
1387 };
1388 
1389 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1390 	{0x0000006f, 0x03044000},
1391 	{0x00000070, 0x0480c018},
1392 	{0x00000071, 0x00000040},
1393 	{0x00000072, 0x01000000},
1394 	{0x00000074, 0x000000ff},
1395 	{0x00000075, 0x00143400},
1396 	{0x00000076, 0x08ec0800},
1397 	{0x00000077, 0x040000cc},
1398 	{0x00000079, 0x00000000},
1399 	{0x0000007a, 0x21000409},
1400 	{0x0000007c, 0x00000000},
1401 	{0x0000007d, 0xe8000000},
1402 	{0x0000007e, 0x044408a8},
1403 	{0x0000007f, 0x00000003},
1404 	{0x00000080, 0x00000000},
1405 	{0x00000081, 0x01000000},
1406 	{0x00000082, 0x02000000},
1407 	{0x00000083, 0x00000000},
1408 	{0x00000084, 0xe3f3e4f4},
1409 	{0x00000085, 0x00052024},
1410 	{0x00000087, 0x00000000},
1411 	{0x00000088, 0x66036603},
1412 	{0x00000089, 0x01000000},
1413 	{0x0000008b, 0x1c0a0000},
1414 	{0x0000008c, 0xff010000},
1415 	{0x0000008e, 0xffffefff},
1416 	{0x0000008f, 0xfff3efff},
1417 	{0x00000090, 0xfff3efbf},
1418 	{0x00000094, 0x00101101},
1419 	{0x00000095, 0x00000fff},
1420 	{0x00000096, 0x00116fff},
1421 	{0x00000097, 0x60010000},
1422 	{0x00000098, 0x10010000},
1423 	{0x00000099, 0x00006000},
1424 	{0x0000009a, 0x00001000},
1425 	{0x0000009f, 0x00a47400}
1426 };
1427 
1428 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1429 	{0x0000006f, 0x03044000},
1430 	{0x00000070, 0x0480c018},
1431 	{0x00000071, 0x00000040},
1432 	{0x00000072, 0x01000000},
1433 	{0x00000074, 0x000000ff},
1434 	{0x00000075, 0x00143400},
1435 	{0x00000076, 0x08ec0800},
1436 	{0x00000077, 0x040000cc},
1437 	{0x00000079, 0x00000000},
1438 	{0x0000007a, 0x21000409},
1439 	{0x0000007c, 0x00000000},
1440 	{0x0000007d, 0xe8000000},
1441 	{0x0000007e, 0x044408a8},
1442 	{0x0000007f, 0x00000003},
1443 	{0x00000080, 0x00000000},
1444 	{0x00000081, 0x01000000},
1445 	{0x00000082, 0x02000000},
1446 	{0x00000083, 0x00000000},
1447 	{0x00000084, 0xe3f3e4f4},
1448 	{0x00000085, 0x00052024},
1449 	{0x00000087, 0x00000000},
1450 	{0x00000088, 0x66036603},
1451 	{0x00000089, 0x01000000},
1452 	{0x0000008b, 0x1c0a0000},
1453 	{0x0000008c, 0xff010000},
1454 	{0x0000008e, 0xffffefff},
1455 	{0x0000008f, 0xfff3efff},
1456 	{0x00000090, 0xfff3efbf},
1457 	{0x00000094, 0x00101101},
1458 	{0x00000095, 0x00000fff},
1459 	{0x00000096, 0x00116fff},
1460 	{0x00000097, 0x60010000},
1461 	{0x00000098, 0x10010000},
1462 	{0x00000099, 0x00006000},
1463 	{0x0000009a, 0x00001000},
1464 	{0x0000009f, 0x00a37400}
1465 };
1466 
1467 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1468 	{0x0000006f, 0x03044000},
1469 	{0x00000070, 0x0480c018},
1470 	{0x00000071, 0x00000040},
1471 	{0x00000072, 0x01000000},
1472 	{0x00000074, 0x000000ff},
1473 	{0x00000075, 0x00143400},
1474 	{0x00000076, 0x08ec0800},
1475 	{0x00000077, 0x040000cc},
1476 	{0x00000079, 0x00000000},
1477 	{0x0000007a, 0x21000409},
1478 	{0x0000007c, 0x00000000},
1479 	{0x0000007d, 0xe8000000},
1480 	{0x0000007e, 0x044408a8},
1481 	{0x0000007f, 0x00000003},
1482 	{0x00000080, 0x00000000},
1483 	{0x00000081, 0x01000000},
1484 	{0x00000082, 0x02000000},
1485 	{0x00000083, 0x00000000},
1486 	{0x00000084, 0xe3f3e4f4},
1487 	{0x00000085, 0x00052024},
1488 	{0x00000087, 0x00000000},
1489 	{0x00000088, 0x66036603},
1490 	{0x00000089, 0x01000000},
1491 	{0x0000008b, 0x1c0a0000},
1492 	{0x0000008c, 0xff010000},
1493 	{0x0000008e, 0xffffefff},
1494 	{0x0000008f, 0xfff3efff},
1495 	{0x00000090, 0xfff3efbf},
1496 	{0x00000094, 0x00101101},
1497 	{0x00000095, 0x00000fff},
1498 	{0x00000096, 0x00116fff},
1499 	{0x00000097, 0x60010000},
1500 	{0x00000098, 0x10010000},
1501 	{0x00000099, 0x00006000},
1502 	{0x0000009a, 0x00001000},
1503 	{0x0000009f, 0x00a17730}
1504 };
1505 
1506 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1507 	{0x0000006f, 0x03044000},
1508 	{0x00000070, 0x0480c018},
1509 	{0x00000071, 0x00000040},
1510 	{0x00000072, 0x01000000},
1511 	{0x00000074, 0x000000ff},
1512 	{0x00000075, 0x00143400},
1513 	{0x00000076, 0x08ec0800},
1514 	{0x00000077, 0x040000cc},
1515 	{0x00000079, 0x00000000},
1516 	{0x0000007a, 0x21000409},
1517 	{0x0000007c, 0x00000000},
1518 	{0x0000007d, 0xe8000000},
1519 	{0x0000007e, 0x044408a8},
1520 	{0x0000007f, 0x00000003},
1521 	{0x00000080, 0x00000000},
1522 	{0x00000081, 0x01000000},
1523 	{0x00000082, 0x02000000},
1524 	{0x00000083, 0x00000000},
1525 	{0x00000084, 0xe3f3e4f4},
1526 	{0x00000085, 0x00052024},
1527 	{0x00000087, 0x00000000},
1528 	{0x00000088, 0x66036603},
1529 	{0x00000089, 0x01000000},
1530 	{0x0000008b, 0x1c0a0000},
1531 	{0x0000008c, 0xff010000},
1532 	{0x0000008e, 0xffffefff},
1533 	{0x0000008f, 0xfff3efff},
1534 	{0x00000090, 0xfff3efbf},
1535 	{0x00000094, 0x00101101},
1536 	{0x00000095, 0x00000fff},
1537 	{0x00000096, 0x00116fff},
1538 	{0x00000097, 0x60010000},
1539 	{0x00000098, 0x10010000},
1540 	{0x00000099, 0x00006000},
1541 	{0x0000009a, 0x00001000},
1542 	{0x0000009f, 0x00a07730}
1543 };
1544 
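/*
 * The io_mc_regs tables above are {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs; si_mc_load_microcode() programs them
 * before streaming the MC ucode proper into MC_SEQ_SUP_PGM.
 */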
1545 /* ucode loading */
1546 int si_mc_load_microcode(struct radeon_device *rdev)
1547 {
1548 	const __be32 *fw_data = NULL;
1549 	const __le32 *new_fw_data = NULL;
1550 	u32 running;
1551 	u32 *io_mc_regs = NULL;
1552 	const __le32 *new_io_mc_regs = NULL;
1553 	int i, regs_size, ucode_size;
1554 
1555 	if (!rdev->mc_fw)
1556 		return -EINVAL;
1557 
1558 	if (rdev->new_fw) {
1559 		const struct mc_firmware_header_v1_0 *hdr =
1560 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1561 
1562 		radeon_ucode_print_mc_hdr(&hdr->header);
1563 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1564 		new_io_mc_regs = (const __le32 *)
1565 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1566 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1567 		new_fw_data = (const __le32 *)
1568 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1569 	} else {
1570 		ucode_size = rdev->mc_fw->size / 4;
1571 
1572 		switch (rdev->family) {
1573 		case CHIP_TAHITI:
1574 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1575 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1576 			break;
1577 		case CHIP_PITCAIRN:
1578 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1579 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1580 			break;
1581 		case CHIP_VERDE:
1582 		default:
1583 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1584 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1585 			break;
1586 		case CHIP_OLAND:
1587 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1588 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1589 			break;
1590 		case CHIP_HAINAN:
1591 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1592 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1593 			break;
1594 		}
1595 		fw_data = (const __be32 *)rdev->mc_fw->data;
1596 	}
1597 
1598 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1599 
1600 	if (running == 0) {
1601 		/* reset the engine and set to writable */
1602 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1603 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1604 
1605 		/* load mc io regs */
1606 		for (i = 0; i < regs_size; i++) {
1607 			if (rdev->new_fw) {
1608 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1609 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1610 			} else {
1611 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1612 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1613 			}
1614 		}
1615 		/* load the MC ucode */
1616 		for (i = 0; i < ucode_size; i++) {
1617 			if (rdev->new_fw)
1618 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1619 			else
1620 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1621 		}
1622 
1623 		/* put the engine back into the active state */
1624 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1625 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1626 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1627 
1628 		/* wait for training to complete */
1629 		for (i = 0; i < rdev->usec_timeout; i++) {
1630 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1631 				break;
1632 			udelay(1);
1633 		}
1634 		for (i = 0; i < rdev->usec_timeout; i++) {
1635 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1636 				break;
1637 			udelay(1);
1638 		}
1639 	}
1640 
1641 	return 0;
1642 }
1643 
1644 static int si_init_microcode(struct radeon_device *rdev)
1645 {
1646 	const char *chip_name;
1647 	const char *new_chip_name;
1648 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1649 	size_t smc_req_size, mc2_req_size;
1650 	char fw_name[30];
1651 	int err;
1652 	int new_fw = 0;
1653 	bool new_smc = false;
1654 
1655 	DRM_DEBUG("\n");
1656 
1657 	switch (rdev->family) {
1658 	case CHIP_TAHITI:
1659 		chip_name = "TAHITI";
1660 		/* XXX: figure out which Tahitis need the new ucode */
1661 		if (0)
1662 			new_smc = true;
1663 		new_chip_name = "tahiti";
1664 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1665 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1666 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1667 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1668 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1669 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1670 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1671 		break;
1672 	case CHIP_PITCAIRN:
1673 		chip_name = "PITCAIRN";
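		/* these revisions/device IDs need the newer k-type SMC
		 * firmware (the *_k_smc.bin images requested later) */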
1674 		if ((rdev->pdev->revision == 0x81) ||
1675 		    (rdev->pdev->device == 0x6810) ||
1676 		    (rdev->pdev->device == 0x6811) ||
1677 		    (rdev->pdev->device == 0x6816) ||
1678 		    (rdev->pdev->device == 0x6817) ||
1679 		    (rdev->pdev->device == 0x6806))
1680 			new_smc = true;
1681 		new_chip_name = "pitcairn";
1682 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1683 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1684 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1685 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1686 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1687 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1688 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1689 		break;
1690 	case CHIP_VERDE:
1691 		chip_name = "VERDE";
1692 		if ((rdev->pdev->revision == 0x81) ||
1693 		    (rdev->pdev->revision == 0x83) ||
1694 		    (rdev->pdev->revision == 0x87) ||
1695 		    (rdev->pdev->device == 0x6820) ||
1696 		    (rdev->pdev->device == 0x6821) ||
1697 		    (rdev->pdev->device == 0x6822) ||
1698 		    (rdev->pdev->device == 0x6823) ||
1699 		    (rdev->pdev->device == 0x682A) ||
1700 		    (rdev->pdev->device == 0x682B))
1701 			new_smc = true;
1702 		new_chip_name = "verde";
1703 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1705 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1706 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1708 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1709 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1710 		break;
1711 	case CHIP_OLAND:
1712 		chip_name = "OLAND";
1713 		if ((rdev->pdev->revision == 0xC7) ||
1714 		    (rdev->pdev->revision == 0x80) ||
1715 		    (rdev->pdev->revision == 0x81) ||
1716 		    (rdev->pdev->revision == 0x83) ||
1717 		    (rdev->pdev->device == 0x6604) ||
1718 		    (rdev->pdev->device == 0x6605))
1719 			new_smc = true;
1720 		new_chip_name = "oland";
1721 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1722 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1723 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1724 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1725 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1726 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1727 		break;
1728 	case CHIP_HAINAN:
1729 		chip_name = "HAINAN";
1730 		if ((rdev->pdev->revision == 0x81) ||
1731 		    (rdev->pdev->revision == 0x83) ||
1732 		    (rdev->pdev->revision == 0xC3) ||
1733 		    (rdev->pdev->device == 0x6664) ||
1734 		    (rdev->pdev->device == 0x6665) ||
1735 		    (rdev->pdev->device == 0x6667))
1736 			new_smc = true;
1737 		new_chip_name = "hainan";
1738 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1739 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1740 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1741 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1742 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1743 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1744 		break;
1745 	default: BUG();
1746 	}
1747 
1748 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1749 
1750 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1751 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1752 	if (err) {
1753 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1754 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1755 		if (err)
1756 			goto out;
1757 		if (rdev->pfp_fw->size != pfp_req_size) {
1758 			printk(KERN_ERR
1759 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1760 			       rdev->pfp_fw->size, fw_name);
1761 			err = -EINVAL;
1762 			goto out;
1763 		}
1764 	} else {
1765 		err = radeon_ucode_validate(rdev->pfp_fw);
1766 		if (err) {
1767 			printk(KERN_ERR
1768 			       "si_cp: validation failed for firmware \"%s\"\n",
1769 			       fw_name);
1770 			goto out;
1771 		} else {
1772 			new_fw++;
1773 		}
1774 	}
1775 
1776 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1777 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1778 	if (err) {
1779 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1780 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1781 		if (err)
1782 			goto out;
1783 		if (rdev->me_fw->size != me_req_size) {
1784 			printk(KERN_ERR
1785 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1786 			       rdev->me_fw->size, fw_name);
1787 			err = -EINVAL;
			goto out;
1788 		}
1789 	} else {
1790 		err = radeon_ucode_validate(rdev->me_fw);
1791 		if (err) {
1792 			printk(KERN_ERR
1793 			       "si_cp: validation failed for firmware \"%s\"\n",
1794 			       fw_name);
1795 			goto out;
1796 		} else {
1797 			new_fw++;
1798 		}
1799 	}
1800 
1801 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1802 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1803 	if (err) {
1804 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1805 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806 		if (err)
1807 			goto out;
1808 		if (rdev->ce_fw->size != ce_req_size) {
1809 			printk(KERN_ERR
1810 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1811 			       rdev->ce_fw->size, fw_name);
1812 			err = -EINVAL;
			goto out;
1813 		}
1814 	} else {
1815 		err = radeon_ucode_validate(rdev->ce_fw);
1816 		if (err) {
1817 			printk(KERN_ERR
1818 			       "si_cp: validation failed for firmware \"%s\"\n",
1819 			       fw_name);
1820 			goto out;
1821 		} else {
1822 			new_fw++;
1823 		}
1824 	}
1825 
1826 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1827 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1828 	if (err) {
1829 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1830 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1831 		if (err)
1832 			goto out;
1833 		if (rdev->rlc_fw->size != rlc_req_size) {
1834 			printk(KERN_ERR
1835 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->rlc_fw->size, fw_name);
1837 			err = -EINVAL;
			goto out;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->rlc_fw);
1841 		if (err) {
1842 			printk(KERN_ERR
1843 			       "si_cp: validation failed for firmware \"%s\"\n",
1844 			       fw_name);
1845 			goto out;
1846 		} else {
1847 			new_fw++;
1848 		}
1849 	}
1850 
1851 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1852 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1853 	if (err) {
1854 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1855 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1856 		if (err) {
1857 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1858 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1859 			if (err)
1860 				goto out;
1861 		}
1862 		if ((rdev->mc_fw->size != mc_req_size) &&
1863 		    (rdev->mc_fw->size != mc2_req_size)) {
1864 			printk(KERN_ERR
1865 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1866 			       rdev->mc_fw->size, fw_name);
1867 			err = -EINVAL;
			goto out;
1868 		}
1869 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1870 	} else {
1871 		err = radeon_ucode_validate(rdev->mc_fw);
1872 		if (err) {
1873 			printk(KERN_ERR
1874 			       "si_cp: validation failed for firmware \"%s\"\n",
1875 			       fw_name);
1876 			goto out;
1877 		} else {
1878 			new_fw++;
1879 		}
1880 	}
1881 
1882 	if (new_smc)
1883 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1884 	else
1885 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1886 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1887 	if (err) {
1888 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1889 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1890 		if (err) {
1891 			printk(KERN_ERR
1892 			       "smc: error loading firmware \"%s\"\n",
1893 			       fw_name);
1894 			release_firmware(rdev->smc_fw);
1895 			rdev->smc_fw = NULL;
1896 			err = 0;
1897 		} else if (rdev->smc_fw->size != smc_req_size) {
1898 			printk(KERN_ERR
1899 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1900 			       rdev->smc_fw->size, fw_name);
1901 			err = -EINVAL;
1902 		}
1903 	} else {
1904 		err = radeon_ucode_validate(rdev->smc_fw);
1905 		if (err) {
1906 			printk(KERN_ERR
1907 			       "si_cp: validation failed for firmware \"%s\"\n",
1908 			       fw_name);
1909 			goto out;
1910 		} else {
1911 			new_fw++;
1912 		}
1913 	}
1914 
1915 	if (new_fw == 0) {
1916 		rdev->new_fw = false;
1917 	} else if (new_fw < 6) {
1918 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1919 		err = -EINVAL;
1920 	} else {
1921 		rdev->new_fw = true;
1922 	}
1923 out:
1924 	if (err) {
1925 		if (err != -EINVAL)
1926 			printk(KERN_ERR
1927 			       "si_cp: Failed to load firmware \"%s\"\n",
1928 			       fw_name);
1929 		release_firmware(rdev->pfp_fw);
1930 		rdev->pfp_fw = NULL;
1931 		release_firmware(rdev->me_fw);
1932 		rdev->me_fw = NULL;
1933 		release_firmware(rdev->ce_fw);
1934 		rdev->ce_fw = NULL;
1935 		release_firmware(rdev->rlc_fw);
1936 		rdev->rlc_fw = NULL;
1937 		release_firmware(rdev->mc_fw);
1938 		rdev->mc_fw = NULL;
1939 		release_firmware(rdev->smc_fw);
1940 		rdev->smc_fw = NULL;
1941 	}
1942 	return err;
1943 }
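
/*
 * Illustrative sketch, not driver code: every block above follows the same
 * two-step lookup -- request the new-style lowercase image first, then fall
 * back to the legacy uppercase image and length-check it, e.g. for the PFP:
 *
 *	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
 *	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
 *	if (err) {
 *		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
 *		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
 *	}
 *
 * New-style images carry a header checked by radeon_ucode_validate() instead
 * of a fixed size, and the new_fw counter rejects a mix of old and new images
 * (it must reach 6: pfp, me, ce, rlc, mc and smc).
 */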
1944 
1945 /* watermark setup */
1946 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1947 				   struct radeon_crtc *radeon_crtc,
1948 				   struct drm_display_mode *mode,
1949 				   struct drm_display_mode *other_mode)
1950 {
1951 	u32 tmp, buffer_alloc, i;
1952 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1953 	/*
1954 	 * Line Buffer Setup
1955 	 * There are 3 line buffers, each one shared by 2 display controllers.
1956 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1957 	 * the display controllers.  The partitioning is done via one of four
1958 	 * preset allocations specified in bits 21:20:
1959 	 *  0 - half lb
1960 	 *  2 - whole lb, other crtc must be disabled
1961 	 */
1962 	/* this can get tricky if we have two large displays on a paired group
1963 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1964 	 * non-linked crtcs for maximum line buffer allocation.
1965 	 */
1966 	if (radeon_crtc->base.enabled && mode) {
1967 		if (other_mode) {
1968 			tmp = 0; /* 1/2 */
1969 			buffer_alloc = 1;
1970 		} else {
1971 			tmp = 2; /* whole */
1972 			buffer_alloc = 2;
1973 		}
1974 	} else {
1975 		tmp = 0;
1976 		buffer_alloc = 0;
1977 	}
1978 
1979 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1980 	       DC_LB_MEMORY_CONFIG(tmp));
1981 
1982 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1983 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1984 	for (i = 0; i < rdev->usec_timeout; i++) {
1985 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1986 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1987 			break;
1988 		udelay(1);
1989 	}
1990 
1991 	if (radeon_crtc->base.enabled && mode) {
1992 		switch (tmp) {
1993 		case 0:
1994 		default:
1995 			return 4096 * 2;
1996 		case 2:
1997 			return 8192 * 2;
1998 		}
1999 	}
2000 
2001 	/* controller not enabled, so no lb used */
2002 	return 0;
2003 }
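
/*
 * Worked example: two active heads on a paired crtc group both pass a
 * non-NULL other_mode, so each gets the half-buffer config (tmp == 0) and
 * this returns 4096 * 2.  A single active head on the pair gets the whole
 * buffer (tmp == 2, 8192 * 2), and a disabled controller returns 0 so it
 * contributes no line buffer to the watermark math.
 */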
2004 
2005 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2006 {
2007 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2008 
2009 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2010 	case 0:
2011 	default:
2012 		return 1;
2013 	case 1:
2014 		return 2;
2015 	case 2:
2016 		return 4;
2017 	case 3:
2018 		return 8;
2019 	case 4:
2020 		return 3;
2021 	case 5:
2022 		return 6;
2023 	case 6:
2024 		return 10;
2025 	case 7:
2026 		return 12;
2027 	case 8:
2028 		return 16;
2029 	}
2030 }
2031 
2032 struct dce6_wm_params {
2033 	u32 dram_channels; /* number of dram channels */
2034 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2035 	u32 sclk;          /* engine clock in kHz */
2036 	u32 disp_clk;      /* display clock in kHz */
2037 	u32 src_width;     /* viewport width */
2038 	u32 active_time;   /* active display time in ns */
2039 	u32 blank_time;    /* blank time in ns */
2040 	bool interlaced;    /* mode is interlaced */
2041 	fixed20_12 vsc;    /* vertical scale ratio */
2042 	u32 num_heads;     /* number of active crtcs */
2043 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2044 	u32 lb_size;       /* line buffer allocated to pipe */
2045 	u32 vtaps;         /* vertical scaler taps */
2046 };
2047 
2048 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2049 {
2050 	/* Calculate raw DRAM Bandwidth */
2051 	fixed20_12 dram_efficiency; /* 0.7 */
2052 	fixed20_12 yclk, dram_channels, bandwidth;
2053 	fixed20_12 a;
2054 
2055 	a.full = dfixed_const(1000);
2056 	yclk.full = dfixed_const(wm->yclk);
2057 	yclk.full = dfixed_div(yclk, a);
2058 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2059 	a.full = dfixed_const(10);
2060 	dram_efficiency.full = dfixed_const(7);
2061 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2062 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2063 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2064 
2065 	return dfixed_trunc(bandwidth);
2066 }
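
/*
 * Worked example of the 20.12 fixed-point math above (numbers are
 * hypothetical): a 1.2 GHz effective memory clock gives yclk = 1200000 kHz,
 * so with two DRAM channels
 *
 *	(1200000 / 1000) * (2 * 4) * (7 / 10) = 6720
 *
 * and dfixed_trunc() returns 6720 in the driver's internal bandwidth units.
 * Every factor is built with dfixed_const() and combined with
 * dfixed_mul()/dfixed_div() to avoid floating point in the kernel.
 */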
2067 
2068 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2069 {
2070 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2071 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2072 	fixed20_12 yclk, dram_channels, bandwidth;
2073 	fixed20_12 a;
2074 
2075 	a.full = dfixed_const(1000);
2076 	yclk.full = dfixed_const(wm->yclk);
2077 	yclk.full = dfixed_div(yclk, a);
2078 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079 	a.full = dfixed_const(10);
2080 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
2081 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2082 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2083 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2084 
2085 	return dfixed_trunc(bandwidth);
2086 }
2087 
2088 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2089 {
2090 	/* Calculate the display Data return Bandwidth */
2091 	fixed20_12 return_efficiency; /* 0.8 */
2092 	fixed20_12 sclk, bandwidth;
2093 	fixed20_12 a;
2094 
2095 	a.full = dfixed_const(1000);
2096 	sclk.full = dfixed_const(wm->sclk);
2097 	sclk.full = dfixed_div(sclk, a);
2098 	a.full = dfixed_const(10);
2099 	return_efficiency.full = dfixed_const(8);
2100 	return_efficiency.full = dfixed_div(return_efficiency, a);
2101 	a.full = dfixed_const(32);
2102 	bandwidth.full = dfixed_mul(a, sclk);
2103 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2104 
2105 	return dfixed_trunc(bandwidth);
2106 }
2107 
2108 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2109 {
2110 	return 32;
2111 }
2112 
2113 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2114 {
2115 	/* Calculate the DMIF Request Bandwidth */
2116 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2117 	fixed20_12 disp_clk, sclk, bandwidth;
2118 	fixed20_12 a, b1, b2;
2119 	u32 min_bandwidth;
2120 
2121 	a.full = dfixed_const(1000);
2122 	disp_clk.full = dfixed_const(wm->disp_clk);
2123 	disp_clk.full = dfixed_div(disp_clk, a);
2124 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2125 	b1.full = dfixed_mul(a, disp_clk);
2126 
2127 	a.full = dfixed_const(1000);
2128 	sclk.full = dfixed_const(wm->sclk);
2129 	sclk.full = dfixed_div(sclk, a);
2130 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2131 	b2.full = dfixed_mul(a, sclk);
2132 
2133 	a.full = dfixed_const(10);
2134 	disp_clk_request_efficiency.full = dfixed_const(8);
2135 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2136 
2137 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2138 
2139 	a.full = dfixed_const(min_bandwidth);
2140 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2141 
2142 	return dfixed_trunc(bandwidth);
2143 }
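
/*
 * Equivalently: with 32 bytes per DMIF request this computes
 *
 *	0.8 * min(disp_clk / 1000 * 16, sclk / 1000 * 32)
 *
 * i.e. the request rate is limited by either the display clock or the
 * engine clock, derated by the 0.8 request efficiency.
 */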
2144 
2145 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2146 {
2147 	/* Calculate the available bandwidth.  Display can use this temporarily but not on average. */
2148 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2149 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2150 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2151 
2152 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2153 }
2154 
2155 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2156 {
2157 	/* Calculate the display mode Average Bandwidth
2158 	 * DisplayMode should contain the source and destination dimensions,
2159 	 * timing, etc.
2160 	 */
2161 	fixed20_12 bpp;
2162 	fixed20_12 line_time;
2163 	fixed20_12 src_width;
2164 	fixed20_12 bandwidth;
2165 	fixed20_12 a;
2166 
2167 	a.full = dfixed_const(1000);
2168 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2169 	line_time.full = dfixed_div(line_time, a);
2170 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2171 	src_width.full = dfixed_const(wm->src_width);
2172 	bandwidth.full = dfixed_mul(src_width, bpp);
2173 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2174 	bandwidth.full = dfixed_div(bandwidth, line_time);
2175 
2176 	return dfixed_trunc(bandwidth);
2177 }
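
/*
 * Worked example (hypothetical 1080p-ish numbers): src_width = 1920,
 * bytes_per_pixel = 4, vsc = 1.0 and a 16000 ns line time give
 *
 *	(1920 * 4 * 1.0) / (16000 / 1000) = 480
 *
 * internal bandwidth units per head, which the helpers below compare against
 * the per-head share of DRAM and data return bandwidth.
 */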
2178 
2179 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2180 {
2181 	/* First calculate the latency in ns */
2182 	u32 mc_latency = 2000; /* 2000 ns. */
2183 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2184 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2185 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2186 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2187 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2188 		(wm->num_heads * cursor_line_pair_return_time);
2189 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2190 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2191 	u32 tmp, dmif_size = 12288;
2192 	fixed20_12 a, b, c;
2193 
2194 	if (wm->num_heads == 0)
2195 		return 0;
2196 
2197 	a.full = dfixed_const(2);
2198 	b.full = dfixed_const(1);
2199 	if ((wm->vsc.full > a.full) ||
2200 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2201 	    (wm->vtaps >= 5) ||
2202 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2203 		max_src_lines_per_dst_line = 4;
2204 	else
2205 		max_src_lines_per_dst_line = 2;
2206 
2207 	a.full = dfixed_const(available_bandwidth);
2208 	b.full = dfixed_const(wm->num_heads);
2209 	a.full = dfixed_div(a, b);
2210 
2211 	b.full = dfixed_const(mc_latency + 512);
2212 	c.full = dfixed_const(wm->disp_clk);
2213 	b.full = dfixed_div(b, c);
2214 
2215 	c.full = dfixed_const(dmif_size);
2216 	b.full = dfixed_div(c, b);
2217 
2218 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2219 
2220 	b.full = dfixed_const(1000);
2221 	c.full = dfixed_const(wm->disp_clk);
2222 	b.full = dfixed_div(c, b);
2223 	c.full = dfixed_const(wm->bytes_per_pixel);
2224 	b.full = dfixed_mul(b, c);
2225 
2226 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2227 
2228 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2229 	b.full = dfixed_const(1000);
2230 	c.full = dfixed_const(lb_fill_bw);
2231 	b.full = dfixed_div(c, b);
2232 	a.full = dfixed_div(a, b);
2233 	line_fill_time = dfixed_trunc(a);
2234 
2235 	if (line_fill_time < wm->active_time)
2236 		return latency;
2237 	else
2238 		return latency + (line_fill_time - wm->active_time);
2239 
2240 }
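
/*
 * Summary of the latency computed above:
 *
 *	latency = mc_latency (2000 ns)
 *	        + (num_heads + 1) * worst_chunk_return_time
 *	        + num_heads * cursor_line_pair_return_time
 *	        + 40000000 / disp_clk		(dc pipe latency)
 *
 * and when lb_fill_bw cannot refill a line within the active display time,
 * the shortfall (line_fill_time - active_time) is added on top.
 */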
2241 
2242 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2243 {
2244 	if (dce6_average_bandwidth(wm) <=
2245 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2246 		return true;
2247 	else
2248 		return false;
2249 }
2250 
2251 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2252 {
2253 	if (dce6_average_bandwidth(wm) <=
2254 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2255 		return true;
2256 	else
2257 		return false;
2258 }
2259 
2260 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2261 {
2262 	u32 lb_partitions = wm->lb_size / wm->src_width;
2263 	u32 line_time = wm->active_time + wm->blank_time;
2264 	u32 latency_tolerant_lines;
2265 	u32 latency_hiding;
2266 	fixed20_12 a;
2267 
2268 	a.full = dfixed_const(1);
2269 	if (wm->vsc.full > a.full)
2270 		latency_tolerant_lines = 1;
2271 	else {
2272 		if (lb_partitions <= (wm->vtaps + 1))
2273 			latency_tolerant_lines = 1;
2274 		else
2275 			latency_tolerant_lines = 2;
2276 	}
2277 
2278 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2279 
2280 	if (dce6_latency_watermark(wm) <= latency_hiding)
2281 		return true;
2282 	else
2283 		return false;
2284 }
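
/*
 * Example: a non-scaled mode (vsc <= 1) whose line buffer holds more than
 * vtaps + 1 partitions tolerates two full lines, so the hiding budget is
 * 2 * line_time + blank_time; the watermark from dce6_latency_watermark()
 * must fit inside that budget.
 */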
2285 
2286 static void dce6_program_watermarks(struct radeon_device *rdev,
2287 					 struct radeon_crtc *radeon_crtc,
2288 					 u32 lb_size, u32 num_heads)
2289 {
2290 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2291 	struct dce6_wm_params wm_low, wm_high;
2292 	u32 dram_channels;
2293 	u32 pixel_period;
2294 	u32 line_time = 0;
2295 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2296 	u32 priority_a_mark = 0, priority_b_mark = 0;
2297 	u32 priority_a_cnt = PRIORITY_OFF;
2298 	u32 priority_b_cnt = PRIORITY_OFF;
2299 	u32 tmp, arb_control3;
2300 	fixed20_12 a, b, c;
2301 
2302 	if (radeon_crtc->base.enabled && num_heads && mode) {
2303 		pixel_period = 1000000 / (u32)mode->clock;
2304 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2305 		priority_a_cnt = 0;
2306 		priority_b_cnt = 0;
2307 
2308 		if (rdev->family == CHIP_ARUBA)
2309 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2310 		else
2311 			dram_channels = si_get_number_of_dram_channels(rdev);
2312 
2313 		/* watermark for high clocks */
2314 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2315 			wm_high.yclk =
2316 				radeon_dpm_get_mclk(rdev, false) * 10;
2317 			wm_high.sclk =
2318 				radeon_dpm_get_sclk(rdev, false) * 10;
2319 		} else {
2320 			wm_high.yclk = rdev->pm.current_mclk * 10;
2321 			wm_high.sclk = rdev->pm.current_sclk * 10;
2322 		}
2323 
2324 		wm_high.disp_clk = mode->clock;
2325 		wm_high.src_width = mode->crtc_hdisplay;
2326 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2327 		wm_high.blank_time = line_time - wm_high.active_time;
2328 		wm_high.interlaced = false;
2329 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2330 			wm_high.interlaced = true;
2331 		wm_high.vsc = radeon_crtc->vsc;
2332 		wm_high.vtaps = 1;
2333 		if (radeon_crtc->rmx_type != RMX_OFF)
2334 			wm_high.vtaps = 2;
2335 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2336 		wm_high.lb_size = lb_size;
2337 		wm_high.dram_channels = dram_channels;
2338 		wm_high.num_heads = num_heads;
2339 
2340 		/* watermark for low clocks */
2341 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2342 			wm_low.yclk =
2343 				radeon_dpm_get_mclk(rdev, true) * 10;
2344 			wm_low.sclk =
2345 				radeon_dpm_get_sclk(rdev, true) * 10;
2346 		} else {
2347 			wm_low.yclk = rdev->pm.current_mclk * 10;
2348 			wm_low.sclk = rdev->pm.current_sclk * 10;
2349 		}
2350 
2351 		wm_low.disp_clk = mode->clock;
2352 		wm_low.src_width = mode->crtc_hdisplay;
2353 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2354 		wm_low.blank_time = line_time - wm_low.active_time;
2355 		wm_low.interlaced = false;
2356 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2357 			wm_low.interlaced = true;
2358 		wm_low.vsc = radeon_crtc->vsc;
2359 		wm_low.vtaps = 1;
2360 		if (radeon_crtc->rmx_type != RMX_OFF)
2361 			wm_low.vtaps = 2;
2362 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2363 		wm_low.lb_size = lb_size;
2364 		wm_low.dram_channels = dram_channels;
2365 		wm_low.num_heads = num_heads;
2366 
2367 		/* set for high clocks */
2368 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2369 		/* set for low clocks */
2370 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2371 
2372 		/* possibly force display priority to high */
2373 		/* should really do this at mode validation time... */
2374 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2375 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2376 		    !dce6_check_latency_hiding(&wm_high) ||
2377 		    (rdev->disp_priority == 2)) {
2378 			DRM_DEBUG_KMS("force priority to high\n");
2379 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2380 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2381 		}
2382 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2383 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2384 		    !dce6_check_latency_hiding(&wm_low) ||
2385 		    (rdev->disp_priority == 2)) {
2386 			DRM_DEBUG_KMS("force priority to high\n");
2387 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2388 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2389 		}
2390 
2391 		a.full = dfixed_const(1000);
2392 		b.full = dfixed_const(mode->clock);
2393 		b.full = dfixed_div(b, a);
2394 		c.full = dfixed_const(latency_watermark_a);
2395 		c.full = dfixed_mul(c, b);
2396 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2397 		c.full = dfixed_div(c, a);
2398 		a.full = dfixed_const(16);
2399 		c.full = dfixed_div(c, a);
2400 		priority_a_mark = dfixed_trunc(c);
2401 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2402 
2403 		a.full = dfixed_const(1000);
2404 		b.full = dfixed_const(mode->clock);
2405 		b.full = dfixed_div(b, a);
2406 		c.full = dfixed_const(latency_watermark_b);
2407 		c.full = dfixed_mul(c, b);
2408 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2409 		c.full = dfixed_div(c, a);
2410 		a.full = dfixed_const(16);
2411 		c.full = dfixed_div(c, a);
2412 		priority_b_mark = dfixed_trunc(c);
2413 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2414 
2415 		/* Save the number of lines by which the line buffer leads the scanout */
2416 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2417 	}
2418 
2419 	/* select wm A */
2420 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2421 	tmp = arb_control3;
2422 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2423 	tmp |= LATENCY_WATERMARK_MASK(1);
2424 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2425 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2426 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2427 		LATENCY_HIGH_WATERMARK(line_time)));
2428 	/* select wm B */
2429 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2430 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2431 	tmp |= LATENCY_WATERMARK_MASK(2);
2432 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2433 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2434 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2435 		LATENCY_HIGH_WATERMARK(line_time)));
2436 	/* restore original selection */
2437 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2438 
2439 	/* write the priority marks */
2440 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2441 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2442 
2443 	/* save values for DPM */
2444 	radeon_crtc->line_time = line_time;
2445 	radeon_crtc->wm_high = latency_watermark_a;
2446 	radeon_crtc->wm_low = latency_watermark_b;
2447 }
2448 
2449 void dce6_bandwidth_update(struct radeon_device *rdev)
2450 {
2451 	struct drm_display_mode *mode0 = NULL;
2452 	struct drm_display_mode *mode1 = NULL;
2453 	u32 num_heads = 0, lb_size;
2454 	int i;
2455 
2456 	if (!rdev->mode_info.mode_config_initialized)
2457 		return;
2458 
2459 	radeon_update_display_priority(rdev);
2460 
2461 	for (i = 0; i < rdev->num_crtc; i++) {
2462 		if (rdev->mode_info.crtcs[i]->base.enabled)
2463 			num_heads++;
2464 	}
2465 	for (i = 0; i < rdev->num_crtc; i += 2) {
2466 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2467 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2468 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2469 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2470 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2471 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2472 	}
2473 }
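
/*
 * Note the pairing in the loop above: crtcs i and i + 1 share one line
 * buffer, so each dce6_line_buffer_adjust() call is handed the sibling's
 * mode as other_mode to pick between the half and whole allocations.
 */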
2474 
2475 /*
2476  * Core functions
2477  */
2478 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2479 {
2480 	u32 *tile = rdev->config.si.tile_mode_array;
2481 	const u32 num_tile_mode_states =
2482 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2483 	u32 reg_offset, split_equal_to_row_size;
2484 
2485 	switch (rdev->config.si.mem_row_size_in_kb) {
2486 	case 1:
2487 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2488 		break;
2489 	case 2:
2490 	default:
2491 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2492 		break;
2493 	case 4:
2494 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2495 		break;
2496 	}
2497 
2498 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2499 		tile[reg_offset] = 0;
2500 
2501 	switch (rdev->family) {
2502 	case CHIP_TAHITI:
2503 	case CHIP_PITCAIRN:
2504 		/* non-AA compressed depth or any compressed stencil */
2505 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2508 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2509 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2510 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2513 		/* 2xAA/4xAA compressed depth only */
2514 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2517 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2518 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2519 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2521 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522 		/* 8xAA compressed depth only */
2523 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2527 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2528 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2532 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2536 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2537 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2540 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2544 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2546 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2550 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2552 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2553 			   TILE_SPLIT(split_equal_to_row_size) |
2554 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2555 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2558 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2559 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2562 			   TILE_SPLIT(split_equal_to_row_size) |
2563 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2564 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2567 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2568 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2569 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2570 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			   TILE_SPLIT(split_equal_to_row_size) |
2572 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2573 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576 		/* 1D and 1D Array Surfaces */
2577 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2578 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2582 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2585 		/* Displayable maps. */
2586 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2587 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2589 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2590 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2591 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2593 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2594 		/* Display 8bpp. */
2595 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2598 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2600 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2602 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603 		/* Display 16bpp. */
2604 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2606 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2607 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2608 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2609 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612 		/* Display 32bpp. */
2613 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2617 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2618 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2620 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2621 		/* Thin. */
2622 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2624 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2626 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2627 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2630 		/* Thin 8 bpp. */
2631 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2634 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2636 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2639 		/* Thin 16 bpp. */
2640 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2642 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2643 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2644 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2645 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2647 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2648 		/* Thin 32 bpp. */
2649 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2651 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2652 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2653 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2654 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2657 		/* Thin 64 bpp. */
2658 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661 			   TILE_SPLIT(split_equal_to_row_size) |
2662 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2663 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2666 		/* 8 bpp PRT. */
2667 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2668 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2669 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2670 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2671 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2672 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2673 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2675 		/* 16 bpp PRT */
2676 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2681 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2683 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2684 		/* 32 bpp PRT */
2685 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2690 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2692 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693 		/* 64 bpp PRT */
2694 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2698 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2699 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2702 		/* 128 bpp PRT */
2703 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2705 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2706 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2707 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2708 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2711 
2712 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2713 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2714 		break;
2715 
2716 	case CHIP_VERDE:
2717 	case CHIP_OLAND:
2718 	case CHIP_HAINAN:
2719 		/* non-AA compressed depth or any compressed stencil */
2720 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2722 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2724 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2725 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2727 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2728 		/* 2xAA/4xAA compressed depth only */
2729 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2731 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2732 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2733 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2734 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2737 		/* 8xAA compressed depth only */
2738 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2740 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2742 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2743 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2746 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2747 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2751 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2752 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2754 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2755 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2756 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2761 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2763 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2764 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2765 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   TILE_SPLIT(split_equal_to_row_size) |
2769 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2770 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2774 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2776 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2777 			   TILE_SPLIT(split_equal_to_row_size) |
2778 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2779 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2782 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2783 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2784 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2785 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			   TILE_SPLIT(split_equal_to_row_size) |
2787 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2788 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2790 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2791 		/* 1D and 1D Array Surfaces */
2792 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2797 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2799 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2800 		/* Displayable maps. */
2801 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2802 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2806 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2808 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2809 		/* Display 8bpp. */
2810 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2812 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2814 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2815 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2817 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2818 		/* Display 16bpp. */
2819 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2822 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2823 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2824 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2826 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2827 		/* Display 32bpp. */
2828 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2831 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2832 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2833 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2836 		/* Thin. */
2837 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2839 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2840 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2841 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2842 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2845 		/* Thin 8 bpp. */
2846 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2848 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2850 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2851 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2853 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2854 		/* Thin 16 bpp. */
2855 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2858 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2860 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2862 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863 		/* Thin 32 bpp. */
2864 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2866 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2867 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2868 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2869 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2872 		/* Thin 64 bpp. */
2873 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2874 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2875 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2876 			   TILE_SPLIT(split_equal_to_row_size) |
2877 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2878 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2880 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2881 		/* 8 bpp PRT. */
2882 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2884 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2885 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2886 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2887 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2888 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2889 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2890 		/* 16 bpp PRT */
2891 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2893 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2894 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2895 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2896 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2898 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2899 		/* 32 bpp PRT */
2900 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2904 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2905 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2907 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2908 		/* 64 bpp PRT */
2909 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2911 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2912 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2913 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2914 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2917 		/* 128 bpp PRT */
2918 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2919 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2920 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2921 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2922 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2923 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2925 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2926 
2927 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2928 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2929 		break;
2930 
2931 	default:
2932 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2933 	}
2934 }
2935 
2936 static void si_select_se_sh(struct radeon_device *rdev,
2937 			    u32 se_num, u32 sh_num)
2938 {
2939 	u32 data = INSTANCE_BROADCAST_WRITES;
2940 
2941 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2942 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2943 	else if (se_num == 0xffffffff)
2944 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2945 	else if (sh_num == 0xffffffff)
2946 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2947 	else
2948 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2949 	WREG32(GRBM_GFX_INDEX, data);
2950 }
2951 
2952 static u32 si_create_bitmask(u32 bit_width)
2953 {
2954 	u32 i, mask = 0;
2955 
2956 	for (i = 0; i < bit_width; i++) {
2957 		mask <<= 1;
2958 		mask |= 1;
2959 	}
2960 	return mask;
2961 }
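
/*
 * Closed-form equivalent (sketch): for bit_width < 32 the loop computes
 *
 *	mask = (1U << bit_width) - 1;
 *
 * e.g. si_create_bitmask(5) == 0x1f.
 */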
2962 
2963 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2964 {
2965 	u32 data, mask;
2966 
2967 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2968 	if (data & 1)
2969 		data &= INACTIVE_CUS_MASK;
2970 	else
2971 		data = 0;
2972 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2973 
2974 	data >>= INACTIVE_CUS_SHIFT;
2975 
2976 	mask = si_create_bitmask(cu_per_sh);
2977 
2978 	return ~data & mask;
2979 }
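
/*
 * Worked example (hypothetical field value): with cu_per_sh = 5 and an
 * INACTIVE_CUS field of 0b00110, this returns ~0b00110 & 0x1f == 0b11001,
 * i.e. a bitmask of the compute units actually enabled in the currently
 * selected shader array.
 */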
2980 
2981 static void si_setup_spi(struct radeon_device *rdev,
2982 			 u32 se_num, u32 sh_per_se,
2983 			 u32 cu_per_sh)
2984 {
2985 	int i, j, k;
2986 	u32 data, mask, active_cu;
2987 
2988 	for (i = 0; i < se_num; i++) {
2989 		for (j = 0; j < sh_per_se; j++) {
2990 			si_select_se_sh(rdev, i, j);
2991 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2992 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2993 
2995 			for (k = 0; k < 16; k++) {
2996 				mask = 1u << k;	/* probe one CU bit at a time */
2997 				if (active_cu & mask) {
2998 					data &= ~mask;
2999 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3000 					break;
3001 				}
3002 			}
3003 		}
3004 	}
3005 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3006 }
3007 
3008 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3009 			      u32 max_rb_num_per_se,
3010 			      u32 sh_per_se)
3011 {
3012 	u32 data, mask;
3013 
3014 	data = RREG32(CC_RB_BACKEND_DISABLE);
3015 	if (data & 1)
3016 		data &= BACKEND_DISABLE_MASK;
3017 	else
3018 		data = 0;
3019 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3020 
3021 	data >>= BACKEND_DISABLE_SHIFT;
3022 
3023 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3024 
3025 	return data & mask;
3026 }
3027 
3028 static void si_setup_rb(struct radeon_device *rdev,
3029 			u32 se_num, u32 sh_per_se,
3030 			u32 max_rb_num_per_se)
3031 {
3032 	int i, j;
3033 	u32 data, mask;
3034 	u32 disabled_rbs = 0;
3035 	u32 enabled_rbs = 0;
3036 
3037 	for (i = 0; i < se_num; i++) {
3038 		for (j = 0; j < sh_per_se; j++) {
3039 			si_select_se_sh(rdev, i, j);
3040 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3041 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3042 		}
3043 	}
3044 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3045 
3046 	mask = 1;
3047 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3048 		if (!(disabled_rbs & mask))
3049 			enabled_rbs |= mask;
3050 		mask <<= 1;
3051 	}
3052 
3053 	rdev->config.si.backend_enable_mask = enabled_rbs;
3054 
3055 	for (i = 0; i < se_num; i++) {
3056 		si_select_se_sh(rdev, i, 0xffffffff);
3057 		data = 0;
3058 		for (j = 0; j < sh_per_se; j++) {
3059 			switch (enabled_rbs & 3) {
3060 			case 1:
3061 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3062 				break;
3063 			case 2:
3064 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3065 				break;
3066 			case 3:
3067 			default:
3068 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3069 				break;
3070 			}
3071 			enabled_rbs >>= 2;
3072 		}
3073 		WREG32(PA_SC_RASTER_CONFIG, data);
3074 	}
3075 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3076 }
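
/*
 * Sketch of the PA_SC_RASTER_CONFIG packing above: enabled_rbs is consumed
 * two bits per shader array, and each 2-bit value selects a
 * RASTER_CONFIG_RB_MAP_* layout shifted to field (i * sh_per_se + j) * 2
 * before the register is written once per shader engine.
 */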
3077 
3078 static void si_gpu_init(struct radeon_device *rdev)
3079 {
3080 	u32 gb_addr_config = 0;
3081 	u32 mc_shared_chmap, mc_arb_ramcfg;
3082 	u32 sx_debug_1;
3083 	u32 hdp_host_path_cntl;
3084 	u32 tmp;
3085 	int i, j;
3086 
3087 	switch (rdev->family) {
3088 	case CHIP_TAHITI:
3089 		rdev->config.si.max_shader_engines = 2;
3090 		rdev->config.si.max_tile_pipes = 12;
3091 		rdev->config.si.max_cu_per_sh = 8;
3092 		rdev->config.si.max_sh_per_se = 2;
3093 		rdev->config.si.max_backends_per_se = 4;
3094 		rdev->config.si.max_texture_channel_caches = 12;
3095 		rdev->config.si.max_gprs = 256;
3096 		rdev->config.si.max_gs_threads = 32;
3097 		rdev->config.si.max_hw_contexts = 8;
3098 
3099 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3100 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3101 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3102 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3103 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3104 		break;
3105 	case CHIP_PITCAIRN:
3106 		rdev->config.si.max_shader_engines = 2;
3107 		rdev->config.si.max_tile_pipes = 8;
3108 		rdev->config.si.max_cu_per_sh = 5;
3109 		rdev->config.si.max_sh_per_se = 2;
3110 		rdev->config.si.max_backends_per_se = 4;
3111 		rdev->config.si.max_texture_channel_caches = 8;
3112 		rdev->config.si.max_gprs = 256;
3113 		rdev->config.si.max_gs_threads = 32;
3114 		rdev->config.si.max_hw_contexts = 8;
3115 
3116 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3117 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3118 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3119 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3120 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3121 		break;
3122 	case CHIP_VERDE:
3123 	default:
3124 		rdev->config.si.max_shader_engines = 1;
3125 		rdev->config.si.max_tile_pipes = 4;
3126 		rdev->config.si.max_cu_per_sh = 5;
3127 		rdev->config.si.max_sh_per_se = 2;
3128 		rdev->config.si.max_backends_per_se = 4;
3129 		rdev->config.si.max_texture_channel_caches = 4;
3130 		rdev->config.si.max_gprs = 256;
3131 		rdev->config.si.max_gs_threads = 32;
3132 		rdev->config.si.max_hw_contexts = 8;
3133 
3134 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3135 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3136 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3137 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3138 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3139 		break;
3140 	case CHIP_OLAND:
3141 		rdev->config.si.max_shader_engines = 1;
3142 		rdev->config.si.max_tile_pipes = 4;
3143 		rdev->config.si.max_cu_per_sh = 6;
3144 		rdev->config.si.max_sh_per_se = 1;
3145 		rdev->config.si.max_backends_per_se = 2;
3146 		rdev->config.si.max_texture_channel_caches = 4;
3147 		rdev->config.si.max_gprs = 256;
3148 		rdev->config.si.max_gs_threads = 16;
3149 		rdev->config.si.max_hw_contexts = 8;
3150 
3151 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3152 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3153 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3154 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3155 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3156 		break;
3157 	case CHIP_HAINAN:
3158 		rdev->config.si.max_shader_engines = 1;
3159 		rdev->config.si.max_tile_pipes = 4;
3160 		rdev->config.si.max_cu_per_sh = 5;
3161 		rdev->config.si.max_sh_per_se = 1;
3162 		rdev->config.si.max_backends_per_se = 1;
3163 		rdev->config.si.max_texture_channel_caches = 2;
3164 		rdev->config.si.max_gprs = 256;
3165 		rdev->config.si.max_gs_threads = 16;
3166 		rdev->config.si.max_hw_contexts = 8;
3167 
3168 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3169 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3170 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3171 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3172 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3173 		break;
3174 	}
3175 
3176 	/* Initialize HDP */
3177 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3178 		WREG32((0x2c14 + j), 0x00000000);
3179 		WREG32((0x2c18 + j), 0x00000000);
3180 		WREG32((0x2c1c + j), 0x00000000);
3181 		WREG32((0x2c20 + j), 0x00000000);
3182 		WREG32((0x2c24 + j), 0x00000000);
3183 	}
3184 
3185 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3186 	WREG32(SRBM_INT_CNTL, 1);
3187 	WREG32(SRBM_INT_ACK, 1);
3188 
3189 	evergreen_fix_pci_max_read_req_size(rdev);
3190 
3191 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3192 
3193 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3194 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3195 
3196 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3197 	rdev->config.si.mem_max_burst_length_bytes = 256;
3198 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3199 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3200 	if (rdev->config.si.mem_row_size_in_kb > 4)
3201 		rdev->config.si.mem_row_size_in_kb = 4;
3202 	/* XXX use MC settings? */
3203 	rdev->config.si.shader_engine_tile_size = 32;
3204 	rdev->config.si.num_gpus = 1;
3205 	rdev->config.si.multi_gpu_tile_size = 64;
3206 
3207 	/* fix up row size */
3208 	gb_addr_config &= ~ROW_SIZE_MASK;
3209 	switch (rdev->config.si.mem_row_size_in_kb) {
3210 	case 1:
3211 	default:
3212 		gb_addr_config |= ROW_SIZE(0);
3213 		break;
3214 	case 2:
3215 		gb_addr_config |= ROW_SIZE(1);
3216 		break;
3217 	case 4:
3218 		gb_addr_config |= ROW_SIZE(2);
3219 		break;
3220 	}
3221 
3222 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3223 	 * not have bank info, so create a custom tiling dword.
3224 	 * bits 3:0   num_pipes
3225 	 * bits 7:4   num_banks
3226 	 * bits 11:8  group_size
3227 	 * bits 15:12 row_size
3228 	 */
3229 	rdev->config.si.tile_config = 0;
3230 	switch (rdev->config.si.num_tile_pipes) {
3231 	case 1:
3232 		rdev->config.si.tile_config |= (0 << 0);
3233 		break;
3234 	case 2:
3235 		rdev->config.si.tile_config |= (1 << 0);
3236 		break;
3237 	case 4:
3238 		rdev->config.si.tile_config |= (2 << 0);
3239 		break;
3240 	case 8:
3241 	default:
3242 		/* XXX what about 12? */
3243 		rdev->config.si.tile_config |= (3 << 0);
3244 		break;
3245 	}
3246 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3247 	case 0: /* four banks */
3248 		rdev->config.si.tile_config |= 0 << 4;
3249 		break;
3250 	case 1: /* eight banks */
3251 		rdev->config.si.tile_config |= 1 << 4;
3252 		break;
3253 	case 2: /* sixteen banks */
3254 	default:
3255 		rdev->config.si.tile_config |= 2 << 4;
3256 		break;
3257 	}
3258 	rdev->config.si.tile_config |=
3259 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3260 	rdev->config.si.tile_config |=
3261 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
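	/* e.g. an 8-pipe, 16-bank part with a pipe interleave field of 1
	 * and a 4KB row (ROW_SIZE(2)) packs to 0x2123:
	 * 3 | (2 << 4) | (1 << 8) | (2 << 12) -- illustrative values only.
	 */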
3262 
3263 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3264 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3265 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3266 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3267 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3268 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3269 	if (rdev->has_uvd) {
3270 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3271 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3272 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3273 	}
3274 
3275 	si_tiling_mode_table_init(rdev);
3276 
3277 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3278 		    rdev->config.si.max_sh_per_se,
3279 		    rdev->config.si.max_backends_per_se);
3280 
3281 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3282 		     rdev->config.si.max_sh_per_se,
3283 		     rdev->config.si.max_cu_per_sh);
3284 
3285 	rdev->config.si.active_cus = 0;
3286 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3287 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3288 			rdev->config.si.active_cus +=
3289 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3290 		}
3291 	}
3292 
3293 	/* set HW defaults for 3D engine */
3294 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3295 				     ROQ_IB2_START(0x2b)));
3296 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3297 
3298 	sx_debug_1 = RREG32(SX_DEBUG_1);
3299 	WREG32(SX_DEBUG_1, sx_debug_1);
3300 
3301 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3302 
3303 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3304 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3305 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3306 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3307 
3308 	WREG32(VGT_NUM_INSTANCES, 1);
3309 
3310 	WREG32(CP_PERFMON_CNTL, 0);
3311 
3312 	WREG32(SQ_CONFIG, 0);
3313 
3314 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3315 					  FORCE_EOV_MAX_REZ_CNT(255)));
3316 
3317 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3318 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3319 
3320 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3321 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3322 
3323 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3324 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3325 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3326 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3327 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3328 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3329 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3330 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3331 
3332 	tmp = RREG32(HDP_MISC_CNTL);
3333 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3334 	WREG32(HDP_MISC_CNTL, tmp);
3335 
3336 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3337 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3338 
3339 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3340 
3341 	udelay(50);
3342 }
3343 
3344 /*
3345  * GPU scratch register helper functions.
3346  */
3347 static void si_scratch_init(struct radeon_device *rdev)
3348 {
3349 	int i;
3350 
3351 	rdev->scratch.num_reg = 7;
3352 	rdev->scratch.reg_base = SCRATCH_REG0;
3353 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3354 		rdev->scratch.free[i] = true;
3355 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3356 	}
3357 }
3358 
3359 void si_fence_ring_emit(struct radeon_device *rdev,
3360 			struct radeon_fence *fence)
3361 {
3362 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3363 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3364 
3365 	/* flush read cache over gart */
3366 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3367 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3368 	radeon_ring_write(ring, 0);
3369 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3370 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3371 			  PACKET3_TC_ACTION_ENA |
3372 			  PACKET3_SH_KCACHE_ACTION_ENA |
3373 			  PACKET3_SH_ICACHE_ACTION_ENA);
3374 	radeon_ring_write(ring, 0xFFFFFFFF);
3375 	radeon_ring_write(ring, 0);
3376 	radeon_ring_write(ring, 10); /* poll interval */
3377 	/* EVENT_WRITE_EOP - flush caches, send int */
3378 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3379 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3380 	radeon_ring_write(ring, lower_32_bits(addr));
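	/* DATA_SEL(1) writes the 32-bit fence value; INT_SEL(2) raises the
	 * interrupt once the write has been confirmed.
	 */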
3381 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3382 	radeon_ring_write(ring, fence->seq);
3383 	radeon_ring_write(ring, 0);
3384 }
3385 
3386 /*
3387  * IB stuff
3388  */
3389 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3390 {
3391 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3392 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3393 	u32 header;
3394 
3395 	if (ib->is_const_ib) {
3396 		/* set switch buffer packet before const IB */
3397 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3398 		radeon_ring_write(ring, 0);
3399 
3400 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3401 	} else {
3402 		u32 next_rptr;
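		/* next_rptr records where the CP read pointer will land once
		 * this submission is consumed: the rptr-save write below
		 * (3 dwords, or 5 for the WRITE_DATA variant), the 4-dword
		 * INDIRECT_BUFFER packet, and the 8-dword cache flush that
		 * trails the IB.
		 */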
3403 		if (ring->rptr_save_reg) {
3404 			next_rptr = ring->wptr + 3 + 4 + 8;
3405 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3406 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3407 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3408 			radeon_ring_write(ring, next_rptr);
3409 		} else if (rdev->wb.enabled) {
3410 			next_rptr = ring->wptr + 5 + 4 + 8;
3411 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
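			/* (1 << 8) is WRITE_DATA_DST_SEL(1): destination is memory */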
3412 			radeon_ring_write(ring, (1 << 8));
3413 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3414 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3415 			radeon_ring_write(ring, next_rptr);
3416 		}
3417 
3418 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3419 	}
3420 
3421 	radeon_ring_write(ring, header);
3422 	radeon_ring_write(ring,
3423 #ifdef __BIG_ENDIAN
3424 			  (2 << 0) |
3425 #endif
3426 			  (ib->gpu_addr & 0xFFFFFFFC));
3427 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3428 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3429 
3430 	if (!ib->is_const_ib) {
3431 		/* flush read cache over gart for this vmid */
3432 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3433 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3434 		radeon_ring_write(ring, vm_id);
3435 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3436 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3437 				  PACKET3_TC_ACTION_ENA |
3438 				  PACKET3_SH_KCACHE_ACTION_ENA |
3439 				  PACKET3_SH_ICACHE_ACTION_ENA);
3440 		radeon_ring_write(ring, 0xFFFFFFFF);
3441 		radeon_ring_write(ring, 0);
3442 		radeon_ring_write(ring, 10); /* poll interval */
3443 	}
3444 }
3445 
3446 /*
3447  * CP.
3448  */
3449 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3450 {
3451 	if (enable)
3452 		WREG32(CP_ME_CNTL, 0);
3453 	else {
3454 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3455 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3456 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3457 		WREG32(SCRATCH_UMSK, 0);
3458 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3459 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3460 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3461 	}
3462 	udelay(50);
3463 }
3464 
3465 static int si_cp_load_microcode(struct radeon_device *rdev)
3466 {
3467 	int i;
3468 
3469 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3470 		return -EINVAL;
3471 
3472 	si_cp_enable(rdev, false);
3473 
3474 	if (rdev->new_fw) {
3475 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3476 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3477 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3478 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3479 		const struct gfx_firmware_header_v1_0 *me_hdr =
3480 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3481 		const __le32 *fw_data;
3482 		u32 fw_size;
3483 
3484 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3485 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3486 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3487 
3488 		/* PFP */
3489 		fw_data = (const __le32 *)
3490 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3491 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3492 		WREG32(CP_PFP_UCODE_ADDR, 0);
3493 		for (i = 0; i < fw_size; i++)
3494 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3495 		WREG32(CP_PFP_UCODE_ADDR, 0);
3496 
3497 		/* CE */
3498 		fw_data = (const __le32 *)
3499 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3500 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3501 		WREG32(CP_CE_UCODE_ADDR, 0);
3502 		for (i = 0; i < fw_size; i++)
3503 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3504 		WREG32(CP_CE_UCODE_ADDR, 0);
3505 
3506 		/* ME */
3507 		fw_data = (const __le32 *)
3508 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3509 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3510 		WREG32(CP_ME_RAM_WADDR, 0);
3511 		for (i = 0; i < fw_size; i++)
3512 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3513 		WREG32(CP_ME_RAM_WADDR, 0);
3514 	} else {
3515 		const __be32 *fw_data;
3516 
3517 		/* PFP */
3518 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3519 		WREG32(CP_PFP_UCODE_ADDR, 0);
3520 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3521 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3522 		WREG32(CP_PFP_UCODE_ADDR, 0);
3523 
3524 		/* CE */
3525 		fw_data = (const __be32 *)rdev->ce_fw->data;
3526 		WREG32(CP_CE_UCODE_ADDR, 0);
3527 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3528 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3529 		WREG32(CP_CE_UCODE_ADDR, 0);
3530 
3531 		/* ME */
3532 		fw_data = (const __be32 *)rdev->me_fw->data;
3533 		WREG32(CP_ME_RAM_WADDR, 0);
3534 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3535 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3536 		WREG32(CP_ME_RAM_WADDR, 0);
3537 	}
3538 
3539 	WREG32(CP_PFP_UCODE_ADDR, 0);
3540 	WREG32(CP_CE_UCODE_ADDR, 0);
3541 	WREG32(CP_ME_RAM_WADDR, 0);
3542 	WREG32(CP_ME_RAM_RADDR, 0);
3543 	return 0;
3544 }
3545 
3546 static int si_cp_start(struct radeon_device *rdev)
3547 {
3548 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3549 	int r, i;
3550 
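
	/* 7 dwords for ME_INITIALIZE, 4 for the CE partition SET_BASE */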
3551 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3552 	if (r) {
3553 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3554 		return r;
3555 	}
3556 	/* init the CP */
3557 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3558 	radeon_ring_write(ring, 0x1);
3559 	radeon_ring_write(ring, 0x0);
3560 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3561 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3562 	radeon_ring_write(ring, 0);
3563 	radeon_ring_write(ring, 0);
3564 
3565 	/* init the CE partitions */
3566 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3567 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3568 	radeon_ring_write(ring, 0xc000);
3569 	radeon_ring_write(ring, 0xe000);
3570 	radeon_ring_unlock_commit(rdev, ring, false);
3571 
3572 	si_cp_enable(rdev, true);
3573 
3574 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3575 	if (r) {
3576 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3577 		return r;
3578 	}
3579 
3580 	/* setup clear context state */
3581 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3582 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3583 
3584 	for (i = 0; i < si_default_size; i++)
3585 		radeon_ring_write(ring, si_default_state[i]);
3586 
3587 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3588 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3589 
3590 	/* set clear context state */
3591 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3592 	radeon_ring_write(ring, 0);
3593 
3594 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3595 	radeon_ring_write(ring, 0x00000316);
3596 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3597 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3598 
3599 	radeon_ring_unlock_commit(rdev, ring, false);
3600 
3601 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3602 		ring = &rdev->ring[i];
3603 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3604 
3605 		/* clear the compute context state */
3606 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3607 		radeon_ring_write(ring, 0);
3608 
3609 		radeon_ring_unlock_commit(rdev, ring, false);
3610 	}
3611 
3612 	return 0;
3613 }
3614 
3615 static void si_cp_fini(struct radeon_device *rdev)
3616 {
3617 	struct radeon_ring *ring;
3618 	si_cp_enable(rdev, false);
3619 
3620 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3621 	radeon_ring_fini(rdev, ring);
3622 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3623 
3624 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3625 	radeon_ring_fini(rdev, ring);
3626 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3627 
3628 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3629 	radeon_ring_fini(rdev, ring);
3630 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3631 }
3632 
3633 static int si_cp_resume(struct radeon_device *rdev)
3634 {
3635 	struct radeon_ring *ring;
3636 	u32 tmp;
3637 	u32 rb_bufsz;
3638 	int r;
3639 
3640 	si_enable_gui_idle_interrupt(rdev, false);
3641 
3642 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3643 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3644 
3645 	/* Set the write pointer delay */
3646 	WREG32(CP_RB_WPTR_DELAY, 0);
3647 
3648 	WREG32(CP_DEBUG, 0);
3649 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3650 
3651 	/* ring 0 - compute and gfx */
3652 	/* Set ring buffer size */
3653 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
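	/* the low bits of CP_RB0_CNTL take log2 of the ring size in 8-byte
	 * units; the field at bit 8 takes the fetch block size, here one
	 * GPU page (also in 8-byte units).
	 */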
3654 	rb_bufsz = order_base_2(ring->ring_size / 8);
3655 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3656 #ifdef __BIG_ENDIAN
3657 	tmp |= BUF_SWAP_32BIT;
3658 #endif
3659 	WREG32(CP_RB0_CNTL, tmp);
3660 
3661 	/* Initialize the ring buffer's read and write pointers */
3662 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3663 	ring->wptr = 0;
3664 	WREG32(CP_RB0_WPTR, ring->wptr);
3665 
3666 	/* set the wb address whether it's enabled or not */
3667 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3668 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3669 
3670 	if (rdev->wb.enabled)
3671 		WREG32(SCRATCH_UMSK, 0xff);
3672 	else {
3673 		tmp |= RB_NO_UPDATE;
3674 		WREG32(SCRATCH_UMSK, 0);
3675 	}
3676 
3677 	mdelay(1);
3678 	WREG32(CP_RB0_CNTL, tmp);
3679 
3680 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3681 
3682 	/* ring1  - compute only */
3683 	/* Set ring buffer size */
3684 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3685 	rb_bufsz = order_base_2(ring->ring_size / 8);
3686 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3687 #ifdef __BIG_ENDIAN
3688 	tmp |= BUF_SWAP_32BIT;
3689 #endif
3690 	WREG32(CP_RB1_CNTL, tmp);
3691 
3692 	/* Initialize the ring buffer's read and write pointers */
3693 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3694 	ring->wptr = 0;
3695 	WREG32(CP_RB1_WPTR, ring->wptr);
3696 
3697 	/* set the wb address whether it's enabled or not */
3698 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3699 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3700 
3701 	mdelay(1);
3702 	WREG32(CP_RB1_CNTL, tmp);
3703 
3704 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3705 
3706 	/* ring2 - compute only */
3707 	/* Set ring buffer size */
3708 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3709 	rb_bufsz = order_base_2(ring->ring_size / 8);
3710 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3711 #ifdef __BIG_ENDIAN
3712 	tmp |= BUF_SWAP_32BIT;
3713 #endif
3714 	WREG32(CP_RB2_CNTL, tmp);
3715 
3716 	/* Initialize the ring buffer's read and write pointers */
3717 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3718 	ring->wptr = 0;
3719 	WREG32(CP_RB2_WPTR, ring->wptr);
3720 
3721 	/* set the wb address whether it's enabled or not */
3722 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3723 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3724 
3725 	mdelay(1);
3726 	WREG32(CP_RB2_CNTL, tmp);
3727 
3728 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3729 
3730 	/* start the rings */
3731 	si_cp_start(rdev);
3732 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3733 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3734 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3735 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3736 	if (r) {
3737 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3738 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3739 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3740 		return r;
3741 	}
3742 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3743 	if (r) {
3744 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3745 	}
3746 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3747 	if (r) {
3748 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3749 	}
3750 
3751 	si_enable_gui_idle_interrupt(rdev, true);
3752 
3753 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3754 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3755 
3756 	return 0;
3757 }
3758 
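/**
 * si_gpu_check_soft_reset - check which GPU blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Sample the GRBM, SRBM, DMA and VM L2 status registers and build a
 * mask of RADEON_RESET_* flags for the blocks that appear to be hung
 * (SI). Returns 0 if everything is idle.
 */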
3759 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3760 {
3761 	u32 reset_mask = 0;
3762 	u32 tmp;
3763 
3764 	/* GRBM_STATUS */
3765 	tmp = RREG32(GRBM_STATUS);
3766 	if (tmp & (PA_BUSY | SC_BUSY |
3767 		   BCI_BUSY | SX_BUSY |
3768 		   TA_BUSY | VGT_BUSY |
3769 		   DB_BUSY | CB_BUSY |
3770 		   GDS_BUSY | SPI_BUSY |
3771 		   IA_BUSY | IA_BUSY_NO_DMA))
3772 		reset_mask |= RADEON_RESET_GFX;
3773 
3774 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3775 		   CP_BUSY | CP_COHERENCY_BUSY))
3776 		reset_mask |= RADEON_RESET_CP;
3777 
3778 	if (tmp & GRBM_EE_BUSY)
3779 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3780 
3781 	/* GRBM_STATUS2 */
3782 	tmp = RREG32(GRBM_STATUS2);
3783 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3784 		reset_mask |= RADEON_RESET_RLC;
3785 
3786 	/* DMA_STATUS_REG 0 */
3787 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3788 	if (!(tmp & DMA_IDLE))
3789 		reset_mask |= RADEON_RESET_DMA;
3790 
3791 	/* DMA_STATUS_REG 1 */
3792 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3793 	if (!(tmp & DMA_IDLE))
3794 		reset_mask |= RADEON_RESET_DMA1;
3795 
3796 	/* SRBM_STATUS2 */
3797 	tmp = RREG32(SRBM_STATUS2);
3798 	if (tmp & DMA_BUSY)
3799 		reset_mask |= RADEON_RESET_DMA;
3800 
3801 	if (tmp & DMA1_BUSY)
3802 		reset_mask |= RADEON_RESET_DMA1;
3803 
3804 	/* SRBM_STATUS */
3805 	tmp = RREG32(SRBM_STATUS);
3806 
3807 	if (tmp & IH_BUSY)
3808 		reset_mask |= RADEON_RESET_IH;
3809 
3810 	if (tmp & SEM_BUSY)
3811 		reset_mask |= RADEON_RESET_SEM;
3812 
3813 	if (tmp & GRBM_RQ_PENDING)
3814 		reset_mask |= RADEON_RESET_GRBM;
3815 
3816 	if (tmp & VMC_BUSY)
3817 		reset_mask |= RADEON_RESET_VMC;
3818 
3819 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3820 		   MCC_BUSY | MCD_BUSY))
3821 		reset_mask |= RADEON_RESET_MC;
3822 
3823 	if (evergreen_is_display_hung(rdev))
3824 		reset_mask |= RADEON_RESET_DISPLAY;
3825 
3826 	/* VM_L2_STATUS */
3827 	tmp = RREG32(VM_L2_STATUS);
3828 	if (tmp & L2_BUSY)
3829 		reset_mask |= RADEON_RESET_VMC;
3830 
3831 	/* Skip MC reset as it's most likely not hung, just busy */
3832 	if (reset_mask & RADEON_RESET_MC) {
3833 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3834 		reset_mask &= ~RADEON_RESET_MC;
3835 	}
3836 
3837 	return reset_mask;
3838 }
3839 
3840 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3841 {
3842 	struct evergreen_mc_save save;
3843 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3844 	u32 tmp;
3845 
3846 	if (reset_mask == 0)
3847 		return;
3848 
3849 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3850 
3851 	evergreen_print_gpu_status_regs(rdev);
3852 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3853 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3854 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3855 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3856 
3857 	/* disable PG/CG */
3858 	si_fini_pg(rdev);
3859 	si_fini_cg(rdev);
3860 
3861 	/* stop the rlc */
3862 	si_rlc_stop(rdev);
3863 
3864 	/* Disable CP parsing/prefetching */
3865 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3866 
3867 	if (reset_mask & RADEON_RESET_DMA) {
3868 		/* dma0 */
3869 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3870 		tmp &= ~DMA_RB_ENABLE;
3871 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3872 	}
3873 	if (reset_mask & RADEON_RESET_DMA1) {
3874 		/* dma1 */
3875 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3876 		tmp &= ~DMA_RB_ENABLE;
3877 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3878 	}
3879 
3880 	udelay(50);
3881 
3882 	evergreen_mc_stop(rdev, &save);
3883 	if (evergreen_mc_wait_for_idle(rdev)) {
3884 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3885 	}
3886 
3887 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3888 		grbm_soft_reset = SOFT_RESET_CB |
3889 			SOFT_RESET_DB |
3890 			SOFT_RESET_GDS |
3891 			SOFT_RESET_PA |
3892 			SOFT_RESET_SC |
3893 			SOFT_RESET_BCI |
3894 			SOFT_RESET_SPI |
3895 			SOFT_RESET_SX |
3896 			SOFT_RESET_TC |
3897 			SOFT_RESET_TA |
3898 			SOFT_RESET_VGT |
3899 			SOFT_RESET_IA;
3900 	}
3901 
3902 	if (reset_mask & RADEON_RESET_CP) {
3903 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3904 
3905 		srbm_soft_reset |= SOFT_RESET_GRBM;
3906 	}
3907 
3908 	if (reset_mask & RADEON_RESET_DMA)
3909 		srbm_soft_reset |= SOFT_RESET_DMA;
3910 
3911 	if (reset_mask & RADEON_RESET_DMA1)
3912 		srbm_soft_reset |= SOFT_RESET_DMA1;
3913 
3914 	if (reset_mask & RADEON_RESET_DISPLAY)
3915 		srbm_soft_reset |= SOFT_RESET_DC;
3916 
3917 	if (reset_mask & RADEON_RESET_RLC)
3918 		grbm_soft_reset |= SOFT_RESET_RLC;
3919 
3920 	if (reset_mask & RADEON_RESET_SEM)
3921 		srbm_soft_reset |= SOFT_RESET_SEM;
3922 
3923 	if (reset_mask & RADEON_RESET_IH)
3924 		srbm_soft_reset |= SOFT_RESET_IH;
3925 
3926 	if (reset_mask & RADEON_RESET_GRBM)
3927 		srbm_soft_reset |= SOFT_RESET_GRBM;
3928 
3929 	if (reset_mask & RADEON_RESET_VMC)
3930 		srbm_soft_reset |= SOFT_RESET_VMC;
3931 
3932 	if (reset_mask & RADEON_RESET_MC)
3933 		srbm_soft_reset |= SOFT_RESET_MC;
3934 
3935 	if (grbm_soft_reset) {
3936 		tmp = RREG32(GRBM_SOFT_RESET);
3937 		tmp |= grbm_soft_reset;
3938 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3939 		WREG32(GRBM_SOFT_RESET, tmp);
3940 		tmp = RREG32(GRBM_SOFT_RESET);
3941 
3942 		udelay(50);
3943 
3944 		tmp &= ~grbm_soft_reset;
3945 		WREG32(GRBM_SOFT_RESET, tmp);
3946 		tmp = RREG32(GRBM_SOFT_RESET);
3947 	}
3948 
3949 	if (srbm_soft_reset) {
3950 		tmp = RREG32(SRBM_SOFT_RESET);
3951 		tmp |= srbm_soft_reset;
3952 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3953 		WREG32(SRBM_SOFT_RESET, tmp);
3954 		tmp = RREG32(SRBM_SOFT_RESET);
3955 
3956 		udelay(50);
3957 
3958 		tmp &= ~srbm_soft_reset;
3959 		WREG32(SRBM_SOFT_RESET, tmp);
3960 		tmp = RREG32(SRBM_SOFT_RESET);
3961 	}
3962 
3963 	/* Wait a little for things to settle down */
3964 	udelay(50);
3965 
3966 	evergreen_mc_resume(rdev, &save);
3967 	udelay(50);
3968 
3969 	evergreen_print_gpu_status_regs(rdev);
3970 }
3971 
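/* Switch the engine and memory clocks onto the bypass (reference clock)
 * path so the PLLs can be safely powered down before a PCI config reset.
 */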
3972 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3973 {
3974 	u32 tmp, i;
3975 
3976 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3977 	tmp |= SPLL_BYPASS_EN;
3978 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3979 
3980 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3981 	tmp |= SPLL_CTLREQ_CHG;
3982 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3983 
3984 	for (i = 0; i < rdev->usec_timeout; i++) {
3985 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3986 			break;
3987 		udelay(1);
3988 	}
3989 
3990 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3992 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993 
3994 	tmp = RREG32(MPLL_CNTL_MODE);
3995 	tmp &= ~MPLL_MCLK_SEL;
3996 	WREG32(MPLL_CNTL_MODE, tmp);
3997 }
3998 
3999 static void si_spll_powerdown(struct radeon_device *rdev)
4000 {
4001 	u32 tmp;
4002 
4003 	tmp = RREG32(SPLL_CNTL_MODE);
4004 	tmp |= SPLL_SW_DIR_CONTROL;
4005 	WREG32(SPLL_CNTL_MODE, tmp);
4006 
4007 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4008 	tmp |= SPLL_RESET;
4009 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4010 
4011 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4012 	tmp |= SPLL_SLEEP;
4013 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4014 
4015 	tmp = RREG32(SPLL_CNTL_MODE);
4016 	tmp &= ~SPLL_SW_DIR_CONTROL;
4017 	WREG32(SPLL_CNTL_MODE, tmp);
4018 }
4019 
4020 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4021 {
4022 	struct evergreen_mc_save save;
4023 	u32 tmp, i;
4024 
4025 	dev_info(rdev->dev, "GPU pci config reset\n");
4026 
4027 	/* disable dpm? */
4028 
4029 	/* disable cg/pg */
4030 	si_fini_pg(rdev);
4031 	si_fini_cg(rdev);
4032 
4033 	/* Disable CP parsing/prefetching */
4034 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4035 	/* dma0 */
4036 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4037 	tmp &= ~DMA_RB_ENABLE;
4038 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4039 	/* dma1 */
4040 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4041 	tmp &= ~DMA_RB_ENABLE;
4042 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4043 	/* XXX other engines? */
4044 
4045 	/* halt the rlc, disable cp internal ints */
4046 	si_rlc_stop(rdev);
4047 
4048 	udelay(50);
4049 
4050 	/* disable mem access */
4051 	evergreen_mc_stop(rdev, &save);
4052 	if (evergreen_mc_wait_for_idle(rdev)) {
4053 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4054 	}
4055 
4056 	/* set mclk/sclk to bypass */
4057 	si_set_clk_bypass_mode(rdev);
4058 	/* powerdown spll */
4059 	si_spll_powerdown(rdev);
4060 	/* disable BM */
4061 	pci_clear_master(rdev->pdev);
4062 	/* reset */
4063 	radeon_pci_config_reset(rdev);
4064 	/* wait for asic to come out of reset */
4065 	for (i = 0; i < rdev->usec_timeout; i++) {
4066 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4067 			break;
4068 		udelay(1);
4069 	}
4070 }
4071 
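/**
 * si_asic_reset - attempt to reset the GPU
 *
 * @rdev: radeon_device pointer
 * @hard: force a PCI config reset instead of trying a soft reset first
 *
 * Determine which blocks are hung and soft reset them, falling back to
 * a PCI config reset if the soft reset did not clear the hang and the
 * radeon_hard_reset module parameter allows it (SI).
 */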
4072 int si_asic_reset(struct radeon_device *rdev, bool hard)
4073 {
4074 	u32 reset_mask;
4075 
4076 	if (hard) {
4077 		si_gpu_pci_config_reset(rdev);
4078 		return 0;
4079 	}
4080 
4081 	reset_mask = si_gpu_check_soft_reset(rdev);
4082 
4083 	if (reset_mask)
4084 		r600_set_bios_scratch_engine_hung(rdev, true);
4085 
4086 	/* try soft reset */
4087 	si_gpu_soft_reset(rdev, reset_mask);
4088 
4089 	reset_mask = si_gpu_check_soft_reset(rdev);
4090 
4091 	/* try pci config reset */
4092 	if (reset_mask && radeon_hard_reset)
4093 		si_gpu_pci_config_reset(rdev);
4094 
4095 	reset_mask = si_gpu_check_soft_reset(rdev);
4096 
4097 	if (!reset_mask)
4098 		r600_set_bios_scratch_engine_hung(rdev, false);
4099 
4100 	return 0;
4101 }
4102 
4103 /**
4104  * si_gfx_is_lockup - Check if the GFX engine is locked up
4105  *
4106  * @rdev: radeon_device pointer
4107  * @ring: radeon_ring structure holding ring information
4108  *
4109  * Check if the GFX engine is locked up.
4110  * Returns true if the engine appears to be locked up, false if not.
4111  */
4112 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4113 {
4114 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4115 
4116 	if (!(reset_mask & (RADEON_RESET_GFX |
4117 			    RADEON_RESET_COMPUTE |
4118 			    RADEON_RESET_CP))) {
4119 		radeon_ring_lockup_update(rdev, ring);
4120 		return false;
4121 	}
4122 	return radeon_ring_test_lockup(rdev, ring);
4123 }
4124 
4125 /* MC */
4126 static void si_mc_program(struct radeon_device *rdev)
4127 {
4128 	struct evergreen_mc_save save;
4129 	u32 tmp;
4130 	int i, j;
4131 
4132 	/* Initialize HDP */
4133 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4134 		WREG32((0x2c14 + j), 0x00000000);
4135 		WREG32((0x2c18 + j), 0x00000000);
4136 		WREG32((0x2c1c + j), 0x00000000);
4137 		WREG32((0x2c20 + j), 0x00000000);
4138 		WREG32((0x2c24 + j), 0x00000000);
4139 	}
4140 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4141 
4142 	evergreen_mc_stop(rdev, &save);
4143 	if (radeon_mc_wait_for_idle(rdev)) {
4144 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4145 	}
4146 	if (!ASIC_IS_NODCE(rdev))
4147 		/* Lock out access through the VGA aperture */
4148 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4149 	/* Update configuration */
4150 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4151 	       rdev->mc.vram_start >> 12);
4152 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4153 	       rdev->mc.vram_end >> 12);
4154 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4155 	       rdev->vram_scratch.gpu_addr >> 12);
4156 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4157 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4158 	WREG32(MC_VM_FB_LOCATION, tmp);
4159 	/* XXX double check these! */
4160 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4161 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4162 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4163 	WREG32(MC_VM_AGP_BASE, 0);
4164 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4165 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4166 	if (radeon_mc_wait_for_idle(rdev)) {
4167 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4168 	}
4169 	evergreen_mc_resume(rdev, &save);
4170 	if (!ASIC_IS_NODCE(rdev)) {
4171 		/* we need to own VRAM, so turn off the VGA renderer here
4172 		 * to stop it overwriting our objects */
4173 		rv515_vga_render_disable(rdev);
4174 	}
4175 }
4176 
4177 void si_vram_gtt_location(struct radeon_device *rdev,
4178 			  struct radeon_mc *mc)
4179 {
4180 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4181 		/* leave room for at least 1024M GTT */
4182 		dev_warn(rdev->dev, "limiting VRAM\n");
4183 		mc->real_vram_size = 0xFFC0000000ULL;
4184 		mc->mc_vram_size = 0xFFC0000000ULL;
4185 	}
4186 	radeon_vram_location(rdev, &rdev->mc, 0);
4187 	rdev->mc.gtt_base_align = 0;
4188 	radeon_gtt_location(rdev, mc);
4189 }
4190 
4191 static int si_mc_init(struct radeon_device *rdev)
4192 {
4193 	u32 tmp;
4194 	int chansize, numchan;
4195 
4196 	/* Get VRAM information */
4197 	rdev->mc.vram_is_ddr = true;
4198 	tmp = RREG32(MC_ARB_RAMCFG);
4199 	if (tmp & CHANSIZE_OVERRIDE) {
4200 		chansize = 16;
4201 	} else if (tmp & CHANSIZE_MASK) {
4202 		chansize = 64;
4203 	} else {
4204 		chansize = 32;
4205 	}
4206 	tmp = RREG32(MC_SHARED_CHMAP);
4207 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4208 	case 0:
4209 	default:
4210 		numchan = 1;
4211 		break;
4212 	case 1:
4213 		numchan = 2;
4214 		break;
4215 	case 2:
4216 		numchan = 4;
4217 		break;
4218 	case 3:
4219 		numchan = 8;
4220 		break;
4221 	case 4:
4222 		numchan = 3;
4223 		break;
4224 	case 5:
4225 		numchan = 6;
4226 		break;
4227 	case 6:
4228 		numchan = 10;
4229 		break;
4230 	case 7:
4231 		numchan = 12;
4232 		break;
4233 	case 8:
4234 		numchan = 16;
4235 		break;
4236 	}
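	/* effective memory bus width in bits = channels * bits per channel */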
4237 	rdev->mc.vram_width = numchan * chansize;
4238 	/* Could the aperture size report 0? */
4239 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4240 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4241 	/* size in MB on si */
4242 	tmp = RREG32(CONFIG_MEMSIZE);
4243 	/* some boards may have garbage in the upper 16 bits */
4244 	if (tmp & 0xffff0000) {
4245 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4246 		if (tmp & 0xffff)
4247 			tmp &= 0xffff;
4248 	}
4249 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4250 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4251 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4252 	si_vram_gtt_location(rdev, &rdev->mc);
4253 	radeon_update_bandwidth_info(rdev);
4254 
4255 	return 0;
4256 }
4257 
4258 /*
4259  * GART
4260  */
4261 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4262 {
4263 	/* flush hdp cache */
4264 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4265 
4266 	/* bits 0-15 are the VM contexts 0-15 */
4267 	WREG32(VM_INVALIDATE_REQUEST, 1);
4268 }
4269 
4270 static int si_pcie_gart_enable(struct radeon_device *rdev)
4271 {
4272 	int r, i;
4273 
4274 	if (rdev->gart.robj == NULL) {
4275 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4276 		return -EINVAL;
4277 	}
4278 	r = radeon_gart_table_vram_pin(rdev);
4279 	if (r)
4280 		return r;
4281 	/* Setup TLB control */
4282 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4283 	       (0xA << 7) |
4284 	       ENABLE_L1_TLB |
4285 	       ENABLE_L1_FRAGMENT_PROCESSING |
4286 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4287 	       ENABLE_ADVANCED_DRIVER_MODEL |
4288 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4289 	/* Setup L2 cache */
4290 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4291 	       ENABLE_L2_FRAGMENT_PROCESSING |
4292 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4293 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4294 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4295 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4296 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4297 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4298 	       BANK_SELECT(4) |
4299 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4300 	/* setup context0 */
4301 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4302 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4303 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4304 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4305 			(u32)(rdev->dummy_page.addr >> 12));
4306 	WREG32(VM_CONTEXT0_CNTL2, 0);
4307 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4308 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4309 
4310 	WREG32(0x15D4, 0);
4311 	WREG32(0x15D8, 0);
4312 	WREG32(0x15DC, 0);
4313 
4314 	/* empty context1-15 */
4315 	/* set vm size, must be a multiple of 4 */
4316 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4317 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4318 	/* Assign the pt base to something valid for now; the pts used for
4319 	 * the VMs are determined by the application and setup and assigned
4320 	 * on the fly in the vm part of radeon_gart.c
4321 	 */
4322 	for (i = 1; i < 16; i++) {
4323 		if (i < 8)
4324 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4325 			       rdev->vm_manager.saved_table_addr[i]);
4326 		else
4327 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4328 			       rdev->vm_manager.saved_table_addr[i]);
4329 	}
4330 
4331 	/* enable context1-15 */
4332 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4333 	       (u32)(rdev->dummy_page.addr >> 12));
4334 	WREG32(VM_CONTEXT1_CNTL2, 4);
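	/* the hw block size field appears to encode log2(pages per block)
	 * minus 9 (i.e. the minimum block is 512 pages), hence the "- 9"
	 * below.
	 */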
4335 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4336 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4337 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4338 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4339 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4340 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4341 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4342 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4343 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4344 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4345 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4346 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4347 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4349 
4350 	si_pcie_gart_tlb_flush(rdev);
4351 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352 		 (unsigned)(rdev->mc.gtt_size >> 20),
4353 		 (unsigned long long)rdev->gart.table_addr);
4354 	rdev->gart.ready = true;
4355 	return 0;
4356 }
4357 
4358 static void si_pcie_gart_disable(struct radeon_device *rdev)
4359 {
4360 	unsigned i;
4361 
4362 	for (i = 1; i < 16; ++i) {
4363 		uint32_t reg;
4364 		if (i < 8)
4365 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4366 		else
4367 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4368 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4369 	}
4370 
4371 	/* Disable all tables */
4372 	WREG32(VM_CONTEXT0_CNTL, 0);
4373 	WREG32(VM_CONTEXT1_CNTL, 0);
4374 	/* Setup TLB control */
4375 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4376 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4377 	/* Setup L2 cache */
4378 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4379 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4380 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4381 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4382 	WREG32(VM_L2_CNTL2, 0);
4383 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4384 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4385 	radeon_gart_table_vram_unpin(rdev);
4386 }
4387 
4388 static void si_pcie_gart_fini(struct radeon_device *rdev)
4389 {
4390 	si_pcie_gart_disable(rdev);
4391 	radeon_gart_table_vram_free(rdev);
4392 	radeon_gart_fini(rdev);
4393 }
4394 
4395 /* vm parser */
4396 static bool si_vm_reg_valid(u32 reg)
4397 {
4398 	/* context regs are fine */
4399 	if (reg >= 0x28000)
4400 		return true;
4401 
4402 	/* shader regs are also fine */
4403 	if (reg >= 0xB000 && reg < 0xC000)
4404 		return true;
4405 
4406 	/* check config regs */
4407 	switch (reg) {
4408 	case GRBM_GFX_INDEX:
4409 	case CP_STRMOUT_CNTL:
4410 	case VGT_VTX_VECT_EJECT_REG:
4411 	case VGT_CACHE_INVALIDATION:
4412 	case VGT_ESGS_RING_SIZE:
4413 	case VGT_GSVS_RING_SIZE:
4414 	case VGT_GS_VERTEX_REUSE:
4415 	case VGT_PRIMITIVE_TYPE:
4416 	case VGT_INDEX_TYPE:
4417 	case VGT_NUM_INDICES:
4418 	case VGT_NUM_INSTANCES:
4419 	case VGT_TF_RING_SIZE:
4420 	case VGT_HS_OFFCHIP_PARAM:
4421 	case VGT_TF_MEMORY_BASE:
4422 	case PA_CL_ENHANCE:
4423 	case PA_SU_LINE_STIPPLE_VALUE:
4424 	case PA_SC_LINE_STIPPLE_STATE:
4425 	case PA_SC_ENHANCE:
4426 	case SQC_CACHES:
4427 	case SPI_STATIC_THREAD_MGMT_1:
4428 	case SPI_STATIC_THREAD_MGMT_2:
4429 	case SPI_STATIC_THREAD_MGMT_3:
4430 	case SPI_PS_MAX_WAVE_ID:
4431 	case SPI_CONFIG_CNTL:
4432 	case SPI_CONFIG_CNTL_1:
4433 	case TA_CNTL_AUX:
4434 	case TA_CS_BC_BASE_ADDR:
4435 		return true;
4436 	default:
4437 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4438 		return false;
4439 	}
4440 }
4441 
4442 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4443 				  u32 *ib, struct radeon_cs_packet *pkt)
4444 {
4445 	switch (pkt->opcode) {
4446 	case PACKET3_NOP:
4447 	case PACKET3_SET_BASE:
4448 	case PACKET3_SET_CE_DE_COUNTERS:
4449 	case PACKET3_LOAD_CONST_RAM:
4450 	case PACKET3_WRITE_CONST_RAM:
4451 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4452 	case PACKET3_DUMP_CONST_RAM:
4453 	case PACKET3_INCREMENT_CE_COUNTER:
4454 	case PACKET3_WAIT_ON_DE_COUNTER:
4455 	case PACKET3_CE_WRITE:
4456 		break;
4457 	default:
4458 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4459 		return -EINVAL;
4460 	}
4461 	return 0;
4462 }
4463 
4464 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4465 {
4466 	u32 start_reg, reg, i;
4467 	u32 command = ib[idx + 4];
4468 	u32 info = ib[idx + 1];
4469 	u32 idx_value = ib[idx];
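	/* COMMAND bits 20:0 hold the transfer byte count; SAS/DAS select
	 * the source/destination address space (register vs memory) and
	 * SAIC/DAIC treat that register address as non-incrementing.
	 */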
4470 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4471 		/* src address space is register */
4472 		if (((info & 0x60000000) >> 29) == 0) {
4473 			start_reg = idx_value << 2;
4474 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4475 				reg = start_reg;
4476 				if (!si_vm_reg_valid(reg)) {
4477 					DRM_ERROR("CP DMA Bad SRC register\n");
4478 					return -EINVAL;
4479 				}
4480 			} else {
4481 				for (i = 0; i < (command & 0x1fffff); i++) {
4482 					reg = start_reg + (4 * i);
4483 					if (!si_vm_reg_valid(reg)) {
4484 						DRM_ERROR("CP DMA Bad SRC register\n");
4485 						return -EINVAL;
4486 					}
4487 				}
4488 			}
4489 		}
4490 	}
4491 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4492 		/* dst address space is register */
4493 		if (((info & 0x00300000) >> 20) == 0) {
4494 			start_reg = ib[idx + 2];
4495 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4496 				reg = start_reg;
4497 				if (!si_vm_reg_valid(reg)) {
4498 					DRM_ERROR("CP DMA Bad DST register\n");
4499 					return -EINVAL;
4500 				}
4501 			} else {
4502 				for (i = 0; i < (command & 0x1fffff); i++) {
4503 					reg = start_reg + (4 * i);
4504 					if (!si_vm_reg_valid(reg)) {
4505 						DRM_ERROR("CP DMA Bad DST register\n");
4506 						return -EINVAL;
4507 					}
4508 				}
4509 			}
4510 		}
4511 	}
4512 	return 0;
4513 }
4514 
4515 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4516 				   u32 *ib, struct radeon_cs_packet *pkt)
4517 {
4518 	int r;
4519 	u32 idx = pkt->idx + 1;
4520 	u32 idx_value = ib[idx];
4521 	u32 start_reg, end_reg, reg, i;
4522 
4523 	switch (pkt->opcode) {
4524 	case PACKET3_NOP:
4525 	case PACKET3_SET_BASE:
4526 	case PACKET3_CLEAR_STATE:
4527 	case PACKET3_INDEX_BUFFER_SIZE:
4528 	case PACKET3_DISPATCH_DIRECT:
4529 	case PACKET3_DISPATCH_INDIRECT:
4530 	case PACKET3_ALLOC_GDS:
4531 	case PACKET3_WRITE_GDS_RAM:
4532 	case PACKET3_ATOMIC_GDS:
4533 	case PACKET3_ATOMIC:
4534 	case PACKET3_OCCLUSION_QUERY:
4535 	case PACKET3_SET_PREDICATION:
4536 	case PACKET3_COND_EXEC:
4537 	case PACKET3_PRED_EXEC:
4538 	case PACKET3_DRAW_INDIRECT:
4539 	case PACKET3_DRAW_INDEX_INDIRECT:
4540 	case PACKET3_INDEX_BASE:
4541 	case PACKET3_DRAW_INDEX_2:
4542 	case PACKET3_CONTEXT_CONTROL:
4543 	case PACKET3_INDEX_TYPE:
4544 	case PACKET3_DRAW_INDIRECT_MULTI:
4545 	case PACKET3_DRAW_INDEX_AUTO:
4546 	case PACKET3_DRAW_INDEX_IMMD:
4547 	case PACKET3_NUM_INSTANCES:
4548 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4549 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4550 	case PACKET3_DRAW_INDEX_OFFSET_2:
4551 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4552 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4553 	case PACKET3_MPEG_INDEX:
4554 	case PACKET3_WAIT_REG_MEM:
4555 	case PACKET3_MEM_WRITE:
4556 	case PACKET3_PFP_SYNC_ME:
4557 	case PACKET3_SURFACE_SYNC:
4558 	case PACKET3_EVENT_WRITE:
4559 	case PACKET3_EVENT_WRITE_EOP:
4560 	case PACKET3_EVENT_WRITE_EOS:
4561 	case PACKET3_SET_CONTEXT_REG:
4562 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4563 	case PACKET3_SET_SH_REG:
4564 	case PACKET3_SET_SH_REG_OFFSET:
4565 	case PACKET3_INCREMENT_DE_COUNTER:
4566 	case PACKET3_WAIT_ON_CE_COUNTER:
4567 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4568 	case PACKET3_ME_WRITE:
4569 		break;
4570 	case PACKET3_COPY_DATA:
4571 		if ((idx_value & 0xf00) == 0) {
4572 			reg = ib[idx + 3] * 4;
4573 			if (!si_vm_reg_valid(reg))
4574 				return -EINVAL;
4575 		}
4576 		break;
4577 	case PACKET3_WRITE_DATA:
4578 		if ((idx_value & 0xf00) == 0) {
4579 			start_reg = ib[idx + 1] * 4;
4580 			if (idx_value & 0x10000) {
4581 				if (!si_vm_reg_valid(start_reg))
4582 					return -EINVAL;
4583 			} else {
4584 				for (i = 0; i < (pkt->count - 2); i++) {
4585 					reg = start_reg + (4 * i);
4586 					if (!si_vm_reg_valid(reg))
4587 						return -EINVAL;
4588 				}
4589 			}
4590 		}
4591 		break;
4592 	case PACKET3_COND_WRITE:
4593 		if (idx_value & 0x100) {
4594 			reg = ib[idx + 5] * 4;
4595 			if (!si_vm_reg_valid(reg))
4596 				return -EINVAL;
4597 		}
4598 		break;
4599 	case PACKET3_COPY_DW:
4600 		if (idx_value & 0x2) {
4601 			reg = ib[idx + 3] * 4;
4602 			if (!si_vm_reg_valid(reg))
4603 				return -EINVAL;
4604 		}
4605 		break;
4606 	case PACKET3_SET_CONFIG_REG:
4607 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4608 		end_reg = 4 * pkt->count + start_reg - 4;
4609 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4610 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4611 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4612 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4613 			return -EINVAL;
4614 		}
4615 		for (i = 0; i < pkt->count; i++) {
4616 			reg = start_reg + (4 * i);
4617 			if (!si_vm_reg_valid(reg))
4618 				return -EINVAL;
4619 		}
4620 		break;
4621 	case PACKET3_CP_DMA:
4622 		r = si_vm_packet3_cp_dma_check(ib, idx);
4623 		if (r)
4624 			return r;
4625 		break;
4626 	default:
4627 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4628 		return -EINVAL;
4629 	}
4630 	return 0;
4631 }
4632 
4633 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4634 				       u32 *ib, struct radeon_cs_packet *pkt)
4635 {
4636 	int r;
4637 	u32 idx = pkt->idx + 1;
4638 	u32 idx_value = ib[idx];
4639 	u32 start_reg, reg, i;
4640 
4641 	switch (pkt->opcode) {
4642 	case PACKET3_NOP:
4643 	case PACKET3_SET_BASE:
4644 	case PACKET3_CLEAR_STATE:
4645 	case PACKET3_DISPATCH_DIRECT:
4646 	case PACKET3_DISPATCH_INDIRECT:
4647 	case PACKET3_ALLOC_GDS:
4648 	case PACKET3_WRITE_GDS_RAM:
4649 	case PACKET3_ATOMIC_GDS:
4650 	case PACKET3_ATOMIC:
4651 	case PACKET3_OCCLUSION_QUERY:
4652 	case PACKET3_SET_PREDICATION:
4653 	case PACKET3_COND_EXEC:
4654 	case PACKET3_PRED_EXEC:
4655 	case PACKET3_CONTEXT_CONTROL:
4656 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4657 	case PACKET3_WAIT_REG_MEM:
4658 	case PACKET3_MEM_WRITE:
4659 	case PACKET3_PFP_SYNC_ME:
4660 	case PACKET3_SURFACE_SYNC:
4661 	case PACKET3_EVENT_WRITE:
4662 	case PACKET3_EVENT_WRITE_EOP:
4663 	case PACKET3_EVENT_WRITE_EOS:
4664 	case PACKET3_SET_CONTEXT_REG:
4665 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4666 	case PACKET3_SET_SH_REG:
4667 	case PACKET3_SET_SH_REG_OFFSET:
4668 	case PACKET3_INCREMENT_DE_COUNTER:
4669 	case PACKET3_WAIT_ON_CE_COUNTER:
4670 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4671 	case PACKET3_ME_WRITE:
4672 		break;
4673 	case PACKET3_COPY_DATA:
4674 		if ((idx_value & 0xf00) == 0) {
4675 			reg = ib[idx + 3] * 4;
4676 			if (!si_vm_reg_valid(reg))
4677 				return -EINVAL;
4678 		}
4679 		break;
4680 	case PACKET3_WRITE_DATA:
4681 		if ((idx_value & 0xf00) == 0) {
4682 			start_reg = ib[idx + 1] * 4;
4683 			if (idx_value & 0x10000) {
4684 				if (!si_vm_reg_valid(start_reg))
4685 					return -EINVAL;
4686 			} else {
4687 				for (i = 0; i < (pkt->count - 2); i++) {
4688 					reg = start_reg + (4 * i);
4689 					if (!si_vm_reg_valid(reg))
4690 						return -EINVAL;
4691 				}
4692 			}
4693 		}
4694 		break;
4695 	case PACKET3_COND_WRITE:
4696 		if (idx_value & 0x100) {
4697 			reg = ib[idx + 5] * 4;
4698 			if (!si_vm_reg_valid(reg))
4699 				return -EINVAL;
4700 		}
4701 		break;
4702 	case PACKET3_COPY_DW:
4703 		if (idx_value & 0x2) {
4704 			reg = ib[idx + 3] * 4;
4705 			if (!si_vm_reg_valid(reg))
4706 				return -EINVAL;
4707 		}
4708 		break;
4709 	case PACKET3_CP_DMA:
4710 		r = si_vm_packet3_cp_dma_check(ib, idx);
4711 		if (r)
4712 			return r;
4713 		break;
4714 	default:
4715 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4716 		return -EINVAL;
4717 	}
4718 	return 0;
4719 }
4720 
4721 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4722 {
4723 	int ret = 0;
4724 	u32 idx = 0, i;
4725 	struct radeon_cs_packet pkt;
4726 
4727 	do {
4728 		pkt.idx = idx;
4729 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4730 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4731 		pkt.one_reg_wr = 0;
4732 		switch (pkt.type) {
4733 		case RADEON_PACKET_TYPE0:
4734 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4735 			ret = -EINVAL;
4736 			break;
4737 		case RADEON_PACKET_TYPE2:
4738 			idx += 1;
4739 			break;
4740 		case RADEON_PACKET_TYPE3:
4741 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4742 			if (ib->is_const_ib)
4743 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4744 			else {
4745 				switch (ib->ring) {
4746 				case RADEON_RING_TYPE_GFX_INDEX:
4747 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4748 					break;
4749 				case CAYMAN_RING_TYPE_CP1_INDEX:
4750 				case CAYMAN_RING_TYPE_CP2_INDEX:
4751 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4752 					break;
4753 				default:
4754 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4755 					ret = -EINVAL;
4756 					break;
4757 				}
4758 			}
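			/* a type-3 packet occupies count + 2 dwords: the
			 * header plus count + 1 payload dwords */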
4759 			idx += pkt.count + 2;
4760 			break;
4761 		default:
4762 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4763 			ret = -EINVAL;
4764 			break;
4765 		}
4766 		if (ret) {
4767 			for (i = 0; i < ib->length_dw; i++) {
4768 				if (i == idx)
4769 					printk("\t0x%08x <---\n", ib->ptr[i]);
4770 				else
4771 					printk("\t0x%08x\n", ib->ptr[i]);
4772 			}
4773 			break;
4774 		}
4775 	} while (idx < ib->length_dw);
4776 
4777 	return ret;
4778 }
4779 
4780 /*
4781  * vm
4782  */
4783 int si_vm_init(struct radeon_device *rdev)
4784 {
4785 	/* number of VMs */
4786 	rdev->vm_manager.nvm = 16;
4787 	/* base offset of vram pages */
4788 	rdev->vm_manager.vram_base_offset = 0;
4789 
4790 	return 0;
4791 }
4792 
4793 void si_vm_fini(struct radeon_device *rdev)
4794 {
4795 }
4796 
4797 /**
4798  * si_vm_decode_fault - print human readable fault info
4799  *
4800  * @rdev: radeon_device pointer
4801  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4802  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4803  *
4804  * Print human readable fault information (SI).
4805  */
4806 static void si_vm_decode_fault(struct radeon_device *rdev,
4807 			       u32 status, u32 addr)
4808 {
4809 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4810 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4811 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4812 	char *block;
4813 
4814 	if (rdev->family == CHIP_TAHITI) {
4815 		switch (mc_id) {
4816 		case 160:
4817 		case 144:
4818 		case 96:
4819 		case 80:
4820 		case 224:
4821 		case 208:
4822 		case 32:
4823 		case 16:
4824 			block = "CB";
4825 			break;
4826 		case 161:
4827 		case 145:
4828 		case 97:
4829 		case 81:
4830 		case 225:
4831 		case 209:
4832 		case 33:
4833 		case 17:
4834 			block = "CB_FMASK";
4835 			break;
4836 		case 162:
4837 		case 146:
4838 		case 98:
4839 		case 82:
4840 		case 226:
4841 		case 210:
4842 		case 34:
4843 		case 18:
4844 			block = "CB_CMASK";
4845 			break;
4846 		case 163:
4847 		case 147:
4848 		case 99:
4849 		case 83:
4850 		case 227:
4851 		case 211:
4852 		case 35:
4853 		case 19:
4854 			block = "CB_IMMED";
4855 			break;
4856 		case 164:
4857 		case 148:
4858 		case 100:
4859 		case 84:
4860 		case 228:
4861 		case 212:
4862 		case 36:
4863 		case 20:
4864 			block = "DB";
4865 			break;
4866 		case 165:
4867 		case 149:
4868 		case 101:
4869 		case 85:
4870 		case 229:
4871 		case 213:
4872 		case 37:
4873 		case 21:
4874 			block = "DB_HTILE";
4875 			break;
4876 		case 167:
4877 		case 151:
4878 		case 103:
4879 		case 87:
4880 		case 231:
4881 		case 215:
4882 		case 39:
4883 		case 23:
4884 			block = "DB_STEN";
4885 			break;
4886 		case 72:
4887 		case 68:
4888 		case 64:
4889 		case 8:
4890 		case 4:
4891 		case 0:
4892 		case 136:
4893 		case 132:
4894 		case 128:
4895 		case 200:
4896 		case 196:
4897 		case 192:
4898 			block = "TC";
4899 			break;
4900 		case 112:
4901 		case 48:
4902 			block = "CP";
4903 			break;
4904 		case 49:
4905 		case 177:
4906 		case 50:
4907 		case 178:
4908 			block = "SH";
4909 			break;
4910 		case 53:
4911 		case 190:
4912 			block = "VGT";
4913 			break;
4914 		case 117:
4915 			block = "IH";
4916 			break;
4917 		case 51:
4918 		case 115:
4919 			block = "RLC";
4920 			break;
4921 		case 119:
4922 		case 183:
4923 			block = "DMA0";
4924 			break;
4925 		case 61:
4926 			block = "DMA1";
4927 			break;
4928 		case 248:
4929 		case 120:
4930 			block = "HDP";
4931 			break;
4932 		default:
4933 			block = "unknown";
4934 			break;
4935 		}
4936 	} else {
4937 		switch (mc_id) {
4938 		case 32:
4939 		case 16:
4940 		case 96:
4941 		case 80:
4942 		case 160:
4943 		case 144:
4944 		case 224:
4945 		case 208:
4946 			block = "CB";
4947 			break;
4948 		case 33:
4949 		case 17:
4950 		case 97:
4951 		case 81:
4952 		case 161:
4953 		case 145:
4954 		case 225:
4955 		case 209:
4956 			block = "CB_FMASK";
4957 			break;
4958 		case 34:
4959 		case 18:
4960 		case 98:
4961 		case 82:
4962 		case 162:
4963 		case 146:
4964 		case 226:
4965 		case 210:
4966 			block = "CB_CMASK";
4967 			break;
4968 		case 35:
4969 		case 19:
4970 		case 99:
4971 		case 83:
4972 		case 163:
4973 		case 147:
4974 		case 227:
4975 		case 211:
4976 			block = "CB_IMMED";
4977 			break;
4978 		case 36:
4979 		case 20:
4980 		case 100:
4981 		case 84:
4982 		case 164:
4983 		case 148:
4984 		case 228:
4985 		case 212:
4986 			block = "DB";
4987 			break;
4988 		case 37:
4989 		case 21:
4990 		case 101:
4991 		case 85:
4992 		case 165:
4993 		case 149:
4994 		case 229:
4995 		case 213:
4996 			block = "DB_HTILE";
4997 			break;
4998 		case 39:
4999 		case 23:
5000 		case 103:
5001 		case 87:
5002 		case 167:
5003 		case 151:
5004 		case 231:
5005 		case 215:
5006 			block = "DB_STEN";
5007 			break;
5008 		case 72:
5009 		case 68:
5010 		case 8:
5011 		case 4:
5012 		case 136:
5013 		case 132:
5014 		case 200:
5015 		case 196:
5016 			block = "TC";
5017 			break;
5018 		case 112:
5019 		case 48:
5020 			block = "CP";
5021 			break;
5022 		case 49:
5023 		case 177:
5024 		case 50:
5025 		case 178:
5026 			block = "SH";
5027 			break;
5028 		case 53:
5029 			block = "VGT";
5030 			break;
5031 		case 117:
5032 			block = "IH";
5033 			break;
5034 		case 51:
5035 		case 115:
5036 			block = "RLC";
5037 			break;
5038 		case 119:
5039 		case 183:
5040 			block = "DMA0";
5041 			break;
5042 		case 61:
5043 			block = "DMA1";
5044 			break;
5045 		case 248:
5046 		case 120:
5047 			block = "HDP";
5048 			break;
5049 		default:
5050 			block = "unknown";
5051 			break;
5052 		}
5053 	}
5054 
5055 	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5056 	       protections, vmid, addr,
5057 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5058 	       block, mc_id);
5059 }
5060 
5061 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5062 		 unsigned vm_id, uint64_t pd_addr)
5063 {
5064 	/* write new base address */
5065 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5066 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5067 				 WRITE_DATA_DST_SEL(0)));
5068 
5069 	if (vm_id < 8) {
5070 		radeon_ring_write(ring,
5071 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5072 	} else {
5073 		radeon_ring_write(ring,
5074 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5075 	}
5076 	radeon_ring_write(ring, 0);
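	/* the base register holds a 4 KiB-aligned physical page number,
	 * hence pd_addr >> 12 */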
5077 	radeon_ring_write(ring, pd_addr >> 12);
5078 
5079 	/* flush hdp cache */
5080 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5081 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5082 				 WRITE_DATA_DST_SEL(0)));
5083 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5084 	radeon_ring_write(ring, 0);
5085 	radeon_ring_write(ring, 0x1);
5086 
5087 	/* bits 0-15 are the VM contexts 0-15 */
5088 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5089 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5090 				 WRITE_DATA_DST_SEL(0)));
5091 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5092 	radeon_ring_write(ring, 0);
5093 	radeon_ring_write(ring, 1 << vm_id);
5094 
5095 	/* wait for the invalidate to complete */
5096 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5097 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5098 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5099 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5100 	radeon_ring_write(ring, 0);
5101 	radeon_ring_write(ring, 0); /* ref */
5102 	radeon_ring_write(ring, 0); /* mask */
5103 	radeon_ring_write(ring, 0x20); /* poll interval */
5104 
5105 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5106 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5107 	radeon_ring_write(ring, 0x0);
5108 }
5109 
5110 /*
5111  *  Power and clock gating
5112  */
5113 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5114 {
5115 	int i;
5116 
5117 	for (i = 0; i < rdev->usec_timeout; i++) {
5118 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5119 			break;
5120 		udelay(1);
5121 	}
5122 
5123 	for (i = 0; i < rdev->usec_timeout; i++) {
5124 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5125 			break;
5126 		udelay(1);
5127 	}
5128 }
5129 
5130 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5131 					 bool enable)
5132 {
5133 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5134 	u32 mask;
5135 	int i;
5136 
5137 	if (enable)
5138 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5139 	else
5140 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5141 	WREG32(CP_INT_CNTL_RING0, tmp);
5142 
5143 	if (!enable) {
5144 		/* read a gfx register */
5145 		tmp = RREG32(DB_DEPTH_INFO);
5146 
5147 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5148 		for (i = 0; i < rdev->usec_timeout; i++) {
5149 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5150 				break;
5151 			udelay(1);
5152 		}
5153 	}
5154 }
5155 
5156 static void si_set_uvd_dcm(struct radeon_device *rdev,
5157 			   bool sw_mode)
5158 {
5159 	u32 tmp, tmp2;
5160 
5161 	tmp = RREG32(UVD_CGC_CTRL);
5162 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5163 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5164 
5165 	if (sw_mode) {
5166 		tmp &= ~0x7ffff800;
5167 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5168 	} else {
5169 		tmp |= 0x7ffff800;
5170 		tmp2 = 0;
5171 	}
5172 
5173 	WREG32(UVD_CGC_CTRL, tmp);
5174 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5175 }
5176 
5177 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5178 {
5179 	bool hw_mode = true;
5180 
5181 	if (hw_mode) {
5182 		si_set_uvd_dcm(rdev, false);
5183 	} else {
5184 		u32 tmp = RREG32(UVD_CGC_CTRL);
5185 		tmp &= ~DCM;
5186 		WREG32(UVD_CGC_CTRL, tmp);
5187 	}
5188 }
5189 
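/* Disable the RLC if it is running and wait for its serdes transactions
 * to drain; returns the previous RLC_CNTL value so callers can restore
 * it afterwards via si_update_rlc().
 */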
5190 static u32 si_halt_rlc(struct radeon_device *rdev)
5191 {
5192 	u32 data, orig;
5193 
5194 	orig = data = RREG32(RLC_CNTL);
5195 
5196 	if (data & RLC_ENABLE) {
5197 		data &= ~RLC_ENABLE;
5198 		WREG32(RLC_CNTL, data);
5199 
5200 		si_wait_for_rlc_serdes(rdev);
5201 	}
5202 
5203 	return orig;
5204 }
5205 
5206 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5207 {
5208 	u32 tmp;
5209 
5210 	tmp = RREG32(RLC_CNTL);
5211 	if (tmp != rlc)
5212 		WREG32(RLC_CNTL, rlc);
5213 }
5214 
5215 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5216 {
5217 	u32 data, orig;
5218 
5219 	orig = data = RREG32(DMA_PG);
5220 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5221 		data |= PG_CNTL_ENABLE;
5222 	else
5223 		data &= ~PG_CNTL_ENABLE;
5224 	if (orig != data)
5225 		WREG32(DMA_PG, data);
5226 }
5227 
5228 static void si_init_dma_pg(struct radeon_device *rdev)
5229 {
5230 	u32 tmp;
5231 
5232 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5233 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5234 
5235 	for (tmp = 0; tmp < 5; tmp++)
5236 		WREG32(DMA_PGFSM_WRITE, 0);
5237 }
5238 
5239 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5240 			       bool enable)
5241 {
5242 	u32 tmp;
5243 
5244 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5245 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5246 		WREG32(RLC_TTOP_D, tmp);
5247 
5248 		tmp = RREG32(RLC_PG_CNTL);
5249 		tmp |= GFX_PG_ENABLE;
5250 		WREG32(RLC_PG_CNTL, tmp);
5251 
5252 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5253 		tmp |= AUTO_PG_EN;
5254 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5255 	} else {
5256 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5257 		tmp &= ~AUTO_PG_EN;
5258 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5259 
5260 		tmp = RREG32(DB_RENDER_CONTROL);
5261 	}
5262 }
5263 
5264 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5265 {
5266 	u32 tmp;
5267 
5268 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5269 
5270 	tmp = RREG32(RLC_PG_CNTL);
5271 	tmp |= GFX_PG_SRC;
5272 	WREG32(RLC_PG_CNTL, tmp);
5273 
5274 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5275 
5276 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5277 
5278 	tmp &= ~GRBM_REG_SGIT_MASK;
5279 	tmp |= GRBM_REG_SGIT(0x700);
5280 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5281 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5282 }
5283 
5284 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5285 {
5286 	u32 mask = 0, tmp, tmp1;
5287 	int i;
5288 
5289 	si_select_se_sh(rdev, se, sh);
5290 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5291 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5292 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5293 
5294 	tmp &= 0xffff0000;
5295 
5296 	tmp |= tmp1;
5297 	tmp >>= 16;
5298 
5299 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5300 		mask <<= 1;
5301 		mask |= 1;
5302 	}
5303 
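	/* the register fields read above hold per-CU disable bits; invert
	 * and trim to max_cu_per_sh to get the active-CU bitmap */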
5304 	return (~tmp) & mask;
5305 }
5306 
5307 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5308 {
5309 	u32 i, j, k, active_cu_number = 0;
5310 	u32 mask, counter, cu_bitmap;
5311 	u32 tmp = 0;
5312 
5313 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5314 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5315 			mask = 1;
5316 			cu_bitmap = 0;
5317 			counter  = 0;
5318 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5319 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5320 					if (counter < 2)
5321 						cu_bitmap |= mask;
5322 					counter++;
5323 				}
5324 				mask <<= 1;
5325 			}
5326 
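			/* pack this SH's always-on CU mask (at most two CUs,
			 * per the counter check above) into its byte lane of
			 * RLC_PG_AO_CU_MASK */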
5327 			active_cu_number += counter;
5328 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5329 		}
5330 	}
5331 
5332 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5333 
5334 	tmp = RREG32(RLC_MAX_PG_CU);
5335 	tmp &= ~MAX_PU_CU_MASK;
5336 	tmp |= MAX_PU_CU(active_cu_number);
5337 	WREG32(RLC_MAX_PG_CU, tmp);
5338 }
5339 
5340 static void si_enable_cgcg(struct radeon_device *rdev,
5341 			   bool enable)
5342 {
5343 	u32 data, orig, tmp;
5344 
5345 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5346 
5347 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5348 		si_enable_gui_idle_interrupt(rdev, true);
5349 
5350 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5351 
5352 		tmp = si_halt_rlc(rdev);
5353 
5354 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5355 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5356 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5357 
5358 		si_wait_for_rlc_serdes(rdev);
5359 
5360 		si_update_rlc(rdev, tmp);
5361 
5362 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5363 
5364 		data |= CGCG_EN | CGLS_EN;
5365 	} else {
5366 		si_enable_gui_idle_interrupt(rdev, false);
5367 
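		/* dummy reads, presumably to let outstanding CB clock-gating
		 * activity settle before CGCG/CGLS are turned off */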
5368 		RREG32(CB_CGTT_SCLK_CTRL);
5369 		RREG32(CB_CGTT_SCLK_CTRL);
5370 		RREG32(CB_CGTT_SCLK_CTRL);
5371 		RREG32(CB_CGTT_SCLK_CTRL);
5372 
5373 		data &= ~(CGCG_EN | CGLS_EN);
5374 	}
5375 
5376 	if (orig != data)
5377 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5378 }
5379 
5380 static void si_enable_mgcg(struct radeon_device *rdev,
5381 			   bool enable)
5382 {
5383 	u32 data, orig, tmp = 0;
5384 
5385 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5386 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5387 		data = 0x96940200;
5388 		if (orig != data)
5389 			WREG32(CGTS_SM_CTRL_REG, data);
5390 
5391 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5392 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5393 			data |= CP_MEM_LS_EN;
5394 			if (orig != data)
5395 				WREG32(CP_MEM_SLP_CNTL, data);
5396 		}
5397 
5398 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5399 		data &= 0xffffffc0;
5400 		if (orig != data)
5401 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5402 
5403 		tmp = si_halt_rlc(rdev);
5404 
5405 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5406 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5407 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5408 
5409 		si_update_rlc(rdev, tmp);
5410 	} else {
5411 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5412 		data |= 0x00000003;
5413 		if (orig != data)
5414 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5415 
5416 		data = RREG32(CP_MEM_SLP_CNTL);
5417 		if (data & CP_MEM_LS_EN) {
5418 			data &= ~CP_MEM_LS_EN;
5419 			WREG32(CP_MEM_SLP_CNTL, data);
5420 		}
5421 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5422 		data |= LS_OVERRIDE | OVERRIDE;
5423 		if (orig != data)
5424 			WREG32(CGTS_SM_CTRL_REG, data);
5425 
5426 		tmp = si_halt_rlc(rdev);
5427 
5428 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5429 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5430 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5431 
5432 		si_update_rlc(rdev, tmp);
5433 	}
5434 }
5435 
5436 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5437 			       bool enable)
5438 {
5439 	u32 orig, data, tmp;
5440 
5441 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5442 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5443 		tmp |= 0x3fff;
5444 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5445 
5446 		orig = data = RREG32(UVD_CGC_CTRL);
5447 		data |= DCM;
5448 		if (orig != data)
5449 			WREG32(UVD_CGC_CTRL, data);
5450 
5451 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5452 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5453 	} else {
5454 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5455 		tmp &= ~0x3fff;
5456 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5457 
5458 		orig = data = RREG32(UVD_CGC_CTRL);
5459 		data &= ~DCM;
5460 		if (orig != data)
5461 			WREG32(UVD_CGC_CTRL, data);
5462 
5463 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5464 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5465 	}
5466 }
5467 
5468 static const u32 mc_cg_registers[] =
5469 {
5470 	MC_HUB_MISC_HUB_CG,
5471 	MC_HUB_MISC_SIP_CG,
5472 	MC_HUB_MISC_VM_CG,
5473 	MC_XPB_CLK_GAT,
5474 	ATC_MISC_CG,
5475 	MC_CITF_MISC_WR_CG,
5476 	MC_CITF_MISC_RD_CG,
5477 	MC_CITF_MISC_VM_CG,
5478 	VM_L2_CG,
5479 };
5480 
5481 static void si_enable_mc_ls(struct radeon_device *rdev,
5482 			    bool enable)
5483 {
5484 	int i;
5485 	u32 orig, data;
5486 
5487 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5488 		orig = data = RREG32(mc_cg_registers[i]);
5489 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5490 			data |= MC_LS_ENABLE;
5491 		else
5492 			data &= ~MC_LS_ENABLE;
5493 		if (data != orig)
5494 			WREG32(mc_cg_registers[i], data);
5495 	}
5496 }
5497 
5498 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5499 			       bool enable)
5500 {
5501 	int i;
5502 	u32 orig, data;
5503 
5504 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5505 		orig = data = RREG32(mc_cg_registers[i]);
5506 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5507 			data |= MC_CG_ENABLE;
5508 		else
5509 			data &= ~MC_CG_ENABLE;
5510 		if (data != orig)
5511 			WREG32(mc_cg_registers[i], data);
5512 	}
5513 }
5514 
5515 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5516 			       bool enable)
5517 {
5518 	u32 orig, data, offset;
5519 	int i;
5520 
5521 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5522 		for (i = 0; i < 2; i++) {
5523 			if (i == 0)
5524 				offset = DMA0_REGISTER_OFFSET;
5525 			else
5526 				offset = DMA1_REGISTER_OFFSET;
5527 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5528 			data &= ~MEM_POWER_OVERRIDE;
5529 			if (data != orig)
5530 				WREG32(DMA_POWER_CNTL + offset, data);
5531 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5532 		}
5533 	} else {
5534 		for (i = 0; i < 2; i++) {
5535 			if (i == 0)
5536 				offset = DMA0_REGISTER_OFFSET;
5537 			else
5538 				offset = DMA1_REGISTER_OFFSET;
5539 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5540 			data |= MEM_POWER_OVERRIDE;
5541 			if (data != orig)
5542 				WREG32(DMA_POWER_CNTL + offset, data);
5543 
5544 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5545 			data = 0xff000000;
5546 			if (data != orig)
5547 				WREG32(DMA_CLK_CTRL + offset, data);
5548 		}
5549 	}
5550 }
5551 
5552 static void si_enable_bif_mgls(struct radeon_device *rdev,
5553 			       bool enable)
5554 {
5555 	u32 orig, data;
5556 
5557 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5558 
5559 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5560 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5561 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5562 	else
5563 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5564 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5565 
5566 	if (orig != data)
5567 		WREG32_PCIE(PCIE_CNTL2, data);
5568 }
5569 
5570 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5571 			       bool enable)
5572 {
5573 	u32 orig, data;
5574 
5575 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5576 
5577 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5578 		data &= ~CLOCK_GATING_DIS;
5579 	else
5580 		data |= CLOCK_GATING_DIS;
5581 
5582 	if (orig != data)
5583 		WREG32(HDP_HOST_PATH_CNTL, data);
5584 }
5585 
5586 static void si_enable_hdp_ls(struct radeon_device *rdev,
5587 			     bool enable)
5588 {
5589 	u32 orig, data;
5590 
5591 	orig = data = RREG32(HDP_MEM_POWER_LS);
5592 
5593 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5594 		data |= HDP_LS_ENABLE;
5595 	else
5596 		data &= ~HDP_LS_ENABLE;
5597 
5598 	if (orig != data)
5599 		WREG32(HDP_MEM_POWER_LS, data);
5600 }
5601 
5602 static void si_update_cg(struct radeon_device *rdev,
5603 			 u32 block, bool enable)
5604 {
5605 	if (block & RADEON_CG_BLOCK_GFX) {
5606 		si_enable_gui_idle_interrupt(rdev, false);
5607 		/* order matters! */
5608 		if (enable) {
5609 			si_enable_mgcg(rdev, true);
5610 			si_enable_cgcg(rdev, true);
5611 		} else {
5612 			si_enable_cgcg(rdev, false);
5613 			si_enable_mgcg(rdev, false);
5614 		}
5615 		si_enable_gui_idle_interrupt(rdev, true);
5616 	}
5617 
5618 	if (block & RADEON_CG_BLOCK_MC) {
5619 		si_enable_mc_mgcg(rdev, enable);
5620 		si_enable_mc_ls(rdev, enable);
5621 	}
5622 
5623 	if (block & RADEON_CG_BLOCK_SDMA) {
5624 		si_enable_dma_mgcg(rdev, enable);
5625 	}
5626 
5627 	if (block & RADEON_CG_BLOCK_BIF) {
5628 		si_enable_bif_mgls(rdev, enable);
5629 	}
5630 
5631 	if (block & RADEON_CG_BLOCK_UVD) {
5632 		if (rdev->has_uvd) {
5633 			si_enable_uvd_mgcg(rdev, enable);
5634 		}
5635 	}
5636 
5637 	if (block & RADEON_CG_BLOCK_HDP) {
5638 		si_enable_hdp_mgcg(rdev, enable);
5639 		si_enable_hdp_ls(rdev, enable);
5640 	}
5641 }
5642 
5643 static void si_init_cg(struct radeon_device *rdev)
5644 {
5645 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5646 			    RADEON_CG_BLOCK_MC |
5647 			    RADEON_CG_BLOCK_SDMA |
5648 			    RADEON_CG_BLOCK_BIF |
5649 			    RADEON_CG_BLOCK_HDP), true);
5650 	if (rdev->has_uvd) {
5651 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5652 		si_init_uvd_internal_cg(rdev);
5653 	}
5654 }
5655 
5656 static void si_fini_cg(struct radeon_device *rdev)
5657 {
5658 	if (rdev->has_uvd) {
5659 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5660 	}
5661 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5662 			    RADEON_CG_BLOCK_MC |
5663 			    RADEON_CG_BLOCK_SDMA |
5664 			    RADEON_CG_BLOCK_BIF |
5665 			    RADEON_CG_BLOCK_HDP), false);
5666 }
5667 
5668 u32 si_get_csb_size(struct radeon_device *rdev)
5669 {
5670 	u32 count = 0;
5671 	const struct cs_section_def *sect = NULL;
5672 	const struct cs_extent_def *ext = NULL;
5673 
5674 	if (rdev->rlc.cs_data == NULL)
5675 		return 0;
5676 
5677 	/* begin clear state */
5678 	count += 2;
5679 	/* context control state */
5680 	count += 3;
5681 
5682 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5683 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5684 			if (sect->id == SECT_CONTEXT)
5685 				count += 2 + ext->reg_count;
5686 			else
5687 				return 0;
5688 		}
5689 	}
5690 	/* pa_sc_raster_config */
5691 	count += 3;
5692 	/* end clear state */
5693 	count += 2;
5694 	/* clear state */
5695 	count += 2;
5696 
5697 	return count;
5698 }
5699 
5700 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5701 {
5702 	u32 count = 0, i;
5703 	const struct cs_section_def *sect = NULL;
5704 	const struct cs_extent_def *ext = NULL;
5705 
5706 	if (rdev->rlc.cs_data == NULL)
5707 		return;
5708 	if (buffer == NULL)
5709 		return;
5710 
5711 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5712 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5713 
5714 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5715 	buffer[count++] = cpu_to_le32(0x80000000);
5716 	buffer[count++] = cpu_to_le32(0x80000000);
5717 
5718 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5719 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5720 			if (sect->id == SECT_CONTEXT) {
5721 				buffer[count++] =
5722 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
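				/* clear-state tables store dword register
				 * indices; SET_CONTEXT_REG offsets are taken
				 * relative to the 0xa000 context-register
				 * base */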
5723 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5724 				for (i = 0; i < ext->reg_count; i++)
5725 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5726 			} else {
5727 				return;
5728 			}
5729 		}
5730 	}
5731 
5732 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5733 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5734 	switch (rdev->family) {
5735 	case CHIP_TAHITI:
5736 	case CHIP_PITCAIRN:
5737 		buffer[count++] = cpu_to_le32(0x2a00126a);
5738 		break;
5739 	case CHIP_VERDE:
5740 		buffer[count++] = cpu_to_le32(0x0000124a);
5741 		break;
5742 	case CHIP_OLAND:
5743 		buffer[count++] = cpu_to_le32(0x00000082);
5744 		break;
5745 	case CHIP_HAINAN:
5746 		buffer[count++] = cpu_to_le32(0x00000000);
5747 		break;
5748 	default:
5749 		buffer[count++] = cpu_to_le32(0x00000000);
5750 		break;
5751 	}
5752 
5753 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5754 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5755 
5756 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5757 	buffer[count++] = cpu_to_le32(0);
5758 }
5759 
5760 static void si_init_pg(struct radeon_device *rdev)
5761 {
5762 	if (rdev->pg_flags) {
5763 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5764 			si_init_dma_pg(rdev);
5765 		}
5766 		si_init_ao_cu_mask(rdev);
5767 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5768 			si_init_gfx_cgpg(rdev);
5769 		} else {
5770 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5771 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5772 		}
5773 		si_enable_dma_pg(rdev, true);
5774 		si_enable_gfx_cgpg(rdev, true);
5775 	} else {
5776 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5777 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5778 	}
5779 }
5780 
5781 static void si_fini_pg(struct radeon_device *rdev)
5782 {
5783 	if (rdev->pg_flags) {
5784 		si_enable_dma_pg(rdev, false);
5785 		si_enable_gfx_cgpg(rdev, false);
5786 	}
5787 }
5788 
5789 /*
5790  * RLC
5791  */
5792 void si_rlc_reset(struct radeon_device *rdev)
5793 {
5794 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5795 
5796 	tmp |= SOFT_RESET_RLC;
5797 	WREG32(GRBM_SOFT_RESET, tmp);
5798 	udelay(50);
5799 	tmp &= ~SOFT_RESET_RLC;
5800 	WREG32(GRBM_SOFT_RESET, tmp);
5801 	udelay(50);
5802 }
5803 
5804 static void si_rlc_stop(struct radeon_device *rdev)
5805 {
5806 	WREG32(RLC_CNTL, 0);
5807 
5808 	si_enable_gui_idle_interrupt(rdev, false);
5809 
5810 	si_wait_for_rlc_serdes(rdev);
5811 }
5812 
5813 static void si_rlc_start(struct radeon_device *rdev)
5814 {
5815 	WREG32(RLC_CNTL, RLC_ENABLE);
5816 
5817 	si_enable_gui_idle_interrupt(rdev, true);
5818 
5819 	udelay(50);
5820 }
5821 
5822 static bool si_lbpw_supported(struct radeon_device *rdev)
5823 {
5824 	u32 tmp;
5825 
5826 	/* Enable LBPW only for DDR3 */
5827 	tmp = RREG32(MC_SEQ_MISC0);
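	/* bits [31:28] of MC_SEQ_MISC0 encode the memory type; 0xB appears
	 * to correspond to DDR3 */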
5828 	if ((tmp & 0xF0000000) == 0xB0000000)
5829 		return true;
5830 	return false;
5831 }
5832 
5833 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5834 {
5835 	u32 tmp;
5836 
5837 	tmp = RREG32(RLC_LB_CNTL);
5838 	if (enable)
5839 		tmp |= LOAD_BALANCE_ENABLE;
5840 	else
5841 		tmp &= ~LOAD_BALANCE_ENABLE;
5842 	WREG32(RLC_LB_CNTL, tmp);
5843 
5844 	if (!enable) {
5845 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5846 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5847 	}
5848 }
5849 
5850 static int si_rlc_resume(struct radeon_device *rdev)
5851 {
5852 	u32 i;
5853 
5854 	if (!rdev->rlc_fw)
5855 		return -EINVAL;
5856 
5857 	si_rlc_stop(rdev);
5858 
5859 	si_rlc_reset(rdev);
5860 
5861 	si_init_pg(rdev);
5862 
5863 	si_init_cg(rdev);
5864 
5865 	WREG32(RLC_RL_BASE, 0);
5866 	WREG32(RLC_RL_SIZE, 0);
5867 	WREG32(RLC_LB_CNTL, 0);
5868 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5869 	WREG32(RLC_LB_CNTR_INIT, 0);
5870 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5871 
5872 	WREG32(RLC_MC_CNTL, 0);
5873 	WREG32(RLC_UCODE_CNTL, 0);
5874 
5875 	if (rdev->new_fw) {
5876 		const struct rlc_firmware_header_v1_0 *hdr =
5877 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5878 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5879 		const __le32 *fw_data = (const __le32 *)
5880 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5881 
5882 		radeon_ucode_print_rlc_hdr(&hdr->header);
5883 
5884 		for (i = 0; i < fw_size; i++) {
5885 			WREG32(RLC_UCODE_ADDR, i);
5886 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5887 		}
5888 	} else {
5889 		const __be32 *fw_data =
5890 			(const __be32 *)rdev->rlc_fw->data;
5891 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5892 			WREG32(RLC_UCODE_ADDR, i);
5893 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5894 		}
5895 	}
5896 	WREG32(RLC_UCODE_ADDR, 0);
5897 
5898 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5899 
5900 	si_rlc_start(rdev);
5901 
5902 	return 0;
5903 }
5904 
5905 static void si_enable_interrupts(struct radeon_device *rdev)
5906 {
5907 	u32 ih_cntl = RREG32(IH_CNTL);
5908 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5909 
5910 	ih_cntl |= ENABLE_INTR;
5911 	ih_rb_cntl |= IH_RB_ENABLE;
5912 	WREG32(IH_CNTL, ih_cntl);
5913 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5914 	rdev->ih.enabled = true;
5915 }
5916 
5917 static void si_disable_interrupts(struct radeon_device *rdev)
5918 {
5919 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5920 	u32 ih_cntl = RREG32(IH_CNTL);
5921 
5922 	ih_rb_cntl &= ~IH_RB_ENABLE;
5923 	ih_cntl &= ~ENABLE_INTR;
5924 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5925 	WREG32(IH_CNTL, ih_cntl);
5926 	/* set rptr, wptr to 0 */
5927 	WREG32(IH_RB_RPTR, 0);
5928 	WREG32(IH_RB_WPTR, 0);
5929 	rdev->ih.enabled = false;
5930 	rdev->ih.rptr = 0;
5931 }
5932 
5933 static void si_disable_interrupt_state(struct radeon_device *rdev)
5934 {
5935 	u32 tmp;
5936 
5937 	tmp = RREG32(CP_INT_CNTL_RING0) &
5938 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5939 	WREG32(CP_INT_CNTL_RING0, tmp);
5940 	WREG32(CP_INT_CNTL_RING1, 0);
5941 	WREG32(CP_INT_CNTL_RING2, 0);
5942 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5943 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5944 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5945 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5946 	WREG32(GRBM_INT_CNTL, 0);
5947 	WREG32(SRBM_INT_CNTL, 0);
5948 	if (rdev->num_crtc >= 2) {
5949 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5950 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5951 	}
5952 	if (rdev->num_crtc >= 4) {
5953 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5954 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5955 	}
5956 	if (rdev->num_crtc >= 6) {
5957 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5958 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5959 	}
5960 
5961 	if (rdev->num_crtc >= 2) {
5962 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5963 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5964 	}
5965 	if (rdev->num_crtc >= 4) {
5966 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5967 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5968 	}
5969 	if (rdev->num_crtc >= 6) {
5970 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5971 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5972 	}
5973 
5974 	if (!ASIC_IS_NODCE(rdev)) {
5975 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5976 
5977 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5978 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5979 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5980 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5981 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5982 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5983 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5984 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5985 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5986 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5987 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5988 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5989 	}
5990 }
5991 
5992 static int si_irq_init(struct radeon_device *rdev)
5993 {
5994 	int ret = 0;
5995 	int rb_bufsz;
5996 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5997 
5998 	/* allocate ring */
5999 	ret = r600_ih_ring_alloc(rdev);
6000 	if (ret)
6001 		return ret;
6002 
6003 	/* disable irqs */
6004 	si_disable_interrupts(rdev);
6005 
6006 	/* init rlc */
6007 	ret = si_rlc_resume(rdev);
6008 	if (ret) {
6009 		r600_ih_ring_fini(rdev);
6010 		return ret;
6011 	}
6012 
6013 	/* setup interrupt control */
6014 	/* set dummy read address to ring address */
6015 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6016 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6017 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6018 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6019 	 */
6020 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6021 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6022 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6023 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6024 
6025 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6026 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
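	/* rb_bufsz is log2 of the ring size in dwords; the IH_RB_CNTL size
	 * field starts at bit 1, hence the shift below */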
6027 
6028 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6029 		      IH_WPTR_OVERFLOW_CLEAR |
6030 		      (rb_bufsz << 1));
6031 
6032 	if (rdev->wb.enabled)
6033 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6034 
6035 	/* set the writeback address whether it's enabled or not */
6036 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6037 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6038 
6039 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6040 
6041 	/* set rptr, wptr to 0 */
6042 	WREG32(IH_RB_RPTR, 0);
6043 	WREG32(IH_RB_WPTR, 0);
6044 
6045 	/* Default settings for IH_CNTL (disabled at first) */
6046 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6047 	/* RPTR_REARM only works if msi's are enabled */
6048 	if (rdev->msi_enabled)
6049 		ih_cntl |= RPTR_REARM;
6050 	WREG32(IH_CNTL, ih_cntl);
6051 
6052 	/* force the active interrupt state to all disabled */
6053 	si_disable_interrupt_state(rdev);
6054 
6055 	pci_set_master(rdev->pdev);
6056 
6057 	/* enable irqs */
6058 	si_enable_interrupts(rdev);
6059 
6060 	return ret;
6061 }
6062 
6063 int si_irq_set(struct radeon_device *rdev)
6064 {
6065 	u32 cp_int_cntl;
6066 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6067 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6068 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6069 	u32 grbm_int_cntl = 0;
6070 	u32 dma_cntl, dma_cntl1;
6071 	u32 thermal_int = 0;
6072 
6073 	if (!rdev->irq.installed) {
6074 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6075 		return -EINVAL;
6076 	}
6077 	/* don't enable anything if the ih is disabled */
6078 	if (!rdev->ih.enabled) {
6079 		si_disable_interrupts(rdev);
6080 		/* force the active interrupt state to all disabled */
6081 		si_disable_interrupt_state(rdev);
6082 		return 0;
6083 	}
6084 
6085 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6086 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6087 
6088 	if (!ASIC_IS_NODCE(rdev)) {
6089 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6090 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6091 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6092 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6093 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6094 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6095 	}
6096 
6097 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6098 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6099 
6100 	thermal_int = RREG32(CG_THERMAL_INT) &
6101 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6102 
6103 	/* enable CP interrupts on all rings */
6104 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6105 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6106 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6107 	}
6108 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6109 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6110 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6111 	}
6112 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6113 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6114 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6115 	}
6116 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6117 		DRM_DEBUG("si_irq_set: sw int dma\n");
6118 		dma_cntl |= TRAP_ENABLE;
6119 	}
6120 
6121 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6122 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6123 		dma_cntl1 |= TRAP_ENABLE;
6124 	}
6125 	if (rdev->irq.crtc_vblank_int[0] ||
6126 	    atomic_read(&rdev->irq.pflip[0])) {
6127 		DRM_DEBUG("si_irq_set: vblank 0\n");
6128 		crtc1 |= VBLANK_INT_MASK;
6129 	}
6130 	if (rdev->irq.crtc_vblank_int[1] ||
6131 	    atomic_read(&rdev->irq.pflip[1])) {
6132 		DRM_DEBUG("si_irq_set: vblank 1\n");
6133 		crtc2 |= VBLANK_INT_MASK;
6134 	}
6135 	if (rdev->irq.crtc_vblank_int[2] ||
6136 	    atomic_read(&rdev->irq.pflip[2])) {
6137 		DRM_DEBUG("si_irq_set: vblank 2\n");
6138 		crtc3 |= VBLANK_INT_MASK;
6139 	}
6140 	if (rdev->irq.crtc_vblank_int[3] ||
6141 	    atomic_read(&rdev->irq.pflip[3])) {
6142 		DRM_DEBUG("si_irq_set: vblank 3\n");
6143 		crtc4 |= VBLANK_INT_MASK;
6144 	}
6145 	if (rdev->irq.crtc_vblank_int[4] ||
6146 	    atomic_read(&rdev->irq.pflip[4])) {
6147 		DRM_DEBUG("si_irq_set: vblank 4\n");
6148 		crtc5 |= VBLANK_INT_MASK;
6149 	}
6150 	if (rdev->irq.crtc_vblank_int[5] ||
6151 	    atomic_read(&rdev->irq.pflip[5])) {
6152 		DRM_DEBUG("si_irq_set: vblank 5\n");
6153 		crtc6 |= VBLANK_INT_MASK;
6154 	}
6155 	if (rdev->irq.hpd[0]) {
6156 		DRM_DEBUG("si_irq_set: hpd 1\n");
6157 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6158 	}
6159 	if (rdev->irq.hpd[1]) {
6160 		DRM_DEBUG("si_irq_set: hpd 2\n");
6161 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6162 	}
6163 	if (rdev->irq.hpd[2]) {
6164 		DRM_DEBUG("si_irq_set: hpd 3\n");
6165 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6166 	}
6167 	if (rdev->irq.hpd[3]) {
6168 		DRM_DEBUG("si_irq_set: hpd 4\n");
6169 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6170 	}
6171 	if (rdev->irq.hpd[4]) {
6172 		DRM_DEBUG("si_irq_set: hpd 5\n");
6173 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6174 	}
6175 	if (rdev->irq.hpd[5]) {
6176 		DRM_DEBUG("si_irq_set: hpd 6\n");
6177 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6178 	}
6179 
6180 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6181 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6182 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6183 
6184 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6185 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6186 
6187 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6188 
6189 	if (rdev->irq.dpm_thermal) {
6190 		DRM_DEBUG("dpm thermal\n");
6191 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6192 	}
6193 
6194 	if (rdev->num_crtc >= 2) {
6195 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6196 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6197 	}
6198 	if (rdev->num_crtc >= 4) {
6199 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6200 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6201 	}
6202 	if (rdev->num_crtc >= 6) {
6203 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6204 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6205 	}
6206 
6207 	if (rdev->num_crtc >= 2) {
6208 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6209 		       GRPH_PFLIP_INT_MASK);
6210 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6211 		       GRPH_PFLIP_INT_MASK);
6212 	}
6213 	if (rdev->num_crtc >= 4) {
6214 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6215 		       GRPH_PFLIP_INT_MASK);
6216 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6217 		       GRPH_PFLIP_INT_MASK);
6218 	}
6219 	if (rdev->num_crtc >= 6) {
6220 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6221 		       GRPH_PFLIP_INT_MASK);
6222 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6223 		       GRPH_PFLIP_INT_MASK);
6224 	}
6225 
6226 	if (!ASIC_IS_NODCE(rdev)) {
6227 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6228 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6229 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6230 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6231 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6232 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6233 	}
6234 
6235 	WREG32(CG_THERMAL_INT, thermal_int);
6236 
6237 	/* posting read */
6238 	RREG32(SRBM_STATUS);
6239 
6240 	return 0;
6241 }
6242 
6243 static inline void si_irq_ack(struct radeon_device *rdev)
6244 {
6245 	u32 tmp;
6246 
6247 	if (ASIC_IS_NODCE(rdev))
6248 		return;
6249 
6250 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6251 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6252 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6253 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6254 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6255 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6256 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6257 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6258 	if (rdev->num_crtc >= 4) {
6259 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6260 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6261 	}
6262 	if (rdev->num_crtc >= 6) {
6263 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6264 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6265 	}
6266 
6267 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6268 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6269 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6270 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6271 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6272 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6273 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6274 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6275 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6276 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6277 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6278 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6279 
6280 	if (rdev->num_crtc >= 4) {
6281 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6282 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6283 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6284 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6285 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6286 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6287 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6288 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6289 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6290 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6291 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6292 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6293 	}
6294 
6295 	if (rdev->num_crtc >= 6) {
6296 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6297 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6298 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6299 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6300 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6301 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6302 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6303 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6304 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6305 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6306 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6307 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6308 	}
6309 
6310 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6311 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6312 		tmp |= DC_HPDx_INT_ACK;
6313 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6314 	}
6315 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6316 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6317 		tmp |= DC_HPDx_INT_ACK;
6318 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6319 	}
6320 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6321 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6322 		tmp |= DC_HPDx_INT_ACK;
6323 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6324 	}
6325 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6326 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6327 		tmp |= DC_HPDx_INT_ACK;
6328 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6329 	}
6330 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6331 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6332 		tmp |= DC_HPDx_INT_ACK;
6333 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6334 	}
6335 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6336 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6337 		tmp |= DC_HPDx_INT_ACK;
6338 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6339 	}
6340 
6341 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6342 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6343 		tmp |= DC_HPDx_RX_INT_ACK;
6344 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6345 	}
6346 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6347 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6348 		tmp |= DC_HPDx_RX_INT_ACK;
6349 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6350 	}
6351 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6352 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6353 		tmp |= DC_HPDx_RX_INT_ACK;
6354 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6355 	}
6356 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6357 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6358 		tmp |= DC_HPDx_RX_INT_ACK;
6359 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6360 	}
6361 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6362 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6363 		tmp |= DC_HPDx_RX_INT_ACK;
6364 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6365 	}
6366 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6367 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6368 		tmp |= DC_HPDx_RX_INT_ACK;
6369 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6370 	}
6371 }
6372 
6373 static void si_irq_disable(struct radeon_device *rdev)
6374 {
6375 	si_disable_interrupts(rdev);
6376 	/* Wait and acknowledge irq */
6377 	mdelay(1);
6378 	si_irq_ack(rdev);
6379 	si_disable_interrupt_state(rdev);
6380 }
6381 
6382 static void si_irq_suspend(struct radeon_device *rdev)
6383 {
6384 	si_irq_disable(rdev);
6385 	si_rlc_stop(rdev);
6386 }
6387 
6388 static void si_irq_fini(struct radeon_device *rdev)
6389 {
6390 	si_irq_suspend(rdev);
6391 	r600_ih_ring_fini(rdev);
6392 }
6393 
6394 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6395 {
6396 	u32 wptr, tmp;
6397 
6398 	if (rdev->wb.enabled)
6399 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6400 	else
6401 		wptr = RREG32(IH_RB_WPTR);
6402 
6403 	if (wptr & RB_OVERFLOW) {
6404 		wptr &= ~RB_OVERFLOW;
6405 		/* When a ring buffer overflow happens, start parsing from the
6406 		 * last vector not yet overwritten: wptr + 16, i.e. one
6407 		 * 16-byte IV entry past wptr. Hopefully this lets us catch up.
6408 		 */
6409 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6410 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6411 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6412 		tmp = RREG32(IH_RB_CNTL);
6413 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6414 		WREG32(IH_RB_CNTL, tmp);
6415 	}
6416 	return (wptr & rdev->ih.ptr_mask);
6417 }
6418 
6419 /* SI IV Ring
6420  * Each IV ring entry is 128 bits:
6421  * [7:0]    - interrupt source id
6422  * [31:8]   - reserved
6423  * [59:32]  - interrupt source data
6424  * [63:60]  - reserved
6425  * [71:64]  - RINGID
6426  * [79:72]  - VMID
6427  * [127:80] - reserved
6428  */
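/*
 * How the layout above maps onto the little-endian dwords read in
 * si_irq_process() below (derived from the masking in the loop, not
 * from documentation):
 *   dw0 bits [7:0]  -> src_id
 *   dw1 bits [27:0] -> src_data
 *   dw2 bits [7:0]  -> ring_id (RINGID)
 *   dw2 bits [15:8] -> VMID (not consumed here)
 */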
6429 int si_irq_process(struct radeon_device *rdev)
6430 {
6431 	u32 wptr;
6432 	u32 rptr;
6433 	u32 src_id, src_data, ring_id;
6434 	u32 ring_index;
6435 	bool queue_hotplug = false;
6436 	bool queue_dp = false;
6437 	bool queue_thermal = false;
6438 	u32 status, addr;
6439 
6440 	if (!rdev->ih.enabled || rdev->shutdown)
6441 		return IRQ_NONE;
6442 
6443 	wptr = si_get_ih_wptr(rdev);
6444 
6445 restart_ih:
6446 	/* is somebody else already processing irqs? */
6447 	if (atomic_xchg(&rdev->ih.lock, 1))
6448 		return IRQ_NONE;
6449 
6450 	rptr = rdev->ih.rptr;
6451 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6452 
6453 	/* Order reading of wptr vs. reading of IH ring data */
6454 	rmb();
6455 
6456 	/* display interrupts */
6457 	si_irq_ack(rdev);
6458 
6459 	while (rptr != wptr) {
6460 		/* wptr/rptr are in bytes! */
6461 		ring_index = rptr / 4;
6462 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6463 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6464 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6465 
6466 		switch (src_id) {
6467 		case 1: /* D1 vblank/vline */
6468 			switch (src_data) {
6469 			case 0: /* D1 vblank */
6470 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6471 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6472 
6473 				if (rdev->irq.crtc_vblank_int[0]) {
6474 					drm_handle_vblank(rdev->ddev, 0);
6475 					rdev->pm.vblank_sync = true;
6476 					wake_up(&rdev->irq.vblank_queue);
6477 				}
6478 				if (atomic_read(&rdev->irq.pflip[0]))
6479 					radeon_crtc_handle_vblank(rdev, 0);
6480 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6481 				DRM_DEBUG("IH: D1 vblank\n");
6482 
6483 				break;
6484 			case 1: /* D1 vline */
6485 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6486 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6487 
6488 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6489 				DRM_DEBUG("IH: D1 vline\n");
6490 
6491 				break;
6492 			default:
6493 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6494 				break;
6495 			}
6496 			break;
6497 		case 2: /* D2 vblank/vline */
6498 			switch (src_data) {
6499 			case 0: /* D2 vblank */
6500 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6501 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6502 
6503 				if (rdev->irq.crtc_vblank_int[1]) {
6504 					drm_handle_vblank(rdev->ddev, 1);
6505 					rdev->pm.vblank_sync = true;
6506 					wake_up(&rdev->irq.vblank_queue);
6507 				}
6508 				if (atomic_read(&rdev->irq.pflip[1]))
6509 					radeon_crtc_handle_vblank(rdev, 1);
6510 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6511 				DRM_DEBUG("IH: D2 vblank\n");
6512 
6513 				break;
6514 			case 1: /* D2 vline */
6515 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6516 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6517 
6518 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6519 				DRM_DEBUG("IH: D2 vline\n");
6520 
6521 				break;
6522 			default:
6523 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6524 				break;
6525 			}
6526 			break;
6527 		case 3: /* D3 vblank/vline */
6528 			switch (src_data) {
6529 			case 0: /* D3 vblank */
6530 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6531 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6532 
6533 				if (rdev->irq.crtc_vblank_int[2]) {
6534 					drm_handle_vblank(rdev->ddev, 2);
6535 					rdev->pm.vblank_sync = true;
6536 					wake_up(&rdev->irq.vblank_queue);
6537 				}
6538 				if (atomic_read(&rdev->irq.pflip[2]))
6539 					radeon_crtc_handle_vblank(rdev, 2);
6540 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6541 				DRM_DEBUG("IH: D3 vblank\n");
6542 
6543 				break;
6544 			case 1: /* D3 vline */
6545 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6546 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6547 
6548 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6549 				DRM_DEBUG("IH: D3 vline\n");
6550 
6551 				break;
6552 			default:
6553 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6554 				break;
6555 			}
6556 			break;
6557 		case 4: /* D4 vblank/vline */
6558 			switch (src_data) {
6559 			case 0: /* D4 vblank */
6560 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6561 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6562 
6563 				if (rdev->irq.crtc_vblank_int[3]) {
6564 					drm_handle_vblank(rdev->ddev, 3);
6565 					rdev->pm.vblank_sync = true;
6566 					wake_up(&rdev->irq.vblank_queue);
6567 				}
6568 				if (atomic_read(&rdev->irq.pflip[3]))
6569 					radeon_crtc_handle_vblank(rdev, 3);
6570 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6571 				DRM_DEBUG("IH: D4 vblank\n");
6572 
6573 				break;
6574 			case 1: /* D4 vline */
6575 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6576 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6577 
6578 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6579 				DRM_DEBUG("IH: D4 vline\n");
6580 
6581 				break;
6582 			default:
6583 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6584 				break;
6585 			}
6586 			break;
6587 		case 5: /* D5 vblank/vline */
6588 			switch (src_data) {
6589 			case 0: /* D5 vblank */
6590 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6591 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6592 
6593 				if (rdev->irq.crtc_vblank_int[4]) {
6594 					drm_handle_vblank(rdev->ddev, 4);
6595 					rdev->pm.vblank_sync = true;
6596 					wake_up(&rdev->irq.vblank_queue);
6597 				}
6598 				if (atomic_read(&rdev->irq.pflip[4]))
6599 					radeon_crtc_handle_vblank(rdev, 4);
6600 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6601 				DRM_DEBUG("IH: D5 vblank\n");
6602 
6603 				break;
6604 			case 1: /* D5 vline */
6605 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6606 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6607 
6608 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6609 				DRM_DEBUG("IH: D5 vline\n");
6610 
6611 				break;
6612 			default:
6613 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6614 				break;
6615 			}
6616 			break;
6617 		case 6: /* D6 vblank/vline */
6618 			switch (src_data) {
6619 			case 0: /* D6 vblank */
6620 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6621 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6622 
6623 				if (rdev->irq.crtc_vblank_int[5]) {
6624 					drm_handle_vblank(rdev->ddev, 5);
6625 					rdev->pm.vblank_sync = true;
6626 					wake_up(&rdev->irq.vblank_queue);
6627 				}
6628 				if (atomic_read(&rdev->irq.pflip[5]))
6629 					radeon_crtc_handle_vblank(rdev, 5);
6630 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6631 				DRM_DEBUG("IH: D6 vblank\n");
6632 
6633 				break;
6634 			case 1: /* D6 vline */
6635 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6636 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6637 
6638 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6639 				DRM_DEBUG("IH: D6 vline\n");
6640 
6641 				break;
6642 			default:
6643 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6644 				break;
6645 			}
6646 			break;
6647 		case 8: /* D1 page flip */
6648 		case 10: /* D2 page flip */
6649 		case 12: /* D3 page flip */
6650 		case 14: /* D4 page flip */
6651 		case 16: /* D5 page flip */
6652 		case 18: /* D6 page flip */
6653 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6654 			if (radeon_use_pflipirq > 0)
6655 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6656 			break;
6657 		case 42: /* HPD hotplug */
6658 			switch (src_data) {
6659 			case 0:
6660 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6661 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6662 
6663 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6664 				queue_hotplug = true;
6665 				DRM_DEBUG("IH: HPD1\n");
6666 
6667 				break;
6668 			case 1:
6669 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6670 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6671 
6672 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6673 				queue_hotplug = true;
6674 				DRM_DEBUG("IH: HPD2\n");
6675 
6676 				break;
6677 			case 2:
6678 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6679 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6680 
6681 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6682 				queue_hotplug = true;
6683 				DRM_DEBUG("IH: HPD3\n");
6684 
6685 				break;
6686 			case 3:
6687 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6688 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6689 
6690 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6691 				queue_hotplug = true;
6692 				DRM_DEBUG("IH: HPD4\n");
6693 
6694 				break;
6695 			case 4:
6696 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6697 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6698 
6699 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6700 				queue_hotplug = true;
6701 				DRM_DEBUG("IH: HPD5\n");
6702 
6703 				break;
6704 			case 5:
6705 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6706 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6707 
6708 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6709 				queue_hotplug = true;
6710 				DRM_DEBUG("IH: HPD6\n");
6711 
6712 				break;
6713 			case 6:
6714 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6715 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6716 
6717 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6718 				queue_dp = true;
6719 				DRM_DEBUG("IH: HPD_RX 1\n");
6720 
6721 				break;
6722 			case 7:
6723 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6724 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6725 
6726 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6727 				queue_dp = true;
6728 				DRM_DEBUG("IH: HPD_RX 2\n");
6729 
6730 				break;
6731 			case 8:
6732 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6733 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6734 
6735 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6736 				queue_dp = true;
6737 				DRM_DEBUG("IH: HPD_RX 3\n");
6738 
6739 				break;
6740 			case 9:
6741 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6742 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6743 
6744 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6745 				queue_dp = true;
6746 				DRM_DEBUG("IH: HPD_RX 4\n");
6747 
6748 				break;
6749 			case 10:
6750 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6751 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6752 
6753 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6754 				queue_dp = true;
6755 				DRM_DEBUG("IH: HPD_RX 5\n");
6756 
6757 				break;
6758 			case 11:
6759 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6760 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6761 
6762 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6763 				queue_dp = true;
6764 				DRM_DEBUG("IH: HPD_RX 6\n");
6765 
6766 				break;
6767 			default:
6768 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6769 				break;
6770 			}
6771 			break;
6772 		case 96:
6773 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6774 			WREG32(SRBM_INT_ACK, 0x1);
6775 			break;
6776 		case 124: /* UVD */
6777 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6778 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6779 			break;
6780 		case 146:
6781 		case 147:
6782 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6783 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6784 			/* reset addr and status */
6785 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6786 			if (addr == 0x0 && status == 0x0)
6787 				break;
6788 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6789 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6790 				addr);
6791 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6792 				status);
6793 			si_vm_decode_fault(rdev, status, addr);
6794 			break;
6795 		case 176: /* RINGID0 CP_INT */
6796 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6797 			break;
6798 		case 177: /* RINGID1 CP_INT */
6799 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6800 			break;
6801 		case 178: /* RINGID2 CP_INT */
6802 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6803 			break;
6804 		case 181: /* CP EOP event */
6805 			DRM_DEBUG("IH: CP EOP\n");
6806 			switch (ring_id) {
6807 			case 0:
6808 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6809 				break;
6810 			case 1:
6811 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6812 				break;
6813 			case 2:
6814 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6815 				break;
6816 			}
6817 			break;
6818 		case 224: /* DMA trap event */
6819 			DRM_DEBUG("IH: DMA trap\n");
6820 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6821 			break;
6822 		case 230: /* thermal low to high */
6823 			DRM_DEBUG("IH: thermal low to high\n");
6824 			rdev->pm.dpm.thermal.high_to_low = false;
6825 			queue_thermal = true;
6826 			break;
6827 		case 231: /* thermal high to low */
6828 			DRM_DEBUG("IH: thermal high to low\n");
6829 			rdev->pm.dpm.thermal.high_to_low = true;
6830 			queue_thermal = true;
6831 			break;
6832 		case 233: /* GUI IDLE */
6833 			DRM_DEBUG("IH: GUI idle\n");
6834 			break;
6835 		case 244: /* DMA trap event */
6836 			DRM_DEBUG("IH: DMA1 trap\n");
6837 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6838 			break;
6839 		default:
6840 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6841 			break;
6842 		}
6843 
6844 		/* wptr/rptr are in bytes! */
6845 		rptr += 16;
6846 		rptr &= rdev->ih.ptr_mask;
6847 		WREG32(IH_RB_RPTR, rptr);
6848 	}
6849 	if (queue_dp)
6850 		schedule_work(&rdev->dp_work);
6851 	if (queue_hotplug)
6852 		schedule_delayed_work(&rdev->hotplug_work, 0);
6853 	if (queue_thermal && rdev->pm.dpm_enabled)
6854 		schedule_work(&rdev->pm.dpm.thermal.work);
6855 	rdev->ih.rptr = rptr;
6856 	atomic_set(&rdev->ih.lock, 0);
6857 
6858 	/* make sure wptr hasn't changed while processing */
6859 	wptr = si_get_ih_wptr(rdev);
6860 	if (wptr != rptr)
6861 		goto restart_ih;
6862 
6863 	return IRQ_HANDLED;
6864 }
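
/*
 * Illustrative sketch (not part of the driver): how si_irq_process()
 * above decodes one 16-byte IH ring vector before dispatching on
 * src_id/src_data/ring_id. The field masks match the handler; the
 * helper name is hypothetical.
 */
static inline void si_ih_decode_example(const volatile u32 *ih_ring, u32 rptr,
					u32 *src_id, u32 *src_data,
					u32 *ring_id)
{
	u32 ring_index = rptr / 4;	/* rptr counts bytes, the ring dwords */

	*src_id   = le32_to_cpu(ih_ring[ring_index + 0]) & 0xff;
	*src_data = le32_to_cpu(ih_ring[ring_index + 1]) & 0xfffffff;
	*ring_id  = le32_to_cpu(ih_ring[ring_index + 2]) & 0xff;
	/* the fourth dword is unused here; rptr then advances by 16 bytes */
}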
6865 
6866 /*
6867  * startup/shutdown callbacks
6868  */
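/*
 * UVD and VCE bring-up below is split into three phases: *_init()
 * sets up the rings at driver load, *_start() resumes the block and
 * starts its fence ring(s) from si_startup(), and *_resume()
 * (re)initializes the rings once the CP is running. Each phase
 * disables its block on failure so the rest of the ASIC can still
 * come up.
 */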
6869 static void si_uvd_init(struct radeon_device *rdev)
6870 {
6871 	int r;
6872 
6873 	if (!rdev->has_uvd)
6874 		return;
6875 
6876 	r = radeon_uvd_init(rdev);
6877 	if (r) {
6878 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6879 		/*
6880 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6881 		 * uvd_v2_2_resume() fail early so that nothing happens
6882 		 * there. It is therefore pointless to go through that
6883 		 * code, which is why we disable UVD here.
6884 		 */
6885 		rdev->has_uvd = 0;
6886 		return;
6887 	}
6888 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6889 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6890 }
6891 
6892 static void si_uvd_start(struct radeon_device *rdev)
6893 {
6894 	int r;
6895 
6896 	if (!rdev->has_uvd)
6897 		return;
6898 
6899 	r = uvd_v2_2_resume(rdev);
6900 	if (r) {
6901 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6902 		goto error;
6903 	}
6904 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6905 	if (r) {
6906 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6907 		goto error;
6908 	}
6909 	return;
6910 
6911 error:
6912 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6913 }
6914 
6915 static void si_uvd_resume(struct radeon_device *rdev)
6916 {
6917 	struct radeon_ring *ring;
6918 	int r;
6919 
6920 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6921 		return;
6922 
6923 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6924 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6925 	if (r) {
6926 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6927 		return;
6928 	}
6929 	r = uvd_v1_0_init(rdev);
6930 	if (r) {
6931 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6932 		return;
6933 	}
6934 }
6935 
6936 static void si_vce_init(struct radeon_device *rdev)
6937 {
6938 	int r;
6939 
6940 	if (!rdev->has_vce)
6941 		return;
6942 
6943 	r = radeon_vce_init(rdev);
6944 	if (r) {
6945 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6946 		/*
6947 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6948 		 * si_vce_start() fail early so that nothing happens
6949 		 * there. It is therefore pointless to go through that
6950 		 * code, which is why we disable VCE here.
6951 		 */
6952 		rdev->has_vce = 0;
6953 		return;
6954 	}
6955 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6956 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6957 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6958 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6959 }
6960 
6961 static void si_vce_start(struct radeon_device *rdev)
6962 {
6963 	int r;
6964 
6965 	if (!rdev->has_vce)
6966 		return;
6967 
6968 	r = radeon_vce_resume(rdev);
6969 	if (r) {
6970 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6971 		goto error;
6972 	}
6973 	r = vce_v1_0_resume(rdev);
6974 	if (r) {
6975 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6976 		goto error;
6977 	}
6978 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6979 	if (r) {
6980 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6981 		goto error;
6982 	}
6983 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6984 	if (r) {
6985 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6986 		goto error;
6987 	}
6988 	return;
6989 
6990 error:
6991 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6992 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6993 }
6994 
6995 static void si_vce_resume(struct radeon_device *rdev)
6996 {
6997 	struct radeon_ring *ring;
6998 	int r;
6999 
7000 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7001 		return;
7002 
7003 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7004 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7005 	if (r) {
7006 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7007 		return;
7008 	}
7009 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7010 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7011 	if (r) {
7012 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
7013 		return;
7014 	}
7015 	r = vce_v1_0_init(rdev);
7016 	if (r) {
7017 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7018 		return;
7019 	}
7020 }
7021 
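/*
 * si_startup() brings the ASIC up in dependency order: memory
 * controller microcode and GART first, then the RLC and writeback
 * buffers and fence rings, then interrupts, and only then the CP/DMA
 * rings and the IB pool and VM manager that depend on them.
 */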
7022 static int si_startup(struct radeon_device *rdev)
7023 {
7024 	struct radeon_ring *ring;
7025 	int r;
7026 
7027 	/* enable pcie gen2/3 link */
7028 	si_pcie_gen3_enable(rdev);
7029 	/* enable aspm */
7030 	si_program_aspm(rdev);
7031 
7032 	/* scratch needs to be initialized before MC */
7033 	r = r600_vram_scratch_init(rdev);
7034 	if (r)
7035 		return r;
7036 
7037 	si_mc_program(rdev);
7038 
7039 	if (!rdev->pm.dpm_enabled) {
7040 		r = si_mc_load_microcode(rdev);
7041 		if (r) {
7042 			DRM_ERROR("Failed to load MC firmware!\n");
7043 			return r;
7044 		}
7045 	}
7046 
7047 	r = si_pcie_gart_enable(rdev);
7048 	if (r)
7049 		return r;
7050 	si_gpu_init(rdev);
7051 
7052 	/* allocate rlc buffers */
7053 	if (rdev->family == CHIP_VERDE) {
7054 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7055 		rdev->rlc.reg_list_size =
7056 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7057 	}
7058 	rdev->rlc.cs_data = si_cs_data;
7059 	r = sumo_rlc_init(rdev);
7060 	if (r) {
7061 		DRM_ERROR("Failed to init rlc BOs!\n");
7062 		return r;
7063 	}
7064 
7065 	/* allocate wb buffer */
7066 	r = radeon_wb_init(rdev);
7067 	if (r)
7068 		return r;
7069 
7070 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7071 	if (r) {
7072 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7073 		return r;
7074 	}
7075 
7076 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7077 	if (r) {
7078 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7079 		return r;
7080 	}
7081 
7082 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7083 	if (r) {
7084 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7085 		return r;
7086 	}
7087 
7088 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7089 	if (r) {
7090 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7091 		return r;
7092 	}
7093 
7094 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7095 	if (r) {
7096 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7097 		return r;
7098 	}
7099 
7100 	si_uvd_start(rdev);
7101 	si_vce_start(rdev);
7102 
7103 	/* Enable IRQ */
7104 	if (!rdev->irq.installed) {
7105 		r = radeon_irq_kms_init(rdev);
7106 		if (r)
7107 			return r;
7108 	}
7109 
7110 	r = si_irq_init(rdev);
7111 	if (r) {
7112 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7113 		radeon_irq_kms_fini(rdev);
7114 		return r;
7115 	}
7116 	si_irq_set(rdev);
7117 
7118 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7119 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7120 			     RADEON_CP_PACKET2);
7121 	if (r)
7122 		return r;
7123 
7124 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7125 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7126 			     RADEON_CP_PACKET2);
7127 	if (r)
7128 		return r;
7129 
7130 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7131 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7132 			     RADEON_CP_PACKET2);
7133 	if (r)
7134 		return r;
7135 
7136 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7137 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7138 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7139 	if (r)
7140 		return r;
7141 
7142 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7143 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7144 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7145 	if (r)
7146 		return r;
7147 
7148 	r = si_cp_load_microcode(rdev);
7149 	if (r)
7150 		return r;
7151 	r = si_cp_resume(rdev);
7152 	if (r)
7153 		return r;
7154 
7155 	r = cayman_dma_resume(rdev);
7156 	if (r)
7157 		return r;
7158 
7159 	si_uvd_resume(rdev);
7160 	si_vce_resume(rdev);
7161 
7162 	r = radeon_ib_pool_init(rdev);
7163 	if (r) {
7164 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7165 		return r;
7166 	}
7167 
7168 	r = radeon_vm_manager_init(rdev);
7169 	if (r) {
7170 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7171 		return r;
7172 	}
7173 
7174 	r = radeon_audio_init(rdev);
7175 	if (r)
7176 		return r;
7177 
7178 	return 0;
7179 }
7180 
7181 int si_resume(struct radeon_device *rdev)
7182 {
7183 	int r;
7184 
7185 	/* Do not reset the GPU before posting: on rv770 hw, unlike on r500
7186 	 * hw, posting will perform the tasks necessary to bring the GPU
7187 	 * back into good shape.
7188 	 */
7189 	/* post card */
7190 	atom_asic_init(rdev->mode_info.atom_context);
7191 
7192 	/* init golden registers */
7193 	si_init_golden_registers(rdev);
7194 
7195 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7196 		radeon_pm_resume(rdev);
7197 
7198 	rdev->accel_working = true;
7199 	r = si_startup(rdev);
7200 	if (r) {
7201 		DRM_ERROR("si startup failed on resume\n");
7202 		rdev->accel_working = false;
7203 		return r;
7204 	}
7205 
7206 	return r;
7208 }
7209 
7210 int si_suspend(struct radeon_device *rdev)
7211 {
7212 	radeon_pm_suspend(rdev);
7213 	radeon_audio_fini(rdev);
7214 	radeon_vm_manager_fini(rdev);
7215 	si_cp_enable(rdev, false);
7216 	cayman_dma_stop(rdev);
7217 	if (rdev->has_uvd) {
7218 		uvd_v1_0_fini(rdev);
7219 		radeon_uvd_suspend(rdev);
7220 	}
7221 	if (rdev->has_vce)
7222 		radeon_vce_suspend(rdev);
7223 	si_fini_pg(rdev);
7224 	si_fini_cg(rdev);
7225 	si_irq_suspend(rdev);
7226 	radeon_wb_disable(rdev);
7227 	si_pcie_gart_disable(rdev);
7228 	return 0;
7229 }
7230 
7231 /* The plan is to move initialization into this function and to use
7232  * helper functions so that radeon_device_init() does little more than
7233  * call the ASIC-specific functions. This should also allow us to
7234  * remove a number of callbacks, such as vram_info.
7236  */
7237 int si_init(struct radeon_device *rdev)
7238 {
7239 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7240 	int r;
7241 
7242 	/* Read BIOS */
7243 	if (!radeon_get_bios(rdev)) {
7244 		if (ASIC_IS_AVIVO(rdev))
7245 			return -EINVAL;
7246 	}
7247 	/* Must be an ATOMBIOS */
7248 	if (!rdev->is_atom_bios) {
7249 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7250 		return -EINVAL;
7251 	}
7252 	r = radeon_atombios_init(rdev);
7253 	if (r)
7254 		return r;
7255 
7256 	/* Post card if necessary */
7257 	if (!radeon_card_posted(rdev)) {
7258 		if (!rdev->bios) {
7259 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7260 			return -EINVAL;
7261 		}
7262 		DRM_INFO("GPU not posted. Posting now...\n");
7263 		atom_asic_init(rdev->mode_info.atom_context);
7264 	}
7265 	/* init golden registers */
7266 	si_init_golden_registers(rdev);
7267 	/* Initialize scratch registers */
7268 	si_scratch_init(rdev);
7269 	/* Initialize surface registers */
7270 	radeon_surface_init(rdev);
7271 	/* Initialize clocks */
7272 	radeon_get_clock_info(rdev->ddev);
7273 
7274 	/* Fence driver */
7275 	r = radeon_fence_driver_init(rdev);
7276 	if (r)
7277 		return r;
7278 
7279 	/* initialize memory controller */
7280 	r = si_mc_init(rdev);
7281 	if (r)
7282 		return r;
7283 	/* Memory manager */
7284 	r = radeon_bo_init(rdev);
7285 	if (r)
7286 		return r;
7287 
7288 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7289 	    !rdev->rlc_fw || !rdev->mc_fw) {
7290 		r = si_init_microcode(rdev);
7291 		if (r) {
7292 			DRM_ERROR("Failed to load firmware!\n");
7293 			return r;
7294 		}
7295 	}
7296 
7297 	/* Initialize power management */
7298 	radeon_pm_init(rdev);
7299 
7300 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7301 	ring->ring_obj = NULL;
7302 	r600_ring_init(rdev, ring, 1024 * 1024);
7303 
7304 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7305 	ring->ring_obj = NULL;
7306 	r600_ring_init(rdev, ring, 1024 * 1024);
7307 
7308 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7309 	ring->ring_obj = NULL;
7310 	r600_ring_init(rdev, ring, 1024 * 1024);
7311 
7312 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7313 	ring->ring_obj = NULL;
7314 	r600_ring_init(rdev, ring, 64 * 1024);
7315 
7316 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7317 	ring->ring_obj = NULL;
7318 	r600_ring_init(rdev, ring, 64 * 1024);
7319 
7320 	si_uvd_init(rdev);
7321 	si_vce_init(rdev);
7322 
7323 	rdev->ih.ring_obj = NULL;
7324 	r600_ih_ring_init(rdev, 64 * 1024);
7325 
7326 	r = r600_pcie_gart_init(rdev);
7327 	if (r)
7328 		return r;
7329 
7330 	rdev->accel_working = true;
7331 	r = si_startup(rdev);
7332 	if (r) {
7333 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7334 		si_cp_fini(rdev);
7335 		cayman_dma_fini(rdev);
7336 		si_irq_fini(rdev);
7337 		sumo_rlc_fini(rdev);
7338 		radeon_wb_fini(rdev);
7339 		radeon_ib_pool_fini(rdev);
7340 		radeon_vm_manager_fini(rdev);
7341 		radeon_irq_kms_fini(rdev);
7342 		si_pcie_gart_fini(rdev);
7343 		rdev->accel_working = false;
7344 	}
7345 
7346 	/* Don't start up if the MC ucode is missing.
7347 	 * The default clocks and voltages before the MC ucode
7348 	 * is loaded are not sufficient for advanced operations.
7349 	 */
7350 	if (!rdev->mc_fw) {
7351 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7352 		return -EINVAL;
7353 	}
7354 
7355 	return 0;
7356 }
7357 
7358 void si_fini(struct radeon_device *rdev)
7359 {
7360 	radeon_pm_fini(rdev);
7361 	si_cp_fini(rdev);
7362 	cayman_dma_fini(rdev);
7363 	si_fini_pg(rdev);
7364 	si_fini_cg(rdev);
7365 	si_irq_fini(rdev);
7366 	sumo_rlc_fini(rdev);
7367 	radeon_wb_fini(rdev);
7368 	radeon_vm_manager_fini(rdev);
7369 	radeon_ib_pool_fini(rdev);
7370 	radeon_irq_kms_fini(rdev);
7371 	if (rdev->has_uvd) {
7372 		uvd_v1_0_fini(rdev);
7373 		radeon_uvd_fini(rdev);
7374 	}
7375 	if (rdev->has_vce)
7376 		radeon_vce_fini(rdev);
7377 	si_pcie_gart_fini(rdev);
7378 	r600_vram_scratch_fini(rdev);
7379 	radeon_gem_fini(rdev);
7380 	radeon_fence_driver_fini(rdev);
7381 	radeon_bo_fini(rdev);
7382 	radeon_atombios_fini(rdev);
7383 	kfree(rdev->bios);
7384 	rdev->bios = NULL;
7385 }
7386 
7387 /**
7388  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7389  *
7390  * @rdev: radeon_device pointer
7391  *
7392  * Fetches a GPU clock counter snapshot (SI).
7393  * Returns the 64 bit clock counter snapshot.
7394  */
7395 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7396 {
7397 	uint64_t clock;
7398 
7399 	mutex_lock(&rdev->gpu_clock_mutex);
7400 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7401 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7402 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7403 	mutex_unlock(&rdev->gpu_clock_mutex);
7404 	return clock;
7405 }
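
/*
 * Usage sketch (hypothetical caller, not part of the driver): take two
 * snapshots of the RLC clock counter to measure elapsed GPU clocks
 * around a piece of work.
 */
static inline u64 si_gpu_clock_delta_example(struct radeon_device *rdev)
{
	u64 start, end;

	start = si_get_gpu_clock_counter(rdev);
	/* ... submit work and wait for it here ... */
	end = si_get_gpu_clock_counter(rdev);

	return end - start;
}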
7406 
7407 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7408 {
7409 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7410 	int r;
7411 
7412 	/* bypass vclk and dclk with bclk */
7413 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7414 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7415 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7416 
7417 	/* put PLL in bypass mode */
7418 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7419 
7420 	if (!vclk || !dclk) {
7421 		/* keep the bypass mode */
7422 		return 0;
7423 	}
7424 
7425 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7426 					  16384, 0x03FFFFFF, 0, 128, 5,
7427 					  &fb_div, &vclk_div, &dclk_div);
7428 	if (r)
7429 		return r;
7430 
7431 	/* set RESET_ANTI_MUX to 0 */
7432 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7433 
7434 	/* set VCO_MODE to 1 */
7435 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7436 
7437 	/* disable sleep mode */
7438 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7439 
7440 	/* deassert UPLL_RESET */
7441 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7442 
7443 	mdelay(1);
7444 
7445 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7446 	if (r)
7447 		return r;
7448 
7449 	/* assert UPLL_RESET again */
7450 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7451 
7452 	/* disable spread spectrum. */
7453 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7454 
7455 	/* set feedback divider */
7456 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7457 
7458 	/* set ref divider to 0 */
7459 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7460 
7461 	if (fb_div < 307200)
7462 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7463 	else
7464 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7465 
7466 	/* set PDIV_A and PDIV_B */
7467 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7468 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7469 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7470 
7471 	/* give the PLL some time to settle */
7472 	mdelay(15);
7473 
7474 	/* deassert PLL_RESET */
7475 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7476 
7477 	mdelay(15);
7478 
7479 	/* switch from bypass mode to normal mode */
7480 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7481 
7482 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7483 	if (r)
7484 		return r;
7485 
7486 	/* switch VCLK and DCLK selection */
7487 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7488 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7489 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7490 
7491 	mdelay(100);
7492 
7493 	return 0;
7494 }
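
/*
 * The WREG32_P(reg, val, mask) calls above are read-modify-write
 * updates: bits set in @mask are preserved and the remaining bits are
 * replaced by @val. A minimal open-coded equivalent of the pattern
 * (illustrative only):
 */
static inline void si_wreg32_p_example(struct radeon_device *rdev,
				       u32 reg, u32 val, u32 mask)
{
	u32 tmp = RREG32(reg);

	tmp &= mask;		/* keep the bits the mask protects */
	tmp |= (val & ~mask);	/* program the remaining field */
	WREG32(reg, tmp);
}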
7495 
7496 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7497 {
7498 	struct pci_dev *root = rdev->pdev->bus->self;
7499 	int bridge_pos, gpu_pos;
7500 	u32 speed_cntl, mask, current_data_rate;
7501 	int ret, i;
7502 	u16 tmp16;
7503 
7504 	if (pci_is_root_bus(rdev->pdev->bus))
7505 		return;
7506 
7507 	if (radeon_pcie_gen2 == 0)
7508 		return;
7509 
7510 	if (rdev->flags & RADEON_IS_IGP)
7511 		return;
7512 
7513 	if (!(rdev->flags & RADEON_IS_PCIE))
7514 		return;
7515 
7516 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7517 	if (ret != 0)
7518 		return;
7519 
7520 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7521 		return;
7522 
7523 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7524 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7525 		LC_CURRENT_DATA_RATE_SHIFT;
7526 	if (mask & DRM_PCIE_SPEED_80) {
7527 		if (current_data_rate == 2) {
7528 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7529 			return;
7530 		}
7531 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7532 	} else if (mask & DRM_PCIE_SPEED_50) {
7533 		if (current_data_rate == 1) {
7534 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7535 			return;
7536 		}
7537 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7538 	}
7539 
7540 	bridge_pos = pci_pcie_cap(root);
7541 	if (!bridge_pos)
7542 		return;
7543 
7544 	gpu_pos = pci_pcie_cap(rdev->pdev);
7545 	if (!gpu_pos)
7546 		return;
7547 
7548 	if (mask & DRM_PCIE_SPEED_80) {
7549 		/* re-try equalization if gen3 is not already enabled */
7550 		if (current_data_rate != 2) {
7551 			u16 bridge_cfg, gpu_cfg;
7552 			u16 bridge_cfg2, gpu_cfg2;
7553 			u32 max_lw, current_lw, tmp;
7554 
7555 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7556 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7557 
7558 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7559 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7560 
7561 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7562 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7563 
7564 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7565 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7566 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7567 
7568 			if (current_lw < max_lw) {
7569 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7570 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7571 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7572 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7573 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7574 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7575 				}
7576 			}
7577 
7578 			for (i = 0; i < 10; i++) {
7579 				/* check status */
7580 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7581 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7582 					break;
7583 
7584 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7585 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7586 
7587 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7588 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7589 
7590 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7591 				tmp |= LC_SET_QUIESCE;
7592 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7593 
7594 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7595 				tmp |= LC_REDO_EQ;
7596 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7597 
7598 				mdelay(100);
7599 
7600 				/* linkctl */
7601 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7602 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7603 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7604 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7605 
7606 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7607 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7608 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7609 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7610 
7611 				/* linkctl2 */
7612 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7613 				tmp16 &= ~((1 << 4) | (7 << 9));
7614 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7615 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7616 
7617 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7618 				tmp16 &= ~((1 << 4) | (7 << 9));
7619 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7620 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7621 
7622 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7623 				tmp &= ~LC_SET_QUIESCE;
7624 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7625 			}
7626 		}
7627 	}
7628 
7629 	/* set the link speed */
7630 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7631 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7632 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7633 
7634 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7635 	tmp16 &= ~0xf;
7636 	if (mask & DRM_PCIE_SPEED_80)
7637 		tmp16 |= 3; /* gen3 */
7638 	else if (mask & DRM_PCIE_SPEED_50)
7639 		tmp16 |= 2; /* gen2 */
7640 	else
7641 		tmp16 |= 1; /* gen1 */
7642 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7643 
7644 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7645 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7646 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7647 
7648 	for (i = 0; i < rdev->usec_timeout; i++) {
7649 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7650 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7651 			break;
7652 		udelay(1);
7653 	}
7654 }
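
/*
 * Illustrative only: the target-link-speed values written into the low
 * nibble of LNKCTL2 above map to PCIe rates as below. The helper name
 * is hypothetical.
 */
static inline u16 si_target_link_speed_example(u32 mask)
{
	if (mask & DRM_PCIE_SPEED_80)
		return 3;	/* 8.0 GT/s (gen3) */
	if (mask & DRM_PCIE_SPEED_50)
		return 2;	/* 5.0 GT/s (gen2) */
	return 1;		/* 2.5 GT/s (gen1) */
}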
7655 
7656 static void si_program_aspm(struct radeon_device *rdev)
7657 {
7658 	u32 data, orig;
7659 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7660 	bool disable_clkreq = false;
7661 
7662 	if (radeon_aspm == 0)
7663 		return;
7664 
7665 	if (!(rdev->flags & RADEON_IS_PCIE))
7666 		return;
7667 
7668 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7669 	data &= ~LC_XMIT_N_FTS_MASK;
7670 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7671 	if (orig != data)
7672 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7673 
7674 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7675 	data |= LC_GO_TO_RECOVERY;
7676 	if (orig != data)
7677 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7678 
7679 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7680 	data |= P_IGNORE_EDB_ERR;
7681 	if (orig != data)
7682 		WREG32_PCIE(PCIE_P_CNTL, data);
7683 
7684 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7685 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7686 	data |= LC_PMI_TO_L1_DIS;
7687 	if (!disable_l0s)
7688 		data |= LC_L0S_INACTIVITY(7);
7689 
7690 	if (!disable_l1) {
7691 		data |= LC_L1_INACTIVITY(7);
7692 		data &= ~LC_PMI_TO_L1_DIS;
7693 		if (orig != data)
7694 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7695 
7696 		if (!disable_plloff_in_l1) {
7697 			bool clk_req_support;
7698 
7699 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7700 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7701 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7702 			if (orig != data)
7703 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7704 
7705 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7706 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7707 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7708 			if (orig != data)
7709 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7710 
7711 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7712 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7713 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7714 			if (orig != data)
7715 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7716 
7717 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7718 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7719 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7720 			if (orig != data)
7721 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7722 
7723 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7724 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7725 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7726 				if (orig != data)
7727 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7728 
7729 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7730 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7731 				if (orig != data)
7732 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7733 
7734 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7735 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7736 				if (orig != data)
7737 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7738 
7739 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7740 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7741 				if (orig != data)
7742 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7743 
7744 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7745 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7746 				if (orig != data)
7747 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7748 
7749 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7750 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7751 				if (orig != data)
7752 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7753 
7754 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7755 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7756 				if (orig != data)
7757 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7758 
7759 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7760 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7761 				if (orig != data)
7762 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7763 			}
7764 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7765 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7766 			data |= LC_DYN_LANES_PWR_STATE(3);
7767 			if (orig != data)
7768 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7769 
7770 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7771 			data &= ~LS2_EXIT_TIME_MASK;
7772 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7773 				data |= LS2_EXIT_TIME(5);
7774 			if (orig != data)
7775 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7776 
7777 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7778 			data &= ~LS2_EXIT_TIME_MASK;
7779 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7780 				data |= LS2_EXIT_TIME(5);
7781 			if (orig != data)
7782 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7783 
7784 			if (!disable_clkreq &&
7785 			    !pci_is_root_bus(rdev->pdev->bus)) {
7786 				struct pci_dev *root = rdev->pdev->bus->self;
7787 				u32 lnkcap;
7788 
7789 				clk_req_support = false;
7790 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7791 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7792 					clk_req_support = true;
7793 			} else {
7794 				clk_req_support = false;
7795 			}
7796 
7797 			if (clk_req_support) {
7798 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7799 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7800 				if (orig != data)
7801 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7802 
7803 				orig = data = RREG32(THM_CLK_CNTL);
7804 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7805 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7806 				if (orig != data)
7807 					WREG32(THM_CLK_CNTL, data);
7808 
7809 				orig = data = RREG32(MISC_CLK_CNTL);
7810 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7811 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7812 				if (orig != data)
7813 					WREG32(MISC_CLK_CNTL, data);
7814 
7815 				orig = data = RREG32(CG_CLKPIN_CNTL);
7816 				data &= ~BCLK_AS_XCLK;
7817 				if (orig != data)
7818 					WREG32(CG_CLKPIN_CNTL, data);
7819 
7820 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7821 				data &= ~FORCE_BIF_REFCLK_EN;
7822 				if (orig != data)
7823 					WREG32(CG_CLKPIN_CNTL_2, data);
7824 
7825 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7826 				data &= ~MPLL_CLKOUT_SEL_MASK;
7827 				data |= MPLL_CLKOUT_SEL(4);
7828 				if (orig != data)
7829 					WREG32(MPLL_BYPASSCLK_SEL, data);
7830 
7831 				orig = data = RREG32(SPLL_CNTL_MODE);
7832 				data &= ~SPLL_REFCLK_SEL_MASK;
7833 				if (orig != data)
7834 					WREG32(SPLL_CNTL_MODE, data);
7835 			}
7836 		}
7837 	} else {
7838 		if (orig != data)
7839 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7840 	}
7841 
7842 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7843 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7844 	if (orig != data)
7845 		WREG32_PCIE(PCIE_CNTL2, data);
7846 
7847 	if (!disable_l0s) {
7848 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7849 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7850 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7851 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7852 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7853 				data &= ~LC_L0S_INACTIVITY_MASK;
7854 				if (orig != data)
7855 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7856 			}
7857 		}
7858 	}
7859 }
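
/*
 * The orig/data idiom used throughout si_program_aspm() skips the
 * register write when nothing would change. A minimal sketch of the
 * pattern (hypothetical helper, OR-in only):
 */
static inline void si_update_pcie_port_reg_example(struct radeon_device *rdev,
						   u32 reg, u32 set_bits)
{
	u32 orig, data;

	orig = data = RREG32_PCIE_PORT(reg);
	data |= set_bits;
	if (orig != data)
		WREG32_PCIE_PORT(reg, data);
}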
7860 
7861 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7862 {
7863 	unsigned i;
7864 
7865 	/* make sure VCEPLL_CTLREQ is deasserted */
7866 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7867 
7868 	mdelay(10);
7869 
7870 	/* assert UPLL_CTLREQ */
7871 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7872 
7873 	/* wait for CTLACK and CTLACK2 to get asserted */
7874 	for (i = 0; i < 100; ++i) {
7875 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7876 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7877 			break;
7878 		mdelay(10);
7879 	}
7880 
7881 	/* deassert UPLL_CTLREQ */
7882 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7883 
7884 	if (i == 100) {
7885 		DRM_ERROR("Timeout setting VCE clocks!\n");
7886 		return -ETIMEDOUT;
7887 	}
7888 
7889 	return 0;
7890 }
7891 
7892 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7893 {
7894 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7895 	int r;
7896 
7897 	/* bypass evclk and ecclk with bclk */
7898 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7899 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7900 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7901 
7902 	/* put PLL in bypass mode */
7903 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7904 		     ~VCEPLL_BYPASS_EN_MASK);
7905 
7906 	if (!evclk || !ecclk) {
7907 		/* keep the bypass mode, put PLL to sleep */
7908 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7909 			     ~VCEPLL_SLEEP_MASK);
7910 		return 0;
7911 	}
7912 
7913 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7914 					  16384, 0x03FFFFFF, 0, 128, 5,
7915 					  &fb_div, &evclk_div, &ecclk_div);
7916 	if (r)
7917 		return r;
7918 
7919 	/* set RESET_ANTI_MUX to 0 */
7920 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7921 
7922 	/* set VCO_MODE to 1 */
7923 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7924 		     ~VCEPLL_VCO_MODE_MASK);
7925 
7926 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7927 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7928 		     ~VCEPLL_SLEEP_MASK);
7929 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7930 
7931 	/* deassert VCEPLL_RESET */
7932 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7933 
7934 	mdelay(1);
7935 
7936 	r = si_vce_send_vcepll_ctlreq(rdev);
7937 	if (r)
7938 		return r;
7939 
7940 	/* assert VCEPLL_RESET again */
7941 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7942 
7943 	/* disable spread spectrum. */
7944 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7945 
7946 	/* set feedback divider */
7947 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7948 
7949 	/* set ref divider to 0 */
7950 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7951 
7952 	/* set PDIV_A and PDIV_B */
7953 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7954 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7955 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7956 
7957 	/* give the PLL some time to settle */
7958 	mdelay(15);
7959 
7960 	/* deassert PLL_RESET */
7961 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7962 
7963 	mdelay(15);
7964 
7965 	/* switch from bypass mode to normal mode */
7966 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7967 
7968 	r = si_vce_send_vcepll_ctlreq(rdev);
7969 	if (r)
7970 		return r;
7971 
7972 	/* switch EVCLK and ECCLK selection */
7973 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7974 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7975 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7976 
7977 	mdelay(100);
7978 
7979 	return 0;
7980 }
7981
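
/*
 * Usage sketch (hypothetical caller): program the VCE clocks for an
 * encode session, then pass 0/0 to drop back to bypass mode and put
 * the PLL to sleep while idle. The non-zero rates are illustrative.
 */
static inline int si_vce_clocks_example(struct radeon_device *rdev)
{
	int r;

	r = si_set_vce_clocks(rdev, 80000, 80000);	/* illustrative rates */
	if (r)
		return r;
	/* ... VCE work ... */
	return si_set_vce_clocks(rdev, 0, 0);		/* bypass + PLL sleep */
}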