xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision e5f586c763a079349398e2b0c7c271386193ac34)
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"

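/*
 * The all-caps firmware names below are the legacy big-endian blobs;
 * the lowercase names are the newer header-based little-endian blobs.
 * si_init_microcode() requests the lowercase name first and falls back
 * to the legacy one.
 */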
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

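/*
 * RLC save/restore list.  The encoding is inferred, not documented: each
 * pair appears to be ((GRBM instance/broadcast selector) << 16) |
 * (register byte offset >> 2), followed by a scratch dword for the saved
 * value, with the bare 0x3 and the trailing 0 acting as control words
 * rather than register entries.
 */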
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

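/*
 * The "golden register" tables below are {offset, and_mask, or_mask}
 * triples.  radeon_program_register_sequence() (radeon_device.c) applies
 * each triple as a read-modify-write, roughly:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= (or_mask & and_mask);
 *	WREG32(reg, tmp);
 *
 * with an and_mask of 0xffffffff writing or_mask out directly.
 */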
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

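/*
 * Powergating init sequence for Verde, also programmed through
 * radeon_program_register_sequence() from si_init_golden_registers().
 * The 0x3500/0x3504 and 0x3538/0x353c pairs look like index/data writes
 * into an indirect interface, so treat the values as an opaque init blob
 * rather than documented register fields.
 */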
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

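/*
 * si_get_allowed_info_register() backs the RADEON_INFO_READ_REG query of
 * the radeon info ioctl (see radeon_kms.c).  A rough userspace sketch,
 * assuming libdrm's drmCommandWriteRead() and hard-coding the GRBM_STATUS
 * offset from sid.h:
 *
 *	uint32_t reg = 0x8010;	// GRBM_STATUS
 *	struct drm_radeon_info info = {
 *		.request = RADEON_INFO_READ_REG,
 *		.value = (uintptr_t)&reg,
 *	};
 *	if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) == 0)
 *		;	// reg now holds the register contents
 *
 * Only the status registers whitelisted above are readable this way.
 */
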
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
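/*
 * Worked example of the decode above: CTF_TEMP is a 9-bit value in
 * degrees C with bit 9 acting as a saturation flag, so a raw field of
 * 0x06e decodes to 110 C and is returned as 110000 millidegrees, while
 * any reading with 0x200 set is clamped to 255 C.
 */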

#define TAHITI_IO_MC_REGS_SIZE 36

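/*
 * Legacy (non header-based) MC firmware carries no register programming
 * table of its own, so these MC_SEQ_IO_DEBUG {index, data} pairs are kept
 * per-ASIC in the driver and written out by si_mc_load_microcode() before
 * the ucode itself is loaded.  The five tables differ only in the final
 * 0x9f entry.
 */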
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/* ucode loading */
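/*
 * Two firmware layouts are handled below: new-style blobs start with a
 * struct mc_firmware_header_v1_0 and carry little-endian ucode plus an
 * embedded io-debug register table, while legacy blobs are raw big-endian
 * ucode that relies on the static io_mc_regs tables above.
 */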
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	const u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (const u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (const u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (const u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (const u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (const u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1645 
1646 static int si_init_microcode(struct radeon_device *rdev)
1647 {
1648 	const char *chip_name;
1649 	const char *new_chip_name;
1650 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1651 	size_t smc_req_size, mc2_req_size;
1652 	char fw_name[30];
1653 	int err;
1654 	int new_fw = 0;
1655 	bool new_smc = false;
1656 	bool si58_fw = false;
1657 	bool banks2_fw = false;
1658 
1659 	DRM_DEBUG("\n");
1660 
1661 	switch (rdev->family) {
1662 	case CHIP_TAHITI:
1663 		chip_name = "TAHITI";
1664 		new_chip_name = "tahiti";
1665 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1666 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1667 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1668 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1669 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1670 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1671 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1672 		break;
1673 	case CHIP_PITCAIRN:
1674 		chip_name = "PITCAIRN";
1675 		if ((rdev->pdev->revision == 0x81) &&
1676 		    ((rdev->pdev->device == 0x6810) ||
1677 		     (rdev->pdev->device == 0x6811)))
1678 			new_smc = true;
1679 		new_chip_name = "pitcairn";
1680 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1681 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1682 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1683 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1684 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1685 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1686 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1687 		break;
1688 	case CHIP_VERDE:
1689 		chip_name = "VERDE";
1690 		if (((rdev->pdev->device == 0x6820) &&
1691 		     ((rdev->pdev->revision == 0x81) ||
1692 		      (rdev->pdev->revision == 0x83))) ||
1693 		    ((rdev->pdev->device == 0x6821) &&
1694 		     ((rdev->pdev->revision == 0x83) ||
1695 		      (rdev->pdev->revision == 0x87))) ||
1696 		    ((rdev->pdev->revision == 0x87) &&
1697 		     ((rdev->pdev->device == 0x6823) ||
1698 		      (rdev->pdev->device == 0x682b))))
1699 			new_smc = true;
1700 		new_chip_name = "verde";
1701 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1702 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1703 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1704 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1705 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1706 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1707 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1708 		break;
1709 	case CHIP_OLAND:
1710 		chip_name = "OLAND";
1711 		if (((rdev->pdev->revision == 0x81) &&
1712 		     ((rdev->pdev->device == 0x6600) ||
1713 		      (rdev->pdev->device == 0x6604) ||
1714 		      (rdev->pdev->device == 0x6605) ||
1715 		      (rdev->pdev->device == 0x6610))) ||
1716 		    ((rdev->pdev->revision == 0x83) &&
1717 		     (rdev->pdev->device == 0x6610)))
1718 			new_smc = true;
1719 		new_chip_name = "oland";
1720 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1721 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1722 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1723 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1724 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1725 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1726 		break;
1727 	case CHIP_HAINAN:
1728 		chip_name = "HAINAN";
1729 		if (((rdev->pdev->revision == 0x81) &&
1730 		     (rdev->pdev->device == 0x6660)) ||
1731 		    ((rdev->pdev->revision == 0x83) &&
1732 		     ((rdev->pdev->device == 0x6660) ||
1733 		      (rdev->pdev->device == 0x6663) ||
1734 		      (rdev->pdev->device == 0x6665) ||
1735 		      (rdev->pdev->device == 0x6667))))
1736 			new_smc = true;
1737 		else if ((rdev->pdev->revision == 0xc3) &&
1738 			 (rdev->pdev->device == 0x6665))
1739 			banks2_fw = true;
1740 		new_chip_name = "hainan";
1741 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1742 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1743 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1744 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1745 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1746 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1747 		break;
1748 	default: BUG();
1749 	}
1750 
1751 	/* this memory configuration requires special firmware */
1752 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1753 		si58_fw = true;
1754 
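	/*
	 * Each block below first tries the newer firmware image with a
	 * unified header ("radeon/<chip>_*.bin", lower case) and runs it
	 * through radeon_ucode_validate(); if that image is absent it falls
	 * back to the legacy BIN file ("radeon/<CHIP>_*.bin", upper case)
	 * and only sanity-checks its length.  new_fw counts how many unified
	 * images were found so mixing the two formats can be rejected below.
	 */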
1755 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1756 
1757 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1758 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1759 	if (err) {
1760 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1761 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1762 		if (err)
1763 			goto out;
1764 		if (rdev->pfp_fw->size != pfp_req_size) {
1765 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1766 			       rdev->pfp_fw->size, fw_name);
1767 			err = -EINVAL;
1768 			goto out;
1769 		}
1770 	} else {
1771 		err = radeon_ucode_validate(rdev->pfp_fw);
1772 		if (err) {
1773 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1774 			       fw_name);
1775 			goto out;
1776 		} else {
1777 			new_fw++;
1778 		}
1779 	}
1780 
1781 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1782 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1783 	if (err) {
1784 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1785 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1786 		if (err)
1787 			goto out;
1788 		if (rdev->me_fw->size != me_req_size) {
1789 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1790 			       rdev->me_fw->size, fw_name);
1791 			err = -EINVAL;
1792 		}
1793 	} else {
1794 		err = radeon_ucode_validate(rdev->me_fw);
1795 		if (err) {
1796 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797 			       fw_name);
1798 			goto out;
1799 		} else {
1800 			new_fw++;
1801 		}
1802 	}
1803 
1804 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1805 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806 	if (err) {
1807 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1808 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1809 		if (err)
1810 			goto out;
1811 		if (rdev->ce_fw->size != ce_req_size) {
1812 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813 			       rdev->ce_fw->size, fw_name);
1814 			err = -EINVAL;
1815 		}
1816 	} else {
1817 		err = radeon_ucode_validate(rdev->ce_fw);
1818 		if (err) {
1819 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820 			       fw_name);
1821 			goto out;
1822 		} else {
1823 			new_fw++;
1824 		}
1825 	}
1826 
1827 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1828 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1829 	if (err) {
1830 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1831 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1832 		if (err)
1833 			goto out;
1834 		if (rdev->rlc_fw->size != rlc_req_size) {
1835 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->rlc_fw->size, fw_name);
1837 			err = -EINVAL;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->rlc_fw);
1841 		if (err) {
1842 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	if (si58_fw)
1851 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1852 	else
1853 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1854 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1855 	if (err) {
1856 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1857 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1858 		if (err) {
1859 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1860 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1861 			if (err)
1862 				goto out;
1863 		}
1864 		if ((rdev->mc_fw->size != mc_req_size) &&
1865 		    (rdev->mc_fw->size != mc2_req_size)) {
1866 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1867 			       rdev->mc_fw->size, fw_name);
1868 			err = -EINVAL;
1869 		}
1870 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1871 	} else {
1872 		err = radeon_ucode_validate(rdev->mc_fw);
1873 		if (err) {
1874 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1875 			       fw_name);
1876 			goto out;
1877 		} else {
1878 			new_fw++;
1879 		}
1880 	}
1881 
1882 	if (banks2_fw)
1883 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1884 	else if (new_smc)
1885 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1886 	else
1887 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1888 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1889 	if (err) {
1890 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1891 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1892 		if (err) {
1893 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1894 			release_firmware(rdev->smc_fw);
1895 			rdev->smc_fw = NULL;
1896 			err = 0;
1897 		} else if (rdev->smc_fw->size != smc_req_size) {
1898 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1899 			       rdev->smc_fw->size, fw_name);
1900 			err = -EINVAL;
1901 		}
1902 	} else {
1903 		err = radeon_ucode_validate(rdev->smc_fw);
1904 		if (err) {
1905 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1906 			       fw_name);
1907 			goto out;
1908 		} else {
1909 			new_fw++;
1910 		}
1911 	}
1912 
1913 	if (new_fw == 0) {
1914 		rdev->new_fw = false;
1915 	} else if (new_fw < 6) {
1916 		pr_err("si_fw: mixing new and old firmware!\n");
1917 		err = -EINVAL;
1918 	} else {
1919 		rdev->new_fw = true;
1920 	}
1921 out:
1922 	if (err) {
1923 		if (err != -EINVAL)
1924 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1925 			       fw_name);
1926 		release_firmware(rdev->pfp_fw);
1927 		rdev->pfp_fw = NULL;
1928 		release_firmware(rdev->me_fw);
1929 		rdev->me_fw = NULL;
1930 		release_firmware(rdev->ce_fw);
1931 		rdev->ce_fw = NULL;
1932 		release_firmware(rdev->rlc_fw);
1933 		rdev->rlc_fw = NULL;
1934 		release_firmware(rdev->mc_fw);
1935 		rdev->mc_fw = NULL;
1936 		release_firmware(rdev->smc_fw);
1937 		rdev->smc_fw = NULL;
1938 	}
1939 	return err;
1940 }
1941 
1942 /* watermark setup */
1943 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1944 				   struct radeon_crtc *radeon_crtc,
1945 				   struct drm_display_mode *mode,
1946 				   struct drm_display_mode *other_mode)
1947 {
1948 	u32 tmp, buffer_alloc, i;
1949 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1950 	/*
1951 	 * Line Buffer Setup
1952 	 * There are 3 line buffers, each one shared by 2 display controllers.
1953 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1954 	 * the display controllers.  The partitioning is done via one of four
1955 	 * preset allocations specified in bits 21:20 (only two are used here):
1956 	 *  0 - half lb
1957 	 *  2 - whole lb, other crtc must be disabled
1958 	 */
1959 	/* this can get tricky if we have two large displays on a paired group
1960 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1961 	 * non-linked crtcs for maximum line buffer allocation.
1962 	 */
1963 	if (radeon_crtc->base.enabled && mode) {
1964 		if (other_mode) {
1965 			tmp = 0; /* 1/2 */
1966 			buffer_alloc = 1;
1967 		} else {
1968 			tmp = 2; /* whole */
1969 			buffer_alloc = 2;
1970 		}
1971 	} else {
1972 		tmp = 0;
1973 		buffer_alloc = 0;
1974 	}
1975 
1976 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1977 	       DC_LB_MEMORY_CONFIG(tmp));
1978 
1979 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1980 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1981 	for (i = 0; i < rdev->usec_timeout; i++) {
1982 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1983 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1984 			break;
1985 		udelay(1);
1986 	}
1987 
1988 	if (radeon_crtc->base.enabled && mode) {
1989 		switch (tmp) {
1990 		case 0:
1991 		default:
1992 			return 4096 * 2;
1993 		case 2:
1994 			return 8192 * 2;
1995 		}
1996 	}
1997 
1998 	/* controller not enabled, so no lb used */
1999 	return 0;
2000 }
2001 
2002 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2003 {
2004 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2005 
2006 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2007 	case 0:
2008 	default:
2009 		return 1;
2010 	case 1:
2011 		return 2;
2012 	case 2:
2013 		return 4;
2014 	case 3:
2015 		return 8;
2016 	case 4:
2017 		return 3;
2018 	case 5:
2019 		return 6;
2020 	case 6:
2021 		return 10;
2022 	case 7:
2023 		return 12;
2024 	case 8:
2025 		return 16;
2026 	}
2027 }
2028 
2029 struct dce6_wm_params {
2030 	u32 dram_channels; /* number of dram channels */
2031 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2032 	u32 sclk;          /* engine clock in kHz */
2033 	u32 disp_clk;      /* display clock in kHz */
2034 	u32 src_width;     /* viewport width */
2035 	u32 active_time;   /* active display time in ns */
2036 	u32 blank_time;    /* blank time in ns */
2037 	bool interlaced;    /* mode is interlaced */
2038 	fixed20_12 vsc;    /* vertical scale ratio */
2039 	u32 num_heads;     /* number of active crtcs */
2040 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2041 	u32 lb_size;       /* line buffer allocated to pipe */
2042 	u32 vtaps;         /* vertical scaler taps */
2043 };
2044 
2045 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2046 {
2047 	/* Calculate raw DRAM Bandwidth */
2048 	fixed20_12 dram_efficiency; /* 0.7 */
2049 	fixed20_12 yclk, dram_channels, bandwidth;
2050 	fixed20_12 a;
2051 
2052 	a.full = dfixed_const(1000);
2053 	yclk.full = dfixed_const(wm->yclk);
2054 	yclk.full = dfixed_div(yclk, a);
2055 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2056 	a.full = dfixed_const(10);
2057 	dram_efficiency.full = dfixed_const(7);
2058 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2059 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2060 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2061 
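	/*
	 * In plain numbers, roughly: bandwidth ~= yclk[MHz] *
	 * (dram_channels * 4 bytes) * 0.7 DRAM efficiency.
	 */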
2062 	return dfixed_trunc(bandwidth);
2063 }
2064 
2065 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2066 {
2067 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2068 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2069 	fixed20_12 yclk, dram_channels, bandwidth;
2070 	fixed20_12 a;
2071 
2072 	a.full = dfixed_const(1000);
2073 	yclk.full = dfixed_const(wm->yclk);
2074 	yclk.full = dfixed_div(yclk, a);
2075 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2076 	a.full = dfixed_const(10);
2077 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2078 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2079 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2080 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2081 
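	/* i.e. roughly yclk[MHz] * (dram_channels * 4 bytes) * 0.3 display share */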
2082 	return dfixed_trunc(bandwidth);
2083 }
2084 
2085 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2086 {
2087 	/* Calculate the display Data return Bandwidth */
2088 	fixed20_12 return_efficiency; /* 0.8 */
2089 	fixed20_12 sclk, bandwidth;
2090 	fixed20_12 a;
2091 
2092 	a.full = dfixed_const(1000);
2093 	sclk.full = dfixed_const(wm->sclk);
2094 	sclk.full = dfixed_div(sclk, a);
2095 	a.full = dfixed_const(10);
2096 	return_efficiency.full = dfixed_const(8);
2097 	return_efficiency.full = dfixed_div(return_efficiency, a);
2098 	a.full = dfixed_const(32);
2099 	bandwidth.full = dfixed_mul(a, sclk);
2100 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2101 
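	/* i.e. roughly sclk[MHz] * 32 bytes * 0.8 return efficiency */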
2102 	return dfixed_trunc(bandwidth);
2103 }
2104 
2105 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2106 {
2107 	return 32;
2108 }
2109 
2110 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2111 {
2112 	/* Calculate the DMIF Request Bandwidth */
2113 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2114 	fixed20_12 disp_clk, sclk, bandwidth;
2115 	fixed20_12 a, b1, b2;
2116 	u32 min_bandwidth;
2117 
2118 	a.full = dfixed_const(1000);
2119 	disp_clk.full = dfixed_const(wm->disp_clk);
2120 	disp_clk.full = dfixed_div(disp_clk, a);
2121 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2122 	b1.full = dfixed_mul(a, disp_clk);
2123 
2124 	a.full = dfixed_const(1000);
2125 	sclk.full = dfixed_const(wm->sclk);
2126 	sclk.full = dfixed_div(sclk, a);
2127 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2128 	b2.full = dfixed_mul(a, sclk);
2129 
2130 	a.full = dfixed_const(10);
2131 	disp_clk_request_efficiency.full = dfixed_const(8);
2132 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2133 
2134 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2135 
2136 	a.full = dfixed_const(min_bandwidth);
2137 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2138 
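	/*
	 * i.e. roughly min(disp_clk[MHz] * bytes_per_request / 2,
	 *                  sclk[MHz] * bytes_per_request) * 0.8 efficiency,
	 * with bytes_per_request = 32.
	 */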
2139 	return dfixed_trunc(bandwidth);
2140 }
2141 
2142 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2143 {
2144 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2145 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2146 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2147 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2148 
2149 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2150 }
2151 
2152 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2153 {
2154 	/* Calculate the display mode Average Bandwidth
2155 	 * DisplayMode should contain the source and destination dimensions,
2156 	 * timing, etc.
2157 	 */
2158 	fixed20_12 bpp;
2159 	fixed20_12 line_time;
2160 	fixed20_12 src_width;
2161 	fixed20_12 bandwidth;
2162 	fixed20_12 a;
2163 
2164 	a.full = dfixed_const(1000);
2165 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2166 	line_time.full = dfixed_div(line_time, a);
2167 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2168 	src_width.full = dfixed_const(wm->src_width);
2169 	bandwidth.full = dfixed_mul(src_width, bpp);
2170 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2171 	bandwidth.full = dfixed_div(bandwidth, line_time);
2172 
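	/* i.e. src_width * bytes_per_pixel * vsc / line_time[us] */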
2173 	return dfixed_trunc(bandwidth);
2174 }
2175 
2176 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2177 {
2178 	/* First calculate the latency in ns */
2179 	u32 mc_latency = 2000; /* 2000 ns. */
2180 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2181 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2182 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2183 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2184 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2185 		(wm->num_heads * cursor_line_pair_return_time);
2186 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2187 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2188 	u32 tmp, dmif_size = 12288;
2189 	fixed20_12 a, b, c;
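	/*
	 * latency above is the total time (in ns) that must be hidden.  The
	 * code below also derives how fast the line buffer can be refilled
	 * (lb_fill_bw, bounded by the per-head share of the available
	 * bandwidth, a DMIF-size based limit and disp_clk * bytes_per_pixel)
	 * and grows the watermark by any shortfall when a source line cannot
	 * be filled within the active display time.
	 */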
2190 
2191 	if (wm->num_heads == 0)
2192 		return 0;
2193 
2194 	a.full = dfixed_const(2);
2195 	b.full = dfixed_const(1);
2196 	if ((wm->vsc.full > a.full) ||
2197 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2198 	    (wm->vtaps >= 5) ||
2199 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2200 		max_src_lines_per_dst_line = 4;
2201 	else
2202 		max_src_lines_per_dst_line = 2;
2203 
2204 	a.full = dfixed_const(available_bandwidth);
2205 	b.full = dfixed_const(wm->num_heads);
2206 	a.full = dfixed_div(a, b);
2207 
2208 	b.full = dfixed_const(mc_latency + 512);
2209 	c.full = dfixed_const(wm->disp_clk);
2210 	b.full = dfixed_div(b, c);
2211 
2212 	c.full = dfixed_const(dmif_size);
2213 	b.full = dfixed_div(c, b);
2214 
2215 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2216 
2217 	b.full = dfixed_const(1000);
2218 	c.full = dfixed_const(wm->disp_clk);
2219 	b.full = dfixed_div(c, b);
2220 	c.full = dfixed_const(wm->bytes_per_pixel);
2221 	b.full = dfixed_mul(b, c);
2222 
2223 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2224 
2225 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2226 	b.full = dfixed_const(1000);
2227 	c.full = dfixed_const(lb_fill_bw);
2228 	b.full = dfixed_div(c, b);
2229 	a.full = dfixed_div(a, b);
2230 	line_fill_time = dfixed_trunc(a);
2231 
2232 	if (line_fill_time < wm->active_time)
2233 		return latency;
2234 	else
2235 		return latency + (line_fill_time - wm->active_time);
2236 
2237 }
2238 
2239 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2240 {
2241 	if (dce6_average_bandwidth(wm) <=
2242 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2243 		return true;
2244 	else
2245 		return false;
2246 }
2247 
2248 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2249 {
2250 	if (dce6_average_bandwidth(wm) <=
2251 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2252 		return true;
2253 	else
2254 		return false;
2255 }
2256 
2257 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2258 {
2259 	u32 lb_partitions = wm->lb_size / wm->src_width;
2260 	u32 line_time = wm->active_time + wm->blank_time;
2261 	u32 latency_tolerant_lines;
2262 	u32 latency_hiding;
2263 	fixed20_12 a;
2264 
2265 	a.full = dfixed_const(1);
2266 	if (wm->vsc.full > a.full)
2267 		latency_tolerant_lines = 1;
2268 	else {
2269 		if (lb_partitions <= (wm->vtaps + 1))
2270 			latency_tolerant_lines = 1;
2271 		else
2272 			latency_tolerant_lines = 2;
2273 	}
2274 
2275 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2276 
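	/*
	 * The latency is hidden as long as the watermark fits within the
	 * lines the line buffer can tolerate plus the blanking interval.
	 */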
2277 	if (dce6_latency_watermark(wm) <= latency_hiding)
2278 		return true;
2279 	else
2280 		return false;
2281 }
2282 
2283 static void dce6_program_watermarks(struct radeon_device *rdev,
2284 					 struct radeon_crtc *radeon_crtc,
2285 					 u32 lb_size, u32 num_heads)
2286 {
2287 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2288 	struct dce6_wm_params wm_low, wm_high;
2289 	u32 dram_channels;
2290 	u32 pixel_period;
2291 	u32 line_time = 0;
2292 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2293 	u32 priority_a_mark = 0, priority_b_mark = 0;
2294 	u32 priority_a_cnt = PRIORITY_OFF;
2295 	u32 priority_b_cnt = PRIORITY_OFF;
2296 	u32 tmp, arb_control3;
2297 	fixed20_12 a, b, c;
2298 
2299 	if (radeon_crtc->base.enabled && num_heads && mode) {
2300 		pixel_period = 1000000 / (u32)mode->clock;
2301 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2302 		priority_a_cnt = 0;
2303 		priority_b_cnt = 0;
2304 
2305 		if (rdev->family == CHIP_ARUBA)
2306 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2307 		else
2308 			dram_channels = si_get_number_of_dram_channels(rdev);
2309 
2310 		/* watermark for high clocks */
2311 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2312 			wm_high.yclk =
2313 				radeon_dpm_get_mclk(rdev, false) * 10;
2314 			wm_high.sclk =
2315 				radeon_dpm_get_sclk(rdev, false) * 10;
2316 		} else {
2317 			wm_high.yclk = rdev->pm.current_mclk * 10;
2318 			wm_high.sclk = rdev->pm.current_sclk * 10;
2319 		}
2320 
2321 		wm_high.disp_clk = mode->clock;
2322 		wm_high.src_width = mode->crtc_hdisplay;
2323 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2324 		wm_high.blank_time = line_time - wm_high.active_time;
2325 		wm_high.interlaced = false;
2326 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2327 			wm_high.interlaced = true;
2328 		wm_high.vsc = radeon_crtc->vsc;
2329 		wm_high.vtaps = 1;
2330 		if (radeon_crtc->rmx_type != RMX_OFF)
2331 			wm_high.vtaps = 2;
2332 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2333 		wm_high.lb_size = lb_size;
2334 		wm_high.dram_channels = dram_channels;
2335 		wm_high.num_heads = num_heads;
2336 
2337 		/* watermark for low clocks */
2338 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2339 			wm_low.yclk =
2340 				radeon_dpm_get_mclk(rdev, true) * 10;
2341 			wm_low.sclk =
2342 				radeon_dpm_get_sclk(rdev, true) * 10;
2343 		} else {
2344 			wm_low.yclk = rdev->pm.current_mclk * 10;
2345 			wm_low.sclk = rdev->pm.current_sclk * 10;
2346 		}
2347 
2348 		wm_low.disp_clk = mode->clock;
2349 		wm_low.src_width = mode->crtc_hdisplay;
2350 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2351 		wm_low.blank_time = line_time - wm_low.active_time;
2352 		wm_low.interlaced = false;
2353 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2354 			wm_low.interlaced = true;
2355 		wm_low.vsc = radeon_crtc->vsc;
2356 		wm_low.vtaps = 1;
2357 		if (radeon_crtc->rmx_type != RMX_OFF)
2358 			wm_low.vtaps = 2;
2359 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2360 		wm_low.lb_size = lb_size;
2361 		wm_low.dram_channels = dram_channels;
2362 		wm_low.num_heads = num_heads;
2363 
2364 		/* set for high clocks */
2365 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2366 		/* set for low clocks */
2367 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2368 
2369 		/* possibly force display priority to high */
2370 		/* should really do this at mode validation time... */
2371 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2372 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2373 		    !dce6_check_latency_hiding(&wm_high) ||
2374 		    (rdev->disp_priority == 2)) {
2375 			DRM_DEBUG_KMS("force priority to high\n");
2376 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2377 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2378 		}
2379 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2380 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2381 		    !dce6_check_latency_hiding(&wm_low) ||
2382 		    (rdev->disp_priority == 2)) {
2383 			DRM_DEBUG_KMS("force priority to high\n");
2384 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2385 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2386 		}
2387 
2388 		a.full = dfixed_const(1000);
2389 		b.full = dfixed_const(mode->clock);
2390 		b.full = dfixed_div(b, a);
2391 		c.full = dfixed_const(latency_watermark_a);
2392 		c.full = dfixed_mul(c, b);
2393 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2394 		c.full = dfixed_div(c, a);
2395 		a.full = dfixed_const(16);
2396 		c.full = dfixed_div(c, a);
2397 		priority_a_mark = dfixed_trunc(c);
2398 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2399 
2400 		a.full = dfixed_const(1000);
2401 		b.full = dfixed_const(mode->clock);
2402 		b.full = dfixed_div(b, a);
2403 		c.full = dfixed_const(latency_watermark_b);
2404 		c.full = dfixed_mul(c, b);
2405 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2406 		c.full = dfixed_div(c, a);
2407 		a.full = dfixed_const(16);
2408 		c.full = dfixed_div(c, a);
2409 		priority_b_mark = dfixed_trunc(c);
2410 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2411 
2412 		/* Save number of lines the linebuffer leads before the scanout */
2413 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2414 	}
2415 
2416 	/* select wm A */
2417 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2418 	tmp = arb_control3;
2419 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2420 	tmp |= LATENCY_WATERMARK_MASK(1);
2421 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2422 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2423 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2424 		LATENCY_HIGH_WATERMARK(line_time)));
2425 	/* select wm B */
2426 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2427 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2428 	tmp |= LATENCY_WATERMARK_MASK(2);
2429 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2430 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2431 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2432 		LATENCY_HIGH_WATERMARK(line_time)));
2433 	/* restore original selection */
2434 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2435 
2436 	/* write the priority marks */
2437 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2438 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2439 
2440 	/* save values for DPM */
2441 	radeon_crtc->line_time = line_time;
2442 	radeon_crtc->wm_high = latency_watermark_a;
2443 	radeon_crtc->wm_low = latency_watermark_b;
2444 }
2445 
2446 void dce6_bandwidth_update(struct radeon_device *rdev)
2447 {
2448 	struct drm_display_mode *mode0 = NULL;
2449 	struct drm_display_mode *mode1 = NULL;
2450 	u32 num_heads = 0, lb_size;
2451 	int i;
2452 
2453 	if (!rdev->mode_info.mode_config_initialized)
2454 		return;
2455 
2456 	radeon_update_display_priority(rdev);
2457 
2458 	for (i = 0; i < rdev->num_crtc; i++) {
2459 		if (rdev->mode_info.crtcs[i]->base.enabled)
2460 			num_heads++;
2461 	}
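	/*
	 * Walk the crtcs in pairs: each pair shares one line buffer, so the
	 * split chosen for one crtc depends on the mode of its partner (see
	 * dce6_line_buffer_adjust()).
	 */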
2462 	for (i = 0; i < rdev->num_crtc; i += 2) {
2463 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2464 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2465 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2466 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2467 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2468 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2469 	}
2470 }
2471 
2472 /*
2473  * Core functions
2474  */
2475 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2476 {
2477 	u32 *tile = rdev->config.si.tile_mode_array;
2478 	const u32 num_tile_mode_states =
2479 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2480 	u32 reg_offset, split_equal_to_row_size;
2481 
2482 	switch (rdev->config.si.mem_row_size_in_kb) {
2483 	case 1:
2484 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2485 		break;
2486 	case 2:
2487 	default:
2488 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2489 		break;
2490 	case 4:
2491 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2492 		break;
2493 	}
2494 
2495 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496 		tile[reg_offset] = 0;
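	/* entries 18-20 are never programmed below and simply stay zeroed */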
2497 
2498 	switch (rdev->family) {
2499 	case CHIP_TAHITI:
2500 	case CHIP_PITCAIRN:
2501 		/* non-AA compressed depth or any compressed stencil */
2502 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2504 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2506 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2507 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2510 		/* 2xAA/4xAA compressed depth only */
2511 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2514 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2516 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2519 		/* 8xAA compressed depth only */
2520 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2523 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2524 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2525 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2528 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2529 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2533 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2534 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2538 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2539 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2541 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2542 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2543 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2547 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2550 			   TILE_SPLIT(split_equal_to_row_size) |
2551 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2552 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2554 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2555 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2556 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2559 			   TILE_SPLIT(split_equal_to_row_size) |
2560 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2561 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2564 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2565 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568 			   TILE_SPLIT(split_equal_to_row_size) |
2569 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2570 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2572 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573 		/* 1D and 1D Array Surfaces */
2574 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2575 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2577 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2578 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2579 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2581 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2582 		/* Displayable maps. */
2583 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2584 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2585 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2587 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2588 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591 		/* Display 8bpp. */
2592 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2596 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2597 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2599 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600 		/* Display 16bpp. */
2601 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2606 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609 		/* Display 32bpp. */
2610 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2611 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2614 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2615 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2618 		/* Thin. */
2619 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2620 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2621 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2623 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2624 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2627 		/* Thin 8 bpp. */
2628 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2630 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2631 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2632 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2633 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2635 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2636 		/* Thin 16 bpp. */
2637 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2638 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2639 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2640 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2641 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2642 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2645 		/* Thin 32 bpp. */
2646 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2648 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2650 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2651 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2654 		/* Thin 64 bpp. */
2655 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658 			   TILE_SPLIT(split_equal_to_row_size) |
2659 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2660 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2663 		/* 8 bpp PRT. */
2664 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2669 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2670 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2672 		/* 16 bpp PRT */
2673 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2676 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2677 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2678 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2680 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2681 		/* 32 bpp PRT */
2682 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2687 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2689 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690 		/* 64 bpp PRT */
2691 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2695 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2696 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2699 		/* 128 bpp PRT */
2700 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2704 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2705 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2707 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2708 
2709 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2710 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2711 		break;
2712 
2713 	case CHIP_VERDE:
2714 	case CHIP_OLAND:
2715 	case CHIP_HAINAN:
2716 		/* non-AA compressed depth or any compressed stencil */
2717 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2719 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2720 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2721 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2722 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2725 		/* 2xAA/4xAA compressed depth only */
2726 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2727 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2728 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2730 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2731 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2733 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2734 		/* 8xAA compressed depth only */
2735 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2739 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2740 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2743 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2744 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2748 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2749 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2753 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2757 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2758 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2760 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2761 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2762 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2764 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 			   TILE_SPLIT(split_equal_to_row_size) |
2766 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2767 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2769 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2770 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2771 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2773 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			   TILE_SPLIT(split_equal_to_row_size) |
2775 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2776 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2779 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2780 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   TILE_SPLIT(split_equal_to_row_size) |
2784 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2785 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2787 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2788 		/* 1D and 1D Array Surfaces */
2789 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2790 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2791 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2792 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2793 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2794 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2796 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2797 		/* Displayable maps. */
2798 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2799 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2800 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2802 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2803 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2805 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2806 		/* Display 8bpp. */
2807 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2811 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2812 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2814 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2815 		/* Display 16bpp. */
2816 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2817 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2818 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2820 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2821 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2824 		/* Display 32bpp. */
2825 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2829 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2830 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2832 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833 		/* Thin. */
2834 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2835 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2836 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2838 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2839 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842 		/* Thin 8 bpp. */
2843 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2844 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2845 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2846 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2847 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2848 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2851 		/* Thin 16 bpp. */
2852 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2854 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2855 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2856 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2857 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2860 		/* Thin 32 bpp. */
2861 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2865 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2866 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2868 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2869 		/* Thin 64 bpp. */
2870 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2872 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873 			   TILE_SPLIT(split_equal_to_row_size) |
2874 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2875 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2877 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2878 		/* 8 bpp PRT. */
2879 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2881 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2882 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2883 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2884 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2885 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2886 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2887 		/* 16 bpp PRT */
2888 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2890 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2891 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2892 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2893 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2895 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2896 		/* 32 bpp PRT */
2897 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2901 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2902 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2903 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2904 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905 		/* 64 bpp PRT */
2906 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2908 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2909 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2910 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2911 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2913 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2914 		/* 128 bpp PRT */
2915 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2917 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2918 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2919 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2920 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2922 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2923 
2924 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2925 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2926 		break;
2927 
2928 	default:
2929 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2930 	}
2931 }
2932 
2933 static void si_select_se_sh(struct radeon_device *rdev,
2934 			    u32 se_num, u32 sh_num)
2935 {
2936 	u32 data = INSTANCE_BROADCAST_WRITES;
2937 
2938 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2939 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2940 	else if (se_num == 0xffffffff)
2941 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2942 	else if (sh_num == 0xffffffff)
2943 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2944 	else
2945 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2946 	WREG32(GRBM_GFX_INDEX, data);
2947 }
2948 
2949 static u32 si_create_bitmask(u32 bit_width)
2950 {
2951 	u32 i, mask = 0;
2952 
2953 	for (i = 0; i < bit_width; i++) {
2954 		mask <<= 1;
2955 		mask |= 1;
2956 	}
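	/*
	 * mask now has the low bit_width bits set, i.e. (1 << bit_width) - 1,
	 * built iteratively so that bit_width == 32 stays well-defined.
	 */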
2957 	return mask;
2958 }
2959 
2960 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2961 {
2962 	u32 data, mask;
2963 
2964 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2965 	if (data & 1)
2966 		data &= INACTIVE_CUS_MASK;
2967 	else
2968 		data = 0;
2969 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2970 
2971 	data >>= INACTIVE_CUS_SHIFT;
2972 
2973 	mask = si_create_bitmask(cu_per_sh);
2974 
2975 	return ~data & mask;
2976 }
2977 
2978 static void si_setup_spi(struct radeon_device *rdev,
2979 			 u32 se_num, u32 sh_per_se,
2980 			 u32 cu_per_sh)
2981 {
2982 	int i, j, k;
2983 	u32 data, mask, active_cu;
2984 
2985 	for (i = 0; i < se_num; i++) {
2986 		for (j = 0; j < sh_per_se; j++) {
2987 			si_select_se_sh(rdev, i, j);
2988 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2989 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2990 
2991 			mask = 1;
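			/*
			 * Note that "mask <<= k" advances the probe bit by k
			 * positions each pass (bits 0, 1, 3, 6, 10, 15, ...),
			 * so not every CU bit is tested; this keeps the
			 * historical behaviour, although "mask = 1 << k" may
			 * have been the original intent.
			 */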
2992 			for (k = 0; k < 16; k++) {
2993 				mask <<= k;
2994 				if (active_cu & mask) {
2995 					data &= ~mask;
2996 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2997 					break;
2998 				}
2999 			}
3000 		}
3001 	}
3002 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3003 }
3004 
3005 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3006 			      u32 max_rb_num_per_se,
3007 			      u32 sh_per_se)
3008 {
3009 	u32 data, mask;
3010 
3011 	data = RREG32(CC_RB_BACKEND_DISABLE);
3012 	if (data & 1)
3013 		data &= BACKEND_DISABLE_MASK;
3014 	else
3015 		data = 0;
3016 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3017 
3018 	data >>= BACKEND_DISABLE_SHIFT;
3019 
3020 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3021 
3022 	return data & mask;
3023 }
3024 
3025 static void si_setup_rb(struct radeon_device *rdev,
3026 			u32 se_num, u32 sh_per_se,
3027 			u32 max_rb_num_per_se)
3028 {
3029 	int i, j;
3030 	u32 data, mask;
3031 	u32 disabled_rbs = 0;
3032 	u32 enabled_rbs = 0;
3033 
3034 	for (i = 0; i < se_num; i++) {
3035 		for (j = 0; j < sh_per_se; j++) {
3036 			si_select_se_sh(rdev, i, j);
3037 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3038 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3039 		}
3040 	}
3041 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3042 
3043 	mask = 1;
3044 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3045 		if (!(disabled_rbs & mask))
3046 			enabled_rbs |= mask;
3047 		mask <<= 1;
3048 	}
3049 
3050 	rdev->config.si.backend_enable_mask = enabled_rbs;
3051 
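	/*
	 * RASTER_CONFIG consumes two bits of enabled_rbs per shader array;
	 * pick an RB map based on which RBs of each pair survived.
	 */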
3052 	for (i = 0; i < se_num; i++) {
3053 		si_select_se_sh(rdev, i, 0xffffffff);
3054 		data = 0;
3055 		for (j = 0; j < sh_per_se; j++) {
3056 			switch (enabled_rbs & 3) {
3057 			case 1:
3058 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3059 				break;
3060 			case 2:
3061 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3062 				break;
3063 			case 3:
3064 			default:
3065 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3066 				break;
3067 			}
3068 			enabled_rbs >>= 2;
3069 		}
3070 		WREG32(PA_SC_RASTER_CONFIG, data);
3071 	}
3072 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3073 }
3074 
3075 static void si_gpu_init(struct radeon_device *rdev)
3076 {
3077 	u32 gb_addr_config = 0;
3078 	u32 mc_shared_chmap, mc_arb_ramcfg;
3079 	u32 sx_debug_1;
3080 	u32 hdp_host_path_cntl;
3081 	u32 tmp;
3082 	int i, j;
3083 
3084 	switch (rdev->family) {
3085 	case CHIP_TAHITI:
3086 		rdev->config.si.max_shader_engines = 2;
3087 		rdev->config.si.max_tile_pipes = 12;
3088 		rdev->config.si.max_cu_per_sh = 8;
3089 		rdev->config.si.max_sh_per_se = 2;
3090 		rdev->config.si.max_backends_per_se = 4;
3091 		rdev->config.si.max_texture_channel_caches = 12;
3092 		rdev->config.si.max_gprs = 256;
3093 		rdev->config.si.max_gs_threads = 32;
3094 		rdev->config.si.max_hw_contexts = 8;
3095 
3096 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3097 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3098 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3099 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3100 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3101 		break;
3102 	case CHIP_PITCAIRN:
3103 		rdev->config.si.max_shader_engines = 2;
3104 		rdev->config.si.max_tile_pipes = 8;
3105 		rdev->config.si.max_cu_per_sh = 5;
3106 		rdev->config.si.max_sh_per_se = 2;
3107 		rdev->config.si.max_backends_per_se = 4;
3108 		rdev->config.si.max_texture_channel_caches = 8;
3109 		rdev->config.si.max_gprs = 256;
3110 		rdev->config.si.max_gs_threads = 32;
3111 		rdev->config.si.max_hw_contexts = 8;
3112 
3113 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3114 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3115 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3116 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3117 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3118 		break;
3119 	case CHIP_VERDE:
3120 	default:
3121 		rdev->config.si.max_shader_engines = 1;
3122 		rdev->config.si.max_tile_pipes = 4;
3123 		rdev->config.si.max_cu_per_sh = 5;
3124 		rdev->config.si.max_sh_per_se = 2;
3125 		rdev->config.si.max_backends_per_se = 4;
3126 		rdev->config.si.max_texture_channel_caches = 4;
3127 		rdev->config.si.max_gprs = 256;
3128 		rdev->config.si.max_gs_threads = 32;
3129 		rdev->config.si.max_hw_contexts = 8;
3130 
3131 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3132 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3133 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3134 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3135 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3136 		break;
3137 	case CHIP_OLAND:
3138 		rdev->config.si.max_shader_engines = 1;
3139 		rdev->config.si.max_tile_pipes = 4;
3140 		rdev->config.si.max_cu_per_sh = 6;
3141 		rdev->config.si.max_sh_per_se = 1;
3142 		rdev->config.si.max_backends_per_se = 2;
3143 		rdev->config.si.max_texture_channel_caches = 4;
3144 		rdev->config.si.max_gprs = 256;
3145 		rdev->config.si.max_gs_threads = 16;
3146 		rdev->config.si.max_hw_contexts = 8;
3147 
3148 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3149 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3150 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3151 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3152 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3153 		break;
3154 	case CHIP_HAINAN:
3155 		rdev->config.si.max_shader_engines = 1;
3156 		rdev->config.si.max_tile_pipes = 4;
3157 		rdev->config.si.max_cu_per_sh = 5;
3158 		rdev->config.si.max_sh_per_se = 1;
3159 		rdev->config.si.max_backends_per_se = 1;
3160 		rdev->config.si.max_texture_channel_caches = 2;
3161 		rdev->config.si.max_gprs = 256;
3162 		rdev->config.si.max_gs_threads = 16;
3163 		rdev->config.si.max_hw_contexts = 8;
3164 
3165 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3166 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3167 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3168 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3169 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3170 		break;
3171 	}
3172 
3173 	/* Initialize HDP */
3174 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3175 		WREG32((0x2c14 + j), 0x00000000);
3176 		WREG32((0x2c18 + j), 0x00000000);
3177 		WREG32((0x2c1c + j), 0x00000000);
3178 		WREG32((0x2c20 + j), 0x00000000);
3179 		WREG32((0x2c24 + j), 0x00000000);
3180 	}
3181 
3182 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3183 	WREG32(SRBM_INT_CNTL, 1);
3184 	WREG32(SRBM_INT_ACK, 1);
3185 
3186 	evergreen_fix_pci_max_read_req_size(rdev);
3187 
3188 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3189 
3190 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3191 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3192 
3193 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3194 	rdev->config.si.mem_max_burst_length_bytes = 256;
3195 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3196 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3197 	if (rdev->config.si.mem_row_size_in_kb > 4)
3198 		rdev->config.si.mem_row_size_in_kb = 4;
3199 	/* XXX use MC settings? */
3200 	rdev->config.si.shader_engine_tile_size = 32;
3201 	rdev->config.si.num_gpus = 1;
3202 	rdev->config.si.multi_gpu_tile_size = 64;
3203 
3204 	/* fix up row size */
3205 	gb_addr_config &= ~ROW_SIZE_MASK;
3206 	switch (rdev->config.si.mem_row_size_in_kb) {
3207 	case 1:
3208 	default:
3209 		gb_addr_config |= ROW_SIZE(0);
3210 		break;
3211 	case 2:
3212 		gb_addr_config |= ROW_SIZE(1);
3213 		break;
3214 	case 4:
3215 		gb_addr_config |= ROW_SIZE(2);
3216 		break;
3217 	}
3218 
3219 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3220 	 * not have bank info, so create a custom tiling dword.
3221 	 * bits 3:0   num_pipes
3222 	 * bits 7:4   num_banks
3223 	 * bits 11:8  group_size
3224 	 * bits 15:12 row_size
3225 	 */
3226 	rdev->config.si.tile_config = 0;
3227 	switch (rdev->config.si.num_tile_pipes) {
3228 	case 1:
3229 		rdev->config.si.tile_config |= (0 << 0);
3230 		break;
3231 	case 2:
3232 		rdev->config.si.tile_config |= (1 << 0);
3233 		break;
3234 	case 4:
3235 		rdev->config.si.tile_config |= (2 << 0);
3236 		break;
3237 	case 8:
3238 	default:
3239 		/* XXX what about 12? */
3240 		rdev->config.si.tile_config |= (3 << 0);
3241 		break;
3242 	}
3243 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3244 	case 0: /* four banks */
3245 		rdev->config.si.tile_config |= 0 << 4;
3246 		break;
3247 	case 1: /* eight banks */
3248 		rdev->config.si.tile_config |= 1 << 4;
3249 		break;
3250 	case 2: /* sixteen banks */
3251 	default:
3252 		rdev->config.si.tile_config |= 2 << 4;
3253 		break;
3254 	}
3255 	rdev->config.si.tile_config |=
3256 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3257 	rdev->config.si.tile_config |=
3258 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
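	/* Worked example (hypothetical values): with 8 pipes (field 3),
	 * 8 banks (field 1), a PIPE_INTERLEAVE_SIZE field of 0 and a
	 * ROW_SIZE field of 2, the resulting tile_config is
	 * (3 << 0) | (1 << 4) | (0 << 8) | (2 << 12) = 0x2013.
	 */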
3259 
3260 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3261 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3262 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3263 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3264 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3265 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3266 	if (rdev->has_uvd) {
3267 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3268 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3269 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3270 	}
3271 
3272 	si_tiling_mode_table_init(rdev);
3273 
3274 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3275 		    rdev->config.si.max_sh_per_se,
3276 		    rdev->config.si.max_backends_per_se);
3277 
3278 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3279 		     rdev->config.si.max_sh_per_se,
3280 		     rdev->config.si.max_cu_per_sh);
3281 
3282 	rdev->config.si.active_cus = 0;
3283 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3284 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3285 			rdev->config.si.active_cus +=
3286 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3287 		}
3288 	}
3289 
3290 	/* set HW defaults for 3D engine */
3291 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3292 				     ROQ_IB2_START(0x2b)));
3293 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3294 
3295 	sx_debug_1 = RREG32(SX_DEBUG_1);
3296 	WREG32(SX_DEBUG_1, sx_debug_1);
3297 
3298 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3299 
3300 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3301 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3302 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3303 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3304 
3305 	WREG32(VGT_NUM_INSTANCES, 1);
3306 
3307 	WREG32(CP_PERFMON_CNTL, 0);
3308 
3309 	WREG32(SQ_CONFIG, 0);
3310 
3311 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3312 					  FORCE_EOV_MAX_REZ_CNT(255)));
3313 
3314 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3315 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3316 
3317 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3318 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3319 
3320 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3321 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3322 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3323 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3324 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3325 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3326 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3327 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3328 
3329 	tmp = RREG32(HDP_MISC_CNTL);
3330 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3331 	WREG32(HDP_MISC_CNTL, tmp);
3332 
3333 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3334 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3335 
3336 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3337 
3338 	udelay(50);
3339 }
3340 
3341 /*
3342  * GPU scratch registers helpers function.
3343  */
3344 static void si_scratch_init(struct radeon_device *rdev)
3345 {
3346 	int i;
3347 
3348 	rdev->scratch.num_reg = 7;
3349 	rdev->scratch.reg_base = SCRATCH_REG0;
3350 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3351 		rdev->scratch.free[i] = true;
3352 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3353 	}
3354 }
3355 
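/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flush the read caches over the GART, then emit an EVENT_WRITE_EOP
 * packet that flushes the write caches, writes the fence sequence
 * number and triggers an interrupt (SI).
 */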
3356 void si_fence_ring_emit(struct radeon_device *rdev,
3357 			struct radeon_fence *fence)
3358 {
3359 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3360 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3361 
3362 	/* flush read cache over gart */
3363 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3364 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3365 	radeon_ring_write(ring, 0);
3366 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3367 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3368 			  PACKET3_TC_ACTION_ENA |
3369 			  PACKET3_SH_KCACHE_ACTION_ENA |
3370 			  PACKET3_SH_ICACHE_ACTION_ENA);
3371 	radeon_ring_write(ring, 0xFFFFFFFF);
3372 	radeon_ring_write(ring, 0);
3373 	radeon_ring_write(ring, 10); /* poll interval */
3374 	/* EVENT_WRITE_EOP - flush caches, send int */
3375 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3376 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3377 	radeon_ring_write(ring, lower_32_bits(addr));
3378 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3379 	radeon_ring_write(ring, fence->seq);
3380 	radeon_ring_write(ring, 0);
3381 }
3382 
3383 /*
3384  * IB stuff
3385  */
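/**
 * si_ring_ib_execute - emit an IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Schedule an IB for execution on the gfx ring.  Const IBs are
 * preceded by a SWITCH_BUFFER packet; regular IBs optionally save
 * the next rptr and are followed by a GART read cache flush for
 * the IB's vm id (SI).
 */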
3386 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3387 {
3388 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3389 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3390 	u32 header;
3391 
3392 	if (ib->is_const_ib) {
3393 		/* set switch buffer packet before const IB */
3394 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3395 		radeon_ring_write(ring, 0);
3396 
3397 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3398 	} else {
3399 		u32 next_rptr;
3400 		if (ring->rptr_save_reg) {
3401 			next_rptr = ring->wptr + 3 + 4 + 8;
3402 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3403 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3404 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3405 			radeon_ring_write(ring, next_rptr);
3406 		} else if (rdev->wb.enabled) {
3407 			next_rptr = ring->wptr + 5 + 4 + 8;
3408 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3409 			radeon_ring_write(ring, (1 << 8)); /* dst sel: 1 = memory */
3410 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3411 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3412 			radeon_ring_write(ring, next_rptr);
3413 		}
3414 
3415 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3416 	}
3417 
3418 	radeon_ring_write(ring, header);
3419 	radeon_ring_write(ring,
3420 #ifdef __BIG_ENDIAN
3421 			  (2 << 0) |
3422 #endif
3423 			  (ib->gpu_addr & 0xFFFFFFFC));
3424 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3425 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3426 
3427 	if (!ib->is_const_ib) {
3428 		/* flush read cache over gart for this vmid */
3429 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3430 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3431 		radeon_ring_write(ring, vm_id);
3432 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3433 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3434 				  PACKET3_TC_ACTION_ENA |
3435 				  PACKET3_SH_KCACHE_ACTION_ENA |
3436 				  PACKET3_SH_ICACHE_ACTION_ENA);
3437 		radeon_ring_write(ring, 0xFFFFFFFF);
3438 		radeon_ring_write(ring, 0);
3439 		radeon_ring_write(ring, 10); /* poll interval */
3440 	}
3441 }
3442 
3443 /*
3444  * CP.
3445  */
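/**
 * si_cp_enable - enable/disable the CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to halt the MEs
 *
 * Halt or unhalt the ME, PFP and CE micro engines; on halt the
 * three CP rings are also marked not ready (SI).
 */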
3446 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3447 {
3448 	if (enable) {
3449 		WREG32(CP_ME_CNTL, 0);
3450 	} else {
3451 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3452 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3453 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3454 		WREG32(SCRATCH_UMSK, 0);
3455 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3456 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3457 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3458 	}
3459 	udelay(50);
3460 }
3461 
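/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and load the PFP, CE and ME microcode into their
 * ucode RAMs, handling both the new and the legacy firmware
 * layouts (SI).
 * Returns 0 on success, -EINVAL if the firmware is not loaded.
 */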
3462 static int si_cp_load_microcode(struct radeon_device *rdev)
3463 {
3464 	int i;
3465 
3466 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3467 		return -EINVAL;
3468 
3469 	si_cp_enable(rdev, false);
3470 
3471 	if (rdev->new_fw) {
3472 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3473 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3474 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3475 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3476 		const struct gfx_firmware_header_v1_0 *me_hdr =
3477 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3478 		const __le32 *fw_data;
3479 		u32 fw_size;
3480 
3481 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3482 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3483 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3484 
3485 		/* PFP */
3486 		fw_data = (const __le32 *)
3487 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3488 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3489 		WREG32(CP_PFP_UCODE_ADDR, 0);
3490 		for (i = 0; i < fw_size; i++)
3491 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3492 		WREG32(CP_PFP_UCODE_ADDR, 0);
3493 
3494 		/* CE */
3495 		fw_data = (const __le32 *)
3496 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3497 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3498 		WREG32(CP_CE_UCODE_ADDR, 0);
3499 		for (i = 0; i < fw_size; i++)
3500 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3501 		WREG32(CP_CE_UCODE_ADDR, 0);
3502 
3503 		/* ME */
3504 		fw_data = (const __le32 *)
3505 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3506 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3507 		WREG32(CP_ME_RAM_WADDR, 0);
3508 		for (i = 0; i < fw_size; i++)
3509 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3510 		WREG32(CP_ME_RAM_WADDR, 0);
3511 	} else {
3512 		const __be32 *fw_data;
3513 
3514 		/* PFP */
3515 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3516 		WREG32(CP_PFP_UCODE_ADDR, 0);
3517 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3518 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3519 		WREG32(CP_PFP_UCODE_ADDR, 0);
3520 
3521 		/* CE */
3522 		fw_data = (const __be32 *)rdev->ce_fw->data;
3523 		WREG32(CP_CE_UCODE_ADDR, 0);
3524 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3525 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3526 		WREG32(CP_CE_UCODE_ADDR, 0);
3527 
3528 		/* ME */
3529 		fw_data = (const __be32 *)rdev->me_fw->data;
3530 		WREG32(CP_ME_RAM_WADDR, 0);
3531 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3532 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3533 		WREG32(CP_ME_RAM_WADDR, 0);
3534 	}
3535 
3536 	WREG32(CP_PFP_UCODE_ADDR, 0);
3537 	WREG32(CP_CE_UCODE_ADDR, 0);
3538 	WREG32(CP_ME_RAM_WADDR, 0);
3539 	WREG32(CP_ME_RAM_RADDR, 0);
3540 	return 0;
3541 }
3542 
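/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the ME, set up the CE partitions, emit the clear
 * context state on the gfx ring and clear the compute context
 * state on the two compute rings (SI).
 * Returns 0 on success, error on failure.
 */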
3543 static int si_cp_start(struct radeon_device *rdev)
3544 {
3545 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3546 	int r, i;
3547 
3548 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3549 	if (r) {
3550 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3551 		return r;
3552 	}
3553 	/* init the CP */
3554 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3555 	radeon_ring_write(ring, 0x1);
3556 	radeon_ring_write(ring, 0x0);
3557 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3558 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3559 	radeon_ring_write(ring, 0);
3560 	radeon_ring_write(ring, 0);
3561 
3562 	/* init the CE partitions */
3563 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3564 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3565 	radeon_ring_write(ring, 0xc000);
3566 	radeon_ring_write(ring, 0xe000);
3567 	radeon_ring_unlock_commit(rdev, ring, false);
3568 
3569 	si_cp_enable(rdev, true);
3570 
3571 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3572 	if (r) {
3573 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3574 		return r;
3575 	}
3576 
3577 	/* setup clear context state */
3578 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3579 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3580 
3581 	for (i = 0; i < si_default_size; i++)
3582 		radeon_ring_write(ring, si_default_state[i]);
3583 
3584 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3585 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3586 
3587 	/* set clear context state */
3588 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3589 	radeon_ring_write(ring, 0);
3590 
3591 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3592 	radeon_ring_write(ring, 0x00000316);
3593 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3594 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3595 
3596 	radeon_ring_unlock_commit(rdev, ring, false);
3597 
3598 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3599 		ring = &rdev->ring[i];
3600 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3601 
3602 		/* clear the compute context state */
3603 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3604 		radeon_ring_write(ring, 0);
3605 
3606 		radeon_ring_unlock_commit(rdev, ring, false);
3607 	}
3608 
3609 	return 0;
3610 }
3611 
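/**
 * si_cp_fini - tear down the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP, then free the gfx and the two compute rings along
 * with their rptr save scratch registers (SI).
 */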
3612 static void si_cp_fini(struct radeon_device *rdev)
3613 {
3614 	struct radeon_ring *ring;
3615 	si_cp_enable(rdev, false);
3616 
3617 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3618 	radeon_ring_fini(rdev, ring);
3619 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3620 
3621 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3622 	radeon_ring_fini(rdev, ring);
3623 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3624 
3625 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3626 	radeon_ring_fini(rdev, ring);
3627 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3628 }
3629 
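/**
 * si_cp_resume - set up and start the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Program the ring buffer size, read/write pointers and writeback
 * addresses for the gfx ring and the two compute rings, then start
 * and test all three rings (SI).
 * Returns 0 on success, error on failure.
 */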
3630 static int si_cp_resume(struct radeon_device *rdev)
3631 {
3632 	struct radeon_ring *ring;
3633 	u32 tmp;
3634 	u32 rb_bufsz;
3635 	int r;
3636 
3637 	si_enable_gui_idle_interrupt(rdev, false);
3638 
3639 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3640 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3641 
3642 	/* Set the write pointer delay */
3643 	WREG32(CP_RB_WPTR_DELAY, 0);
3644 
3645 	WREG32(CP_DEBUG, 0);
3646 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3647 
3648 	/* ring 0 - compute and gfx */
3649 	/* Set ring buffer size */
3650 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3651 	rb_bufsz = order_base_2(ring->ring_size / 8);
3652 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3653 #ifdef __BIG_ENDIAN
3654 	tmp |= BUF_SWAP_32BIT;
3655 #endif
3656 	WREG32(CP_RB0_CNTL, tmp);
3657 
3658 	/* Initialize the ring buffer's read and write pointers */
3659 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3660 	ring->wptr = 0;
3661 	WREG32(CP_RB0_WPTR, ring->wptr);
3662 
3663 	/* set the wb address whether it's enabled or not */
3664 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3665 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3666 
3667 	if (rdev->wb.enabled)
3668 		WREG32(SCRATCH_UMSK, 0xff);
3669 	else {
3670 		tmp |= RB_NO_UPDATE;
3671 		WREG32(SCRATCH_UMSK, 0);
3672 	}
3673 
3674 	mdelay(1);
3675 	WREG32(CP_RB0_CNTL, tmp);
3676 
3677 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3678 
3679 	/* ring1  - compute only */
3680 	/* Set ring buffer size */
3681 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3682 	rb_bufsz = order_base_2(ring->ring_size / 8);
3683 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3684 #ifdef __BIG_ENDIAN
3685 	tmp |= BUF_SWAP_32BIT;
3686 #endif
3687 	WREG32(CP_RB1_CNTL, tmp);
3688 
3689 	/* Initialize the ring buffer's read and write pointers */
3690 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3691 	ring->wptr = 0;
3692 	WREG32(CP_RB1_WPTR, ring->wptr);
3693 
3694 	/* set the wb address whether it's enabled or not */
3695 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3696 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3697 
3698 	mdelay(1);
3699 	WREG32(CP_RB1_CNTL, tmp);
3700 
3701 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3702 
3703 	/* ring2 - compute only */
3704 	/* Set ring buffer size */
3705 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3706 	rb_bufsz = order_base_2(ring->ring_size / 8);
3707 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3708 #ifdef __BIG_ENDIAN
3709 	tmp |= BUF_SWAP_32BIT;
3710 #endif
3711 	WREG32(CP_RB2_CNTL, tmp);
3712 
3713 	/* Initialize the ring buffer's read and write pointers */
3714 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3715 	ring->wptr = 0;
3716 	WREG32(CP_RB2_WPTR, ring->wptr);
3717 
3718 	/* set the wb address whether it's enabled or not */
3719 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3720 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3721 
3722 	mdelay(1);
3723 	WREG32(CP_RB2_CNTL, tmp);
3724 
3725 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3726 
3727 	/* start the rings */
3728 	si_cp_start(rdev);
3729 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3730 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3731 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3732 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3733 	if (r) {
3734 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3735 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3736 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3737 		return r;
3738 	}
3739 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3740 	if (r) {
3741 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3742 	}
3743 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3744 	if (r) {
3745 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3746 	}
3747 
3748 	si_enable_gui_idle_interrupt(rdev, true);
3749 
3750 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3751 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3752 
3753 	return 0;
3754 }
3755 
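/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check the GRBM, SRBM, DMA and VM status registers and build a
 * mask of the blocks that appear hung.
 * Returns the reset mask to pass to si_gpu_soft_reset() (SI).
 */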
3756 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3757 {
3758 	u32 reset_mask = 0;
3759 	u32 tmp;
3760 
3761 	/* GRBM_STATUS */
3762 	tmp = RREG32(GRBM_STATUS);
3763 	if (tmp & (PA_BUSY | SC_BUSY |
3764 		   BCI_BUSY | SX_BUSY |
3765 		   TA_BUSY | VGT_BUSY |
3766 		   DB_BUSY | CB_BUSY |
3767 		   GDS_BUSY | SPI_BUSY |
3768 		   IA_BUSY | IA_BUSY_NO_DMA))
3769 		reset_mask |= RADEON_RESET_GFX;
3770 
3771 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3772 		   CP_BUSY | CP_COHERENCY_BUSY))
3773 		reset_mask |= RADEON_RESET_CP;
3774 
3775 	if (tmp & GRBM_EE_BUSY)
3776 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3777 
3778 	/* GRBM_STATUS2 */
3779 	tmp = RREG32(GRBM_STATUS2);
3780 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3781 		reset_mask |= RADEON_RESET_RLC;
3782 
3783 	/* DMA_STATUS_REG 0 */
3784 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3785 	if (!(tmp & DMA_IDLE))
3786 		reset_mask |= RADEON_RESET_DMA;
3787 
3788 	/* DMA_STATUS_REG 1 */
3789 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3790 	if (!(tmp & DMA_IDLE))
3791 		reset_mask |= RADEON_RESET_DMA1;
3792 
3793 	/* SRBM_STATUS2 */
3794 	tmp = RREG32(SRBM_STATUS2);
3795 	if (tmp & DMA_BUSY)
3796 		reset_mask |= RADEON_RESET_DMA;
3797 
3798 	if (tmp & DMA1_BUSY)
3799 		reset_mask |= RADEON_RESET_DMA1;
3800 
3801 	/* SRBM_STATUS */
3802 	tmp = RREG32(SRBM_STATUS);
3803 
3804 	if (tmp & IH_BUSY)
3805 		reset_mask |= RADEON_RESET_IH;
3806 
3807 	if (tmp & SEM_BUSY)
3808 		reset_mask |= RADEON_RESET_SEM;
3809 
3810 	if (tmp & GRBM_RQ_PENDING)
3811 		reset_mask |= RADEON_RESET_GRBM;
3812 
3813 	if (tmp & VMC_BUSY)
3814 		reset_mask |= RADEON_RESET_VMC;
3815 
3816 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3817 		   MCC_BUSY | MCD_BUSY))
3818 		reset_mask |= RADEON_RESET_MC;
3819 
3820 	if (evergreen_is_display_hung(rdev))
3821 		reset_mask |= RADEON_RESET_DISPLAY;
3822 
3823 	/* VM_L2_STATUS */
3824 	tmp = RREG32(VM_L2_STATUS);
3825 	if (tmp & L2_BUSY)
3826 		reset_mask |= RADEON_RESET_VMC;
3827 
3828 	/* Skip MC reset as it's most likely not hung, just busy */
3829 	if (reset_mask & RADEON_RESET_MC) {
3830 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3831 		reset_mask &= ~RADEON_RESET_MC;
3832 	}
3833 
3834 	return reset_mask;
3835 }
3836 
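/**
 * si_gpu_soft_reset - soft reset the requested blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of blocks to reset
 *
 * Stop the RLC, CP and DMA engines, save the MC state, pulse the
 * GRBM and SRBM soft reset bits for the requested blocks and then
 * restore the MC (SI).
 */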
3837 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3838 {
3839 	struct evergreen_mc_save save;
3840 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3841 	u32 tmp;
3842 
3843 	if (reset_mask == 0)
3844 		return;
3845 
3846 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3847 
3848 	evergreen_print_gpu_status_regs(rdev);
3849 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3850 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3851 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3852 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3853 
3854 	/* disable PG/CG */
3855 	si_fini_pg(rdev);
3856 	si_fini_cg(rdev);
3857 
3858 	/* stop the rlc */
3859 	si_rlc_stop(rdev);
3860 
3861 	/* Disable CP parsing/prefetching */
3862 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3863 
3864 	if (reset_mask & RADEON_RESET_DMA) {
3865 		/* dma0 */
3866 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3867 		tmp &= ~DMA_RB_ENABLE;
3868 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3869 	}
3870 	if (reset_mask & RADEON_RESET_DMA1) {
3871 		/* dma1 */
3872 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3873 		tmp &= ~DMA_RB_ENABLE;
3874 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3875 	}
3876 
3877 	udelay(50);
3878 
3879 	evergreen_mc_stop(rdev, &save);
3880 	if (evergreen_mc_wait_for_idle(rdev)) {
3881 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3882 	}
3883 
3884 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3885 		grbm_soft_reset = SOFT_RESET_CB |
3886 			SOFT_RESET_DB |
3887 			SOFT_RESET_GDS |
3888 			SOFT_RESET_PA |
3889 			SOFT_RESET_SC |
3890 			SOFT_RESET_BCI |
3891 			SOFT_RESET_SPI |
3892 			SOFT_RESET_SX |
3893 			SOFT_RESET_TC |
3894 			SOFT_RESET_TA |
3895 			SOFT_RESET_VGT |
3896 			SOFT_RESET_IA;
3897 	}
3898 
3899 	if (reset_mask & RADEON_RESET_CP) {
3900 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3901 
3902 		srbm_soft_reset |= SOFT_RESET_GRBM;
3903 	}
3904 
3905 	if (reset_mask & RADEON_RESET_DMA)
3906 		srbm_soft_reset |= SOFT_RESET_DMA;
3907 
3908 	if (reset_mask & RADEON_RESET_DMA1)
3909 		srbm_soft_reset |= SOFT_RESET_DMA1;
3910 
3911 	if (reset_mask & RADEON_RESET_DISPLAY)
3912 		srbm_soft_reset |= SOFT_RESET_DC;
3913 
3914 	if (reset_mask & RADEON_RESET_RLC)
3915 		grbm_soft_reset |= SOFT_RESET_RLC;
3916 
3917 	if (reset_mask & RADEON_RESET_SEM)
3918 		srbm_soft_reset |= SOFT_RESET_SEM;
3919 
3920 	if (reset_mask & RADEON_RESET_IH)
3921 		srbm_soft_reset |= SOFT_RESET_IH;
3922 
3923 	if (reset_mask & RADEON_RESET_GRBM)
3924 		srbm_soft_reset |= SOFT_RESET_GRBM;
3925 
3926 	if (reset_mask & RADEON_RESET_VMC)
3927 		srbm_soft_reset |= SOFT_RESET_VMC;
3928 
3929 	if (reset_mask & RADEON_RESET_MC)
3930 		srbm_soft_reset |= SOFT_RESET_MC;
3931 
3932 	if (grbm_soft_reset) {
3933 		tmp = RREG32(GRBM_SOFT_RESET);
3934 		tmp |= grbm_soft_reset;
3935 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3936 		WREG32(GRBM_SOFT_RESET, tmp);
3937 		tmp = RREG32(GRBM_SOFT_RESET);
3938 
3939 		udelay(50);
3940 
3941 		tmp &= ~grbm_soft_reset;
3942 		WREG32(GRBM_SOFT_RESET, tmp);
3943 		tmp = RREG32(GRBM_SOFT_RESET);
3944 	}
3945 
3946 	if (srbm_soft_reset) {
3947 		tmp = RREG32(SRBM_SOFT_RESET);
3948 		tmp |= srbm_soft_reset;
3949 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3950 		WREG32(SRBM_SOFT_RESET, tmp);
3951 		tmp = RREG32(SRBM_SOFT_RESET);
3952 
3953 		udelay(50);
3954 
3955 		tmp &= ~srbm_soft_reset;
3956 		WREG32(SRBM_SOFT_RESET, tmp);
3957 		tmp = RREG32(SRBM_SOFT_RESET);
3958 	}
3959 
3960 	/* Wait a little for things to settle down */
3961 	udelay(50);
3962 
3963 	evergreen_mc_resume(rdev, &save);
3964 	udelay(50);
3965 
3966 	evergreen_print_gpu_status_regs(rdev);
3967 }
3968 
3969 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3970 {
3971 	u32 tmp, i;
3972 
3973 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3974 	tmp |= SPLL_BYPASS_EN;
3975 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3976 
3977 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3978 	tmp |= SPLL_CTLREQ_CHG;
3979 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3980 
3981 	for (i = 0; i < rdev->usec_timeout; i++) {
3982 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3983 			break;
3984 		udelay(1);
3985 	}
3986 
3987 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3988 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3989 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3990 
3991 	tmp = RREG32(MPLL_CNTL_MODE);
3992 	tmp &= ~MPLL_MCLK_SEL;
3993 	WREG32(MPLL_CNTL_MODE, tmp);
3994 }
3995 
3996 static void si_spll_powerdown(struct radeon_device *rdev)
3997 {
3998 	u32 tmp;
3999 
4000 	tmp = RREG32(SPLL_CNTL_MODE);
4001 	tmp |= SPLL_SW_DIR_CONTROL;
4002 	WREG32(SPLL_CNTL_MODE, tmp);
4003 
4004 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4005 	tmp |= SPLL_RESET;
4006 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4007 
4008 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4009 	tmp |= SPLL_SLEEP;
4010 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4011 
4012 	tmp = RREG32(SPLL_CNTL_MODE);
4013 	tmp &= ~SPLL_SW_DIR_CONTROL;
4014 	WREG32(SPLL_CNTL_MODE, tmp);
4015 }
4016 
4017 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4018 {
4019 	struct evergreen_mc_save save;
4020 	u32 tmp, i;
4021 
4022 	dev_info(rdev->dev, "GPU pci config reset\n");
4023 
4024 	/* disable dpm? */
4025 
4026 	/* disable cg/pg */
4027 	si_fini_pg(rdev);
4028 	si_fini_cg(rdev);
4029 
4030 	/* Disable CP parsing/prefetching */
4031 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4032 	/* dma0 */
4033 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4034 	tmp &= ~DMA_RB_ENABLE;
4035 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4036 	/* dma1 */
4037 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4038 	tmp &= ~DMA_RB_ENABLE;
4039 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4040 	/* XXX other engines? */
4041 
4042 	/* halt the rlc, disable cp internal ints */
4043 	si_rlc_stop(rdev);
4044 
4045 	udelay(50);
4046 
4047 	/* disable mem access */
4048 	evergreen_mc_stop(rdev, &save);
4049 	if (evergreen_mc_wait_for_idle(rdev)) {
4050 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4051 	}
4052 
4053 	/* set mclk/sclk to bypass */
4054 	si_set_clk_bypass_mode(rdev);
4055 	/* powerdown spll */
4056 	si_spll_powerdown(rdev);
4057 	/* disable BM */
4058 	pci_clear_master(rdev->pdev);
4059 	/* reset */
4060 	radeon_pci_config_reset(rdev);
4061 	/* wait for asic to come out of reset */
4062 	for (i = 0; i < rdev->usec_timeout; i++) {
4063 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4064 			break;
4065 		udelay(1);
4066 	}
4067 }
4068 
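/**
 * si_asic_reset - reset the asic
 *
 * @rdev: radeon_device pointer
 * @hard: force a full pci config reset
 *
 * Try a soft reset of the hung blocks first; if blocks are still
 * hung afterwards and radeon_hard_reset is set, fall back to a pci
 * config reset (SI).
 * Returns 0.
 */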
4069 int si_asic_reset(struct radeon_device *rdev, bool hard)
4070 {
4071 	u32 reset_mask;
4072 
4073 	if (hard) {
4074 		si_gpu_pci_config_reset(rdev);
4075 		return 0;
4076 	}
4077 
4078 	reset_mask = si_gpu_check_soft_reset(rdev);
4079 
4080 	if (reset_mask)
4081 		r600_set_bios_scratch_engine_hung(rdev, true);
4082 
4083 	/* try soft reset */
4084 	si_gpu_soft_reset(rdev, reset_mask);
4085 
4086 	reset_mask = si_gpu_check_soft_reset(rdev);
4087 
4088 	/* try pci config reset */
4089 	if (reset_mask && radeon_hard_reset)
4090 		si_gpu_pci_config_reset(rdev);
4091 
4092 	reset_mask = si_gpu_check_soft_reset(rdev);
4093 
4094 	if (!reset_mask)
4095 		r600_set_bios_scratch_engine_hung(rdev, false);
4096 
4097 	return 0;
4098 }
4099 
4100 /**
4101  * si_gfx_is_lockup - Check if the GFX engine is locked up
4102  *
4103  * @rdev: radeon_device pointer
4104  * @ring: radeon_ring structure holding ring information
4105  *
4106  * Check if the GFX engine is locked up.
4107  * Returns true if the engine appears to be locked up, false if not.
4108  */
4109 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4110 {
4111 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4112 
4113 	if (!(reset_mask & (RADEON_RESET_GFX |
4114 			    RADEON_RESET_COMPUTE |
4115 			    RADEON_RESET_CP))) {
4116 		radeon_ring_lockup_update(rdev, ring);
4117 		return false;
4118 	}
4119 	return radeon_ring_test_lockup(rdev, ring);
4120 }
4121 
4122 /* MC */
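/**
 * si_mc_program - program the memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Stop the MC, program the VRAM and system aperture locations and
 * the HDP non-surface registers, then resume the MC and disable
 * the VGA renderer (SI).
 */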
4123 static void si_mc_program(struct radeon_device *rdev)
4124 {
4125 	struct evergreen_mc_save save;
4126 	u32 tmp;
4127 	int i, j;
4128 
4129 	/* Initialize HDP */
4130 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4131 		WREG32((0x2c14 + j), 0x00000000);
4132 		WREG32((0x2c18 + j), 0x00000000);
4133 		WREG32((0x2c1c + j), 0x00000000);
4134 		WREG32((0x2c20 + j), 0x00000000);
4135 		WREG32((0x2c24 + j), 0x00000000);
4136 	}
4137 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4138 
4139 	evergreen_mc_stop(rdev, &save);
4140 	if (radeon_mc_wait_for_idle(rdev)) {
4141 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4142 	}
4143 	if (!ASIC_IS_NODCE(rdev))
4144 		/* Lock out access through the VGA aperture */
4145 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4146 	/* Update configuration */
4147 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4148 	       rdev->mc.vram_start >> 12);
4149 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4150 	       rdev->mc.vram_end >> 12);
4151 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4152 	       rdev->vram_scratch.gpu_addr >> 12);
4153 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4154 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4155 	WREG32(MC_VM_FB_LOCATION, tmp);
4156 	/* XXX double check these! */
4157 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4158 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4159 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4160 	WREG32(MC_VM_AGP_BASE, 0);
4161 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4162 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4163 	if (radeon_mc_wait_for_idle(rdev)) {
4164 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4165 	}
4166 	evergreen_mc_resume(rdev, &save);
4167 	if (!ASIC_IS_NODCE(rdev)) {
4168 		/* we need to own VRAM, so turn off the VGA renderer here
4169 		 * to stop it overwriting our objects */
4170 		rv515_vga_render_disable(rdev);
4171 	}
4172 }
4173 
4174 void si_vram_gtt_location(struct radeon_device *rdev,
4175 			  struct radeon_mc *mc)
4176 {
4177 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4178 		/* leave room for at least 1024M GTT */
4179 		dev_warn(rdev->dev, "limiting VRAM\n");
4180 		mc->real_vram_size = 0xFFC0000000ULL;
4181 		mc->mc_vram_size = 0xFFC0000000ULL;
4182 	}
4183 	radeon_vram_location(rdev, &rdev->mc, 0);
4184 	rdev->mc.gtt_base_align = 0;
4185 	radeon_gtt_location(rdev, mc);
4186 }
4187 
4188 static int si_mc_init(struct radeon_device *rdev)
4189 {
4190 	u32 tmp;
4191 	int chansize, numchan;
4192 
4193 	/* Get VRAM information */
4194 	rdev->mc.vram_is_ddr = true;
4195 	tmp = RREG32(MC_ARB_RAMCFG);
4196 	if (tmp & CHANSIZE_OVERRIDE) {
4197 		chansize = 16;
4198 	} else if (tmp & CHANSIZE_MASK) {
4199 		chansize = 64;
4200 	} else {
4201 		chansize = 32;
4202 	}
4203 	tmp = RREG32(MC_SHARED_CHMAP);
4204 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4205 	case 0:
4206 	default:
4207 		numchan = 1;
4208 		break;
4209 	case 1:
4210 		numchan = 2;
4211 		break;
4212 	case 2:
4213 		numchan = 4;
4214 		break;
4215 	case 3:
4216 		numchan = 8;
4217 		break;
4218 	case 4:
4219 		numchan = 3;
4220 		break;
4221 	case 5:
4222 		numchan = 6;
4223 		break;
4224 	case 6:
4225 		numchan = 10;
4226 		break;
4227 	case 7:
4228 		numchan = 12;
4229 		break;
4230 	case 8:
4231 		numchan = 16;
4232 		break;
4233 	}
4234 	rdev->mc.vram_width = numchan * chansize;
4235 	/* Could aper size report 0? */
4236 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4237 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4238 	/* size in MB on si */
4239 	tmp = RREG32(CONFIG_MEMSIZE);
4240 	/* some boards may have garbage in the upper 16 bits */
4241 	if (tmp & 0xffff0000) {
4242 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4243 		if (tmp & 0xffff)
4244 			tmp &= 0xffff;
4245 	}
4246 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4247 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4248 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4249 	si_vram_gtt_location(rdev, &rdev->mc);
4250 	radeon_update_bandwidth_info(rdev);
4251 
4252 	return 0;
4253 }
4254 
4255 /*
4256  * GART
4257  */
4258 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4259 {
4260 	/* flush hdp cache */
4261 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4262 
4263 	/* bits 0-15 are the VM contexts0-15 */
4264 	WREG32(VM_INVALIDATE_REQUEST, 1);
4265 }
4266 
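/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Program the L1 TLB and L2 cache, set up VM context0 to cover the
 * GART aperture and contexts 1-15 for per-process VMs, then flush
 * the TLB (SI).
 * Returns 0 on success, error on failure.
 */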
4267 static int si_pcie_gart_enable(struct radeon_device *rdev)
4268 {
4269 	int r, i;
4270 
4271 	if (rdev->gart.robj == NULL) {
4272 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4273 		return -EINVAL;
4274 	}
4275 	r = radeon_gart_table_vram_pin(rdev);
4276 	if (r)
4277 		return r;
4278 	/* Setup TLB control */
4279 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4280 	       (0xA << 7) |
4281 	       ENABLE_L1_TLB |
4282 	       ENABLE_L1_FRAGMENT_PROCESSING |
4283 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4284 	       ENABLE_ADVANCED_DRIVER_MODEL |
4285 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4286 	/* Setup L2 cache */
4287 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4288 	       ENABLE_L2_FRAGMENT_PROCESSING |
4289 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4290 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4291 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4292 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4293 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4294 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4295 	       BANK_SELECT(4) |
4296 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4297 	/* setup context0 */
4298 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4299 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4300 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4301 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4302 			(u32)(rdev->dummy_page.addr >> 12));
4303 	WREG32(VM_CONTEXT0_CNTL2, 0);
4304 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4305 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4306 
4307 	WREG32(0x15D4, 0);
4308 	WREG32(0x15D8, 0);
4309 	WREG32(0x15DC, 0);
4310 
4311 	/* empty context1-15 */
4312 	/* set vm size, must be a multiple of 4 */
4313 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4314 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4315 	/* Assign the pt base to something valid for now; the pts used for
4316 	 * the VMs are determined by the application and setup and assigned
4317 	 * on the fly in the vm part of radeon_gart.c
4318 	 */
4319 	for (i = 1; i < 16; i++) {
4320 		if (i < 8)
4321 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4322 			       rdev->vm_manager.saved_table_addr[i]);
4323 		else
4324 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4325 			       rdev->vm_manager.saved_table_addr[i]);
4326 	}
4327 
4328 	/* enable context1-15 */
4329 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4330 	       (u32)(rdev->dummy_page.addr >> 12));
4331 	WREG32(VM_CONTEXT1_CNTL2, 4);
4332 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4333 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4334 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4335 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4336 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4337 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4338 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4339 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4340 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4341 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4342 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4343 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4344 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4345 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4346 
4347 	si_pcie_gart_tlb_flush(rdev);
4348 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4349 		 (unsigned)(rdev->mc.gtt_size >> 20),
4350 		 (unsigned long long)rdev->gart.table_addr);
4351 	rdev->gart.ready = true;
4352 	return 0;
4353 }
4354 
4355 static void si_pcie_gart_disable(struct radeon_device *rdev)
4356 {
4357 	unsigned i;
4358 
4359 	for (i = 1; i < 16; ++i) {
4360 		uint32_t reg;
4361 		if (i < 8)
4362 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4363 		else
4364 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4365 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4366 	}
4367 
4368 	/* Disable all tables */
4369 	WREG32(VM_CONTEXT0_CNTL, 0);
4370 	WREG32(VM_CONTEXT1_CNTL, 0);
4371 	/* Setup TLB control */
4372 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4373 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4374 	/* Setup L2 cache */
4375 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4376 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4377 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4378 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4379 	WREG32(VM_L2_CNTL2, 0);
4380 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4381 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4382 	radeon_gart_table_vram_unpin(rdev);
4383 }
4384 
4385 static void si_pcie_gart_fini(struct radeon_device *rdev)
4386 {
4387 	si_pcie_gart_disable(rdev);
4388 	radeon_gart_table_vram_free(rdev);
4389 	radeon_gart_fini(rdev);
4390 }
4391 
4392 /* vm parser */
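/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register offset to check
 *
 * Context and shader registers are always allowed; config registers
 * are checked against an allow list.
 * Returns true if the register is safe to write, false otherwise.
 */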
4393 static bool si_vm_reg_valid(u32 reg)
4394 {
4395 	/* context regs are fine */
4396 	if (reg >= 0x28000)
4397 		return true;
4398 
4399 	/* shader regs are also fine */
4400 	if (reg >= 0xB000 && reg < 0xC000)
4401 		return true;
4402 
4403 	/* check config regs */
4404 	switch (reg) {
4405 	case GRBM_GFX_INDEX:
4406 	case CP_STRMOUT_CNTL:
4407 	case VGT_VTX_VECT_EJECT_REG:
4408 	case VGT_CACHE_INVALIDATION:
4409 	case VGT_ESGS_RING_SIZE:
4410 	case VGT_GSVS_RING_SIZE:
4411 	case VGT_GS_VERTEX_REUSE:
4412 	case VGT_PRIMITIVE_TYPE:
4413 	case VGT_INDEX_TYPE:
4414 	case VGT_NUM_INDICES:
4415 	case VGT_NUM_INSTANCES:
4416 	case VGT_TF_RING_SIZE:
4417 	case VGT_HS_OFFCHIP_PARAM:
4418 	case VGT_TF_MEMORY_BASE:
4419 	case PA_CL_ENHANCE:
4420 	case PA_SU_LINE_STIPPLE_VALUE:
4421 	case PA_SC_LINE_STIPPLE_STATE:
4422 	case PA_SC_ENHANCE:
4423 	case SQC_CACHES:
4424 	case SPI_STATIC_THREAD_MGMT_1:
4425 	case SPI_STATIC_THREAD_MGMT_2:
4426 	case SPI_STATIC_THREAD_MGMT_3:
4427 	case SPI_PS_MAX_WAVE_ID:
4428 	case SPI_CONFIG_CNTL:
4429 	case SPI_CONFIG_CNTL_1:
4430 	case TA_CNTL_AUX:
4431 	case TA_CS_BC_BASE_ADDR:
4432 		return true;
4433 	default:
4434 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4435 		return false;
4436 	}
4437 }
4438 
4439 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4440 				  u32 *ib, struct radeon_cs_packet *pkt)
4441 {
4442 	switch (pkt->opcode) {
4443 	case PACKET3_NOP:
4444 	case PACKET3_SET_BASE:
4445 	case PACKET3_SET_CE_DE_COUNTERS:
4446 	case PACKET3_LOAD_CONST_RAM:
4447 	case PACKET3_WRITE_CONST_RAM:
4448 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4449 	case PACKET3_DUMP_CONST_RAM:
4450 	case PACKET3_INCREMENT_CE_COUNTER:
4451 	case PACKET3_WAIT_ON_DE_COUNTER:
4452 	case PACKET3_CE_WRITE:
4453 		break;
4454 	default:
4455 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4456 		return -EINVAL;
4457 	}
4458 	return 0;
4459 }
4460 
4461 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4462 {
4463 	u32 start_reg, reg, i;
4464 	u32 command = ib[idx + 4];
4465 	u32 info = ib[idx + 1];
4466 	u32 idx_value = ib[idx];
4467 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4468 		/* src address space is register */
4469 		if (((info & 0x60000000) >> 29) == 0) {
4470 			start_reg = idx_value << 2;
4471 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4472 				reg = start_reg;
4473 				if (!si_vm_reg_valid(reg)) {
4474 					DRM_ERROR("CP DMA Bad SRC register\n");
4475 					return -EINVAL;
4476 				}
4477 			} else {
4478 				for (i = 0; i < (command & 0x1fffff); i++) {
4479 					reg = start_reg + (4 * i);
4480 					if (!si_vm_reg_valid(reg)) {
4481 						DRM_ERROR("CP DMA Bad SRC register\n");
4482 						return -EINVAL;
4483 					}
4484 				}
4485 			}
4486 		}
4487 	}
4488 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4489 		/* dst address space is register */
4490 		if (((info & 0x00300000) >> 20) == 0) {
4491 			start_reg = ib[idx + 2];
4492 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4493 				reg = start_reg;
4494 				if (!si_vm_reg_valid(reg)) {
4495 					DRM_ERROR("CP DMA Bad DST register\n");
4496 					return -EINVAL;
4497 				}
4498 			} else {
4499 				for (i = 0; i < (command & 0x1fffff); i++) {
4500 					reg = start_reg + (4 * i);
4501 					if (!si_vm_reg_valid(reg)) {
4502 						DRM_ERROR("CP DMA Bad DST register\n");
4503 						return -EINVAL;
4504 					}
4505 				}
4506 			}
4507 		}
4508 	}
4509 	return 0;
4510 }
4511 
4512 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4513 				   u32 *ib, struct radeon_cs_packet *pkt)
4514 {
4515 	int r;
4516 	u32 idx = pkt->idx + 1;
4517 	u32 idx_value = ib[idx];
4518 	u32 start_reg, end_reg, reg, i;
4519 
4520 	switch (pkt->opcode) {
4521 	case PACKET3_NOP:
4522 	case PACKET3_SET_BASE:
4523 	case PACKET3_CLEAR_STATE:
4524 	case PACKET3_INDEX_BUFFER_SIZE:
4525 	case PACKET3_DISPATCH_DIRECT:
4526 	case PACKET3_DISPATCH_INDIRECT:
4527 	case PACKET3_ALLOC_GDS:
4528 	case PACKET3_WRITE_GDS_RAM:
4529 	case PACKET3_ATOMIC_GDS:
4530 	case PACKET3_ATOMIC:
4531 	case PACKET3_OCCLUSION_QUERY:
4532 	case PACKET3_SET_PREDICATION:
4533 	case PACKET3_COND_EXEC:
4534 	case PACKET3_PRED_EXEC:
4535 	case PACKET3_DRAW_INDIRECT:
4536 	case PACKET3_DRAW_INDEX_INDIRECT:
4537 	case PACKET3_INDEX_BASE:
4538 	case PACKET3_DRAW_INDEX_2:
4539 	case PACKET3_CONTEXT_CONTROL:
4540 	case PACKET3_INDEX_TYPE:
4541 	case PACKET3_DRAW_INDIRECT_MULTI:
4542 	case PACKET3_DRAW_INDEX_AUTO:
4543 	case PACKET3_DRAW_INDEX_IMMD:
4544 	case PACKET3_NUM_INSTANCES:
4545 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4546 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4547 	case PACKET3_DRAW_INDEX_OFFSET_2:
4548 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4549 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4550 	case PACKET3_MPEG_INDEX:
4551 	case PACKET3_WAIT_REG_MEM:
4552 	case PACKET3_MEM_WRITE:
4553 	case PACKET3_PFP_SYNC_ME:
4554 	case PACKET3_SURFACE_SYNC:
4555 	case PACKET3_EVENT_WRITE:
4556 	case PACKET3_EVENT_WRITE_EOP:
4557 	case PACKET3_EVENT_WRITE_EOS:
4558 	case PACKET3_SET_CONTEXT_REG:
4559 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4560 	case PACKET3_SET_SH_REG:
4561 	case PACKET3_SET_SH_REG_OFFSET:
4562 	case PACKET3_INCREMENT_DE_COUNTER:
4563 	case PACKET3_WAIT_ON_CE_COUNTER:
4564 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4565 	case PACKET3_ME_WRITE:
4566 		break;
4567 	case PACKET3_COPY_DATA:
4568 		if ((idx_value & 0xf00) == 0) {
4569 			reg = ib[idx + 3] * 4;
4570 			if (!si_vm_reg_valid(reg))
4571 				return -EINVAL;
4572 		}
4573 		break;
4574 	case PACKET3_WRITE_DATA:
4575 		if ((idx_value & 0xf00) == 0) {
4576 			start_reg = ib[idx + 1] * 4;
4577 			if (idx_value & 0x10000) {
4578 				if (!si_vm_reg_valid(start_reg))
4579 					return -EINVAL;
4580 			} else {
4581 				for (i = 0; i < (pkt->count - 2); i++) {
4582 					reg = start_reg + (4 * i);
4583 					if (!si_vm_reg_valid(reg))
4584 						return -EINVAL;
4585 				}
4586 			}
4587 		}
4588 		break;
4589 	case PACKET3_COND_WRITE:
4590 		if (idx_value & 0x100) {
4591 			reg = ib[idx + 5] * 4;
4592 			if (!si_vm_reg_valid(reg))
4593 				return -EINVAL;
4594 		}
4595 		break;
4596 	case PACKET3_COPY_DW:
4597 		if (idx_value & 0x2) {
4598 			reg = ib[idx + 3] * 4;
4599 			if (!si_vm_reg_valid(reg))
4600 				return -EINVAL;
4601 		}
4602 		break;
4603 	case PACKET3_SET_CONFIG_REG:
4604 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4605 		end_reg = 4 * pkt->count + start_reg - 4;
4606 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4607 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4608 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4609 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4610 			return -EINVAL;
4611 		}
4612 		for (i = 0; i < pkt->count; i++) {
4613 			reg = start_reg + (4 * i);
4614 			if (!si_vm_reg_valid(reg))
4615 				return -EINVAL;
4616 		}
4617 		break;
4618 	case PACKET3_CP_DMA:
4619 		r = si_vm_packet3_cp_dma_check(ib, idx);
4620 		if (r)
4621 			return r;
4622 		break;
4623 	default:
4624 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4625 		return -EINVAL;
4626 	}
4627 	return 0;
4628 }
4629 
4630 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4631 				       u32 *ib, struct radeon_cs_packet *pkt)
4632 {
4633 	int r;
4634 	u32 idx = pkt->idx + 1;
4635 	u32 idx_value = ib[idx];
4636 	u32 start_reg, reg, i;
4637 
4638 	switch (pkt->opcode) {
4639 	case PACKET3_NOP:
4640 	case PACKET3_SET_BASE:
4641 	case PACKET3_CLEAR_STATE:
4642 	case PACKET3_DISPATCH_DIRECT:
4643 	case PACKET3_DISPATCH_INDIRECT:
4644 	case PACKET3_ALLOC_GDS:
4645 	case PACKET3_WRITE_GDS_RAM:
4646 	case PACKET3_ATOMIC_GDS:
4647 	case PACKET3_ATOMIC:
4648 	case PACKET3_OCCLUSION_QUERY:
4649 	case PACKET3_SET_PREDICATION:
4650 	case PACKET3_COND_EXEC:
4651 	case PACKET3_PRED_EXEC:
4652 	case PACKET3_CONTEXT_CONTROL:
4653 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4654 	case PACKET3_WAIT_REG_MEM:
4655 	case PACKET3_MEM_WRITE:
4656 	case PACKET3_PFP_SYNC_ME:
4657 	case PACKET3_SURFACE_SYNC:
4658 	case PACKET3_EVENT_WRITE:
4659 	case PACKET3_EVENT_WRITE_EOP:
4660 	case PACKET3_EVENT_WRITE_EOS:
4661 	case PACKET3_SET_CONTEXT_REG:
4662 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4663 	case PACKET3_SET_SH_REG:
4664 	case PACKET3_SET_SH_REG_OFFSET:
4665 	case PACKET3_INCREMENT_DE_COUNTER:
4666 	case PACKET3_WAIT_ON_CE_COUNTER:
4667 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4668 	case PACKET3_ME_WRITE:
4669 		break;
4670 	case PACKET3_COPY_DATA:
4671 		if ((idx_value & 0xf00) == 0) {
4672 			reg = ib[idx + 3] * 4;
4673 			if (!si_vm_reg_valid(reg))
4674 				return -EINVAL;
4675 		}
4676 		break;
4677 	case PACKET3_WRITE_DATA:
4678 		if ((idx_value & 0xf00) == 0) {
4679 			start_reg = ib[idx + 1] * 4;
4680 			if (idx_value & 0x10000) {
4681 				if (!si_vm_reg_valid(start_reg))
4682 					return -EINVAL;
4683 			} else {
4684 				for (i = 0; i < (pkt->count - 2); i++) {
4685 					reg = start_reg + (4 * i);
4686 					if (!si_vm_reg_valid(reg))
4687 						return -EINVAL;
4688 				}
4689 			}
4690 		}
4691 		break;
4692 	case PACKET3_COND_WRITE:
4693 		if (idx_value & 0x100) {
4694 			reg = ib[idx + 5] * 4;
4695 			if (!si_vm_reg_valid(reg))
4696 				return -EINVAL;
4697 		}
4698 		break;
4699 	case PACKET3_COPY_DW:
4700 		if (idx_value & 0x2) {
4701 			reg = ib[idx + 3] * 4;
4702 			if (!si_vm_reg_valid(reg))
4703 				return -EINVAL;
4704 		}
4705 		break;
4706 	case PACKET3_CP_DMA:
4707 		r = si_vm_packet3_cp_dma_check(ib, idx);
4708 		if (r)
4709 			return r;
4710 		break;
4711 	default:
4712 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4713 		return -EINVAL;
4714 	}
4715 	return 0;
4716 }
4717 
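/**
 * si_ib_parse - validate a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Walk the packets in the IB and check them against the CE, gfx or
 * compute rules depending on the target ring, dumping the IB on
 * error.
 * Returns 0 if the IB is valid, -EINVAL otherwise.
 */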
4718 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4719 {
4720 	int ret = 0;
4721 	u32 idx = 0, i;
4722 	struct radeon_cs_packet pkt;
4723 
4724 	do {
4725 		pkt.idx = idx;
4726 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4727 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4728 		pkt.one_reg_wr = 0;
4729 		switch (pkt.type) {
4730 		case RADEON_PACKET_TYPE0:
4731 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4732 			ret = -EINVAL;
4733 			break;
4734 		case RADEON_PACKET_TYPE2:
4735 			idx += 1;
4736 			break;
4737 		case RADEON_PACKET_TYPE3:
4738 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4739 			if (ib->is_const_ib)
4740 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4741 			else {
4742 				switch (ib->ring) {
4743 				case RADEON_RING_TYPE_GFX_INDEX:
4744 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4745 					break;
4746 				case CAYMAN_RING_TYPE_CP1_INDEX:
4747 				case CAYMAN_RING_TYPE_CP2_INDEX:
4748 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4749 					break;
4750 				default:
4751 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4752 					ret = -EINVAL;
4753 					break;
4754 				}
4755 			}
4756 			idx += pkt.count + 2;
4757 			break;
4758 		default:
4759 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4760 			ret = -EINVAL;
4761 			break;
4762 		}
4763 		if (ret) {
4764 			for (i = 0; i < ib->length_dw; i++) {
4765 				if (i == idx)
4766 					printk("\t0x%08x <---\n", ib->ptr[i]);
4767 				else
4768 					printk("\t0x%08x\n", ib->ptr[i]);
4769 			}
4770 			break;
4771 		}
4772 	} while (idx < ib->length_dw);
4773 
4774 	return ret;
4775 }
4776 
4777 /*
4778  * vm
4779  */
4780 int si_vm_init(struct radeon_device *rdev)
4781 {
4782 	/* number of VMs */
4783 	rdev->vm_manager.nvm = 16;
4784 	/* base offset of vram pages */
4785 	rdev->vm_manager.vram_base_offset = 0;
4786 
4787 	return 0;
4788 }
4789 
4790 void si_vm_fini(struct radeon_device *rdev)
4791 {
4792 }
4793 
4794 /**
4795  * si_vm_decode_fault - print human readable fault info
4796  *
4797  * @rdev: radeon_device pointer
4798  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4799  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4800  *
4801  * Print human readable fault information (SI).
4802  */
4803 static void si_vm_decode_fault(struct radeon_device *rdev,
4804 			       u32 status, u32 addr)
4805 {
4806 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4807 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4808 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4809 	char *block;
4810 	const char *block;
4811 	if (rdev->family == CHIP_TAHITI) {
4812 		switch (mc_id) {
4813 		case 160:
4814 		case 144:
4815 		case 96:
4816 		case 80:
4817 		case 224:
4818 		case 208:
4819 		case 32:
4820 		case 16:
4821 			block = "CB";
4822 			break;
4823 		case 161:
4824 		case 145:
4825 		case 97:
4826 		case 81:
4827 		case 225:
4828 		case 209:
4829 		case 33:
4830 		case 17:
4831 			block = "CB_FMASK";
4832 			break;
4833 		case 162:
4834 		case 146:
4835 		case 98:
4836 		case 82:
4837 		case 226:
4838 		case 210:
4839 		case 34:
4840 		case 18:
4841 			block = "CB_CMASK";
4842 			break;
4843 		case 163:
4844 		case 147:
4845 		case 99:
4846 		case 83:
4847 		case 227:
4848 		case 211:
4849 		case 35:
4850 		case 19:
4851 			block = "CB_IMMED";
4852 			break;
4853 		case 164:
4854 		case 148:
4855 		case 100:
4856 		case 84:
4857 		case 228:
4858 		case 212:
4859 		case 36:
4860 		case 20:
4861 			block = "DB";
4862 			break;
4863 		case 165:
4864 		case 149:
4865 		case 101:
4866 		case 85:
4867 		case 229:
4868 		case 213:
4869 		case 37:
4870 		case 21:
4871 			block = "DB_HTILE";
4872 			break;
4873 		case 167:
4874 		case 151:
4875 		case 103:
4876 		case 87:
4877 		case 231:
4878 		case 215:
4879 		case 39:
4880 		case 23:
4881 			block = "DB_STEN";
4882 			break;
4883 		case 72:
4884 		case 68:
4885 		case 64:
4886 		case 8:
4887 		case 4:
4888 		case 0:
4889 		case 136:
4890 		case 132:
4891 		case 128:
4892 		case 200:
4893 		case 196:
4894 		case 192:
4895 			block = "TC";
4896 			break;
4897 		case 112:
4898 		case 48:
4899 			block = "CP";
4900 			break;
4901 		case 49:
4902 		case 177:
4903 		case 50:
4904 		case 178:
4905 			block = "SH";
4906 			break;
4907 		case 53:
4908 		case 190:
4909 			block = "VGT";
4910 			break;
4911 		case 117:
4912 			block = "IH";
4913 			break;
4914 		case 51:
4915 		case 115:
4916 			block = "RLC";
4917 			break;
4918 		case 119:
4919 		case 183:
4920 			block = "DMA0";
4921 			break;
4922 		case 61:
4923 			block = "DMA1";
4924 			break;
4925 		case 248:
4926 		case 120:
4927 			block = "HDP";
4928 			break;
4929 		default:
4930 			block = "unknown";
4931 			break;
4932 		}
4933 	} else {
4934 		switch (mc_id) {
4935 		case 32:
4936 		case 16:
4937 		case 96:
4938 		case 80:
4939 		case 160:
4940 		case 144:
4941 		case 224:
4942 		case 208:
4943 			block = "CB";
4944 			break;
4945 		case 33:
4946 		case 17:
4947 		case 97:
4948 		case 81:
4949 		case 161:
4950 		case 145:
4951 		case 225:
4952 		case 209:
4953 			block = "CB_FMASK";
4954 			break;
4955 		case 34:
4956 		case 18:
4957 		case 98:
4958 		case 82:
4959 		case 162:
4960 		case 146:
4961 		case 226:
4962 		case 210:
4963 			block = "CB_CMASK";
4964 			break;
4965 		case 35:
4966 		case 19:
4967 		case 99:
4968 		case 83:
4969 		case 163:
4970 		case 147:
4971 		case 227:
4972 		case 211:
4973 			block = "CB_IMMED";
4974 			break;
4975 		case 36:
4976 		case 20:
4977 		case 100:
4978 		case 84:
4979 		case 164:
4980 		case 148:
4981 		case 228:
4982 		case 212:
4983 			block = "DB";
4984 			break;
4985 		case 37:
4986 		case 21:
4987 		case 101:
4988 		case 85:
4989 		case 165:
4990 		case 149:
4991 		case 229:
4992 		case 213:
4993 			block = "DB_HTILE";
4994 			break;
4995 		case 39:
4996 		case 23:
4997 		case 103:
4998 		case 87:
4999 		case 167:
5000 		case 151:
5001 		case 231:
5002 		case 215:
5003 			block = "DB_STEN";
5004 			break;
5005 		case 72:
5006 		case 68:
5007 		case 8:
5008 		case 4:
5009 		case 136:
5010 		case 132:
5011 		case 200:
5012 		case 196:
5013 			block = "TC";
5014 			break;
5015 		case 112:
5016 		case 48:
5017 			block = "CP";
5018 			break;
5019 		case 49:
5020 		case 177:
5021 		case 50:
5022 		case 178:
5023 			block = "SH";
5024 			break;
5025 		case 53:
5026 			block = "VGT";
5027 			break;
5028 		case 117:
5029 			block = "IH";
5030 			break;
5031 		case 51:
5032 		case 115:
5033 			block = "RLC";
5034 			break;
5035 		case 119:
5036 		case 183:
5037 			block = "DMA0";
5038 			break;
5039 		case 61:
5040 			block = "DMA1";
5041 			break;
5042 		case 248:
5043 		case 120:
5044 			block = "HDP";
5045 			break;
5046 		default:
5047 			block = "unknown";
5048 			break;
5049 		}
5050 	}
5051 
5052 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5053 	       protections, vmid, addr,
5054 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5055 	       block, mc_id);
5056 }
5057 
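/**
 * si_vm_flush - flush the VM TLB using the CP (SI)
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: vm to flush
 * @pd_addr: page directory base address
 *
 * Update the page table base address for the requested VM id, flush
 * the HDP cache, request a TLB invalidate for the VM and wait for it
 * to complete, then sync the PFP back up to the ME.
 */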
5058 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5059 		 unsigned vm_id, uint64_t pd_addr)
5060 {
5061 	/* write new base address */
5062 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5063 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5064 				 WRITE_DATA_DST_SEL(0)));
5065 
5066 	if (vm_id < 8) {
5067 		radeon_ring_write(ring,
5068 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5069 	} else {
5070 		radeon_ring_write(ring,
5071 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5072 	}
5073 	radeon_ring_write(ring, 0);
5074 	radeon_ring_write(ring, pd_addr >> 12);
5075 
5076 	/* flush hdp cache */
5077 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5078 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5079 				 WRITE_DATA_DST_SEL(0)));
5080 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5081 	radeon_ring_write(ring, 0);
5082 	radeon_ring_write(ring, 0x1);
5083 
5084 	/* bits 0-15 are the VM contexts 0-15 */
5085 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5086 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5087 				 WRITE_DATA_DST_SEL(0)));
5088 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5089 	radeon_ring_write(ring, 0);
5090 	radeon_ring_write(ring, 1 << vm_id);
5091 
5092 	/* wait for the invalidate to complete */
5093 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5094 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5095 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5096 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5097 	radeon_ring_write(ring, 0);
5098 	radeon_ring_write(ring, 0); /* ref */
5099 	radeon_ring_write(ring, 0); /* mask */
5100 	radeon_ring_write(ring, 0x20); /* poll interval */
5101 
5102 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5103 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5104 	radeon_ring_write(ring, 0x0);
5105 }
5106 
5107 /*
5108  *  Power and clock gating
5109  */
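/* Poll (for up to rdev->usec_timeout) until both RLC serdes master
 * busy registers read idle.
 */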
5110 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5111 {
5112 	int i;
5113 
5114 	for (i = 0; i < rdev->usec_timeout; i++) {
5115 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5116 			break;
5117 		udelay(1);
5118 	}
5119 
5120 	for (i = 0; i < rdev->usec_timeout; i++) {
5121 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5122 			break;
5123 		udelay(1);
5124 	}
5125 }
5126 
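/*
 * Enable or disable the CP context busy/empty (GUI idle) interrupts.
 * On disable, also poll RLC_STAT until the gfx block settles into the
 * expected clock/power state.
 */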
5127 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5128 					 bool enable)
5129 {
5130 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5131 	u32 mask;
5132 	int i;
5133 
5134 	if (enable)
5135 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5136 	else
5137 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5138 	WREG32(CP_INT_CNTL_RING0, tmp);
5139 
5140 	if (!enable) {
5141 		/* read a gfx register */
5142 		tmp = RREG32(DB_DEPTH_INFO);
5143 
5144 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5145 		for (i = 0; i < rdev->usec_timeout; i++) {
5146 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5147 				break;
5148 			udelay(1);
5149 		}
5150 	}
5151 }
5152 
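/*
 * Program the UVD clock gating control, selecting between software
 * (sw_mode = true) and hardware control of the dynamic clock mode
 * dividers.
 */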
5153 static void si_set_uvd_dcm(struct radeon_device *rdev,
5154 			   bool sw_mode)
5155 {
5156 	u32 tmp, tmp2;
5157 
5158 	tmp = RREG32(UVD_CGC_CTRL);
5159 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5160 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5161 
5162 	if (sw_mode) {
5163 		tmp &= ~0x7ffff800;
5164 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5165 	} else {
5166 		tmp |= 0x7ffff800;
5167 		tmp2 = 0;
5168 	}
5169 
5170 	WREG32(UVD_CGC_CTRL, tmp);
5171 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5172 }
5173 
5174 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5175 {
5176 	bool hw_mode = true; /* SI always uses HW-controlled DCM */
5177 
5178 	if (hw_mode) {
5179 		si_set_uvd_dcm(rdev, false);
5180 	} else {
5181 		u32 tmp = RREG32(UVD_CGC_CTRL);
5182 		tmp &= ~DCM;
5183 		WREG32(UVD_CGC_CTRL, tmp);
5184 	}
5185 }
5186 
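/*
 * Halt the RLC and wait for its serdes to go idle; returns the prior
 * RLC_CNTL value so the caller can restore it with si_update_rlc().
 */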
5187 static u32 si_halt_rlc(struct radeon_device *rdev)
5188 {
5189 	u32 data, orig;
5190 
5191 	orig = data = RREG32(RLC_CNTL);
5192 
5193 	if (data & RLC_ENABLE) {
5194 		data &= ~RLC_ENABLE;
5195 		WREG32(RLC_CNTL, data);
5196 
5197 		si_wait_for_rlc_serdes(rdev);
5198 	}
5199 
5200 	return orig;
5201 }
5202 
5203 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5204 {
5205 	u32 tmp;
5206 
5207 	tmp = RREG32(RLC_CNTL);
5208 	if (tmp != rlc)
5209 		WREG32(RLC_CNTL, rlc);
5210 }
5211 
5212 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5213 {
5214 	u32 data, orig;
5215 
5216 	orig = data = RREG32(DMA_PG);
5217 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5218 		data |= PG_CNTL_ENABLE;
5219 	else
5220 		data &= ~PG_CNTL_ENABLE;
5221 	if (orig != data)
5222 		WREG32(DMA_PG, data);
5223 }
5224 
5225 static void si_init_dma_pg(struct radeon_device *rdev)
5226 {
5227 	u32 tmp;
5228 
5229 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5230 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5231 
5232 	for (tmp = 0; tmp < 5; tmp++)
5233 		WREG32(DMA_PGFSM_WRITE, 0);
5234 }
5235 
5236 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5237 			       bool enable)
5238 {
5239 	u32 tmp;
5240 
5241 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5242 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5243 		WREG32(RLC_TTOP_D, tmp);
5244 
5245 		tmp = RREG32(RLC_PG_CNTL);
5246 		tmp |= GFX_PG_ENABLE;
5247 		WREG32(RLC_PG_CNTL, tmp);
5248 
5249 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5250 		tmp |= AUTO_PG_EN;
5251 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5252 	} else {
5253 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5254 		tmp &= ~AUTO_PG_EN;
5255 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5256 
5257 		tmp = RREG32(DB_RENDER_CONTROL);
5258 	}
5259 }
5260 
5261 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5262 {
5263 	u32 tmp;
5264 
5265 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5266 
5267 	tmp = RREG32(RLC_PG_CNTL);
5268 	tmp |= GFX_PG_SRC;
5269 	WREG32(RLC_PG_CNTL, tmp);
5270 
5271 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5272 
5273 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5274 
5275 	tmp &= ~GRBM_REG_SGIT_MASK;
5276 	tmp |= GRBM_REG_SGIT(0x700);
5277 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5278 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5279 }
5280 
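/*
 * Return a bitmap of the active (not fuse- or user-disabled) compute
 * units in the selected shader engine / shader array.
 */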
5281 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5282 {
5283 	u32 mask = 0, tmp, tmp1;
5284 	int i;
5285 
5286 	si_select_se_sh(rdev, se, sh);
5287 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5288 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5289 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5290 
5291 	tmp &= 0xffff0000;
5292 
5293 	tmp |= tmp1;
5294 	tmp >>= 16;
5295 
5296 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5297 		mask <<= 1;
5298 		mask |= 1;
5299 	}
5300 
5301 	return (~tmp) & mask;
5302 }
5303 
5304 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5305 {
5306 	u32 i, j, k, active_cu_number = 0;
5307 	u32 mask, counter, cu_bitmap;
5308 	u32 tmp = 0;
5309 
5310 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5311 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5312 			mask = 1;
5313 			cu_bitmap = 0;
5314 			counter  = 0;
5315 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5316 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5317 					if (counter < 2)
5318 						cu_bitmap |= mask;
5319 					counter++;
5320 				}
5321 				mask <<= 1;
5322 			}
5323 
5324 			active_cu_number += counter;
5325 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5326 		}
5327 	}
5328 
5329 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5330 
5331 	tmp = RREG32(RLC_MAX_PG_CU);
5332 	tmp &= ~MAX_PU_CU_MASK;
5333 	tmp |= MAX_PU_CU(active_cu_number);
5334 	WREG32(RLC_MAX_PG_CU, tmp);
5335 }
5336 
5337 static void si_enable_cgcg(struct radeon_device *rdev,
5338 			   bool enable)
5339 {
5340 	u32 data, orig, tmp;
5341 
5342 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5343 
5344 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5345 		si_enable_gui_idle_interrupt(rdev, true);
5346 
5347 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5348 
5349 		tmp = si_halt_rlc(rdev);
5350 
5351 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5352 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5353 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5354 
5355 		si_wait_for_rlc_serdes(rdev);
5356 
5357 		si_update_rlc(rdev, tmp);
5358 
5359 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5360 
5361 		data |= CGCG_EN | CGLS_EN;
5362 	} else {
5363 		si_enable_gui_idle_interrupt(rdev, false);
5364 
5365 		RREG32(CB_CGTT_SCLK_CTRL);
5366 		RREG32(CB_CGTT_SCLK_CTRL);
5367 		RREG32(CB_CGTT_SCLK_CTRL);
5368 		RREG32(CB_CGTT_SCLK_CTRL);
5369 
5370 		data &= ~(CGCG_EN | CGLS_EN);
5371 	}
5372 
5373 	if (orig != data)
5374 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5375 }
5376 
5377 static void si_enable_mgcg(struct radeon_device *rdev,
5378 			   bool enable)
5379 {
5380 	u32 data, orig, tmp = 0;
5381 
5382 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5383 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5384 		data = 0x96940200;
5385 		if (orig != data)
5386 			WREG32(CGTS_SM_CTRL_REG, data);
5387 
5388 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5389 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5390 			data |= CP_MEM_LS_EN;
5391 			if (orig != data)
5392 				WREG32(CP_MEM_SLP_CNTL, data);
5393 		}
5394 
5395 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5396 		data &= 0xffffffc0;
5397 		if (orig != data)
5398 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5399 
5400 		tmp = si_halt_rlc(rdev);
5401 
5402 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5403 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5404 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5405 
5406 		si_update_rlc(rdev, tmp);
5407 	} else {
5408 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5409 		data |= 0x00000003;
5410 		if (orig != data)
5411 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5412 
5413 		data = RREG32(CP_MEM_SLP_CNTL);
5414 		if (data & CP_MEM_LS_EN) {
5415 			data &= ~CP_MEM_LS_EN;
5416 			WREG32(CP_MEM_SLP_CNTL, data);
5417 		}
5418 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5419 		data |= LS_OVERRIDE | OVERRIDE;
5420 		if (orig != data)
5421 			WREG32(CGTS_SM_CTRL_REG, data);
5422 
5423 		tmp = si_halt_rlc(rdev);
5424 
5425 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5426 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5427 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5428 
5429 		si_update_rlc(rdev, tmp);
5430 	}
5431 }
5432 
5433 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5434 			       bool enable)
5435 {
5436 	u32 orig, data, tmp;
5437 
5438 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5439 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5440 		tmp |= 0x3fff;
5441 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5442 
5443 		orig = data = RREG32(UVD_CGC_CTRL);
5444 		data |= DCM;
5445 		if (orig != data)
5446 			WREG32(UVD_CGC_CTRL, data);
5447 
5448 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5449 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5450 	} else {
5451 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5452 		tmp &= ~0x3fff;
5453 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5454 
5455 		orig = data = RREG32(UVD_CGC_CTRL);
5456 		data &= ~DCM;
5457 		if (orig != data)
5458 			WREG32(UVD_CGC_CTRL, data);
5459 
5460 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5461 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5462 	}
5463 }
5464 
5465 static const u32 mc_cg_registers[] =
5466 {
5467 	MC_HUB_MISC_HUB_CG,
5468 	MC_HUB_MISC_SIP_CG,
5469 	MC_HUB_MISC_VM_CG,
5470 	MC_XPB_CLK_GAT,
5471 	ATC_MISC_CG,
5472 	MC_CITF_MISC_WR_CG,
5473 	MC_CITF_MISC_RD_CG,
5474 	MC_CITF_MISC_VM_CG,
5475 	VM_L2_CG,
5476 };
5477 
5478 static void si_enable_mc_ls(struct radeon_device *rdev,
5479 			    bool enable)
5480 {
5481 	int i;
5482 	u32 orig, data;
5483 
5484 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5485 		orig = data = RREG32(mc_cg_registers[i]);
5486 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5487 			data |= MC_LS_ENABLE;
5488 		else
5489 			data &= ~MC_LS_ENABLE;
5490 		if (data != orig)
5491 			WREG32(mc_cg_registers[i], data);
5492 	}
5493 }
5494 
5495 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5496 			       bool enable)
5497 {
5498 	int i;
5499 	u32 orig, data;
5500 
5501 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5502 		orig = data = RREG32(mc_cg_registers[i]);
5503 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5504 			data |= MC_CG_ENABLE;
5505 		else
5506 			data &= ~MC_CG_ENABLE;
5507 		if (data != orig)
5508 			WREG32(mc_cg_registers[i], data);
5509 	}
5510 }
5511 
5512 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5513 			       bool enable)
5514 {
5515 	u32 orig, data, offset;
5516 	int i;
5517 
5518 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5519 		for (i = 0; i < 2; i++) {
5520 			if (i == 0)
5521 				offset = DMA0_REGISTER_OFFSET;
5522 			else
5523 				offset = DMA1_REGISTER_OFFSET;
5524 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5525 			data &= ~MEM_POWER_OVERRIDE;
5526 			if (data != orig)
5527 				WREG32(DMA_POWER_CNTL + offset, data);
5528 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5529 		}
5530 	} else {
5531 		for (i = 0; i < 2; i++) {
5532 			if (i == 0)
5533 				offset = DMA0_REGISTER_OFFSET;
5534 			else
5535 				offset = DMA1_REGISTER_OFFSET;
5536 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5537 			data |= MEM_POWER_OVERRIDE;
5538 			if (data != orig)
5539 				WREG32(DMA_POWER_CNTL + offset, data);
5540 
5541 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5542 			data = 0xff000000;
5543 			if (data != orig)
5544 				WREG32(DMA_CLK_CTRL + offset, data);
5545 		}
5546 	}
5547 }
5548 
5549 static void si_enable_bif_mgls(struct radeon_device *rdev,
5550 			       bool enable)
5551 {
5552 	u32 orig, data;
5553 
5554 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5555 
5556 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5557 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5558 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5559 	else
5560 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5561 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5562 
5563 	if (orig != data)
5564 		WREG32_PCIE(PCIE_CNTL2, data);
5565 }
5566 
5567 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5568 			       bool enable)
5569 {
5570 	u32 orig, data;
5571 
5572 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5573 
5574 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5575 		data &= ~CLOCK_GATING_DIS;
5576 	else
5577 		data |= CLOCK_GATING_DIS;
5578 
5579 	if (orig != data)
5580 		WREG32(HDP_HOST_PATH_CNTL, data);
5581 }
5582 
5583 static void si_enable_hdp_ls(struct radeon_device *rdev,
5584 			     bool enable)
5585 {
5586 	u32 orig, data;
5587 
5588 	orig = data = RREG32(HDP_MEM_POWER_LS);
5589 
5590 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5591 		data |= HDP_LS_ENABLE;
5592 	else
5593 		data &= ~HDP_LS_ENABLE;
5594 
5595 	if (orig != data)
5596 		WREG32(HDP_MEM_POWER_LS, data);
5597 }
5598 
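/*
 * Enable or disable clock gating for the requested IP blocks.  For GFX
 * the MGCG/CGCG ordering below matters, so the GUI idle interrupt is
 * masked around the sequence.
 */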
5599 static void si_update_cg(struct radeon_device *rdev,
5600 			 u32 block, bool enable)
5601 {
5602 	if (block & RADEON_CG_BLOCK_GFX) {
5603 		si_enable_gui_idle_interrupt(rdev, false);
5604 		/* order matters! */
5605 		if (enable) {
5606 			si_enable_mgcg(rdev, true);
5607 			si_enable_cgcg(rdev, true);
5608 		} else {
5609 			si_enable_cgcg(rdev, false);
5610 			si_enable_mgcg(rdev, false);
5611 		}
5612 		si_enable_gui_idle_interrupt(rdev, true);
5613 	}
5614 
5615 	if (block & RADEON_CG_BLOCK_MC) {
5616 		si_enable_mc_mgcg(rdev, enable);
5617 		si_enable_mc_ls(rdev, enable);
5618 	}
5619 
5620 	if (block & RADEON_CG_BLOCK_SDMA) {
5621 		si_enable_dma_mgcg(rdev, enable);
5622 	}
5623 
5624 	if (block & RADEON_CG_BLOCK_BIF) {
5625 		si_enable_bif_mgls(rdev, enable);
5626 	}
5627 
5628 	if (block & RADEON_CG_BLOCK_UVD) {
5629 		if (rdev->has_uvd) {
5630 			si_enable_uvd_mgcg(rdev, enable);
5631 		}
5632 	}
5633 
5634 	if (block & RADEON_CG_BLOCK_HDP) {
5635 		si_enable_hdp_mgcg(rdev, enable);
5636 		si_enable_hdp_ls(rdev, enable);
5637 	}
5638 }
5639 
5640 static void si_init_cg(struct radeon_device *rdev)
5641 {
5642 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5643 			    RADEON_CG_BLOCK_MC |
5644 			    RADEON_CG_BLOCK_SDMA |
5645 			    RADEON_CG_BLOCK_BIF |
5646 			    RADEON_CG_BLOCK_HDP), true);
5647 	if (rdev->has_uvd) {
5648 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5649 		si_init_uvd_internal_cg(rdev);
5650 	}
5651 }
5652 
5653 static void si_fini_cg(struct radeon_device *rdev)
5654 {
5655 	if (rdev->has_uvd) {
5656 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5657 	}
5658 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5659 			    RADEON_CG_BLOCK_MC |
5660 			    RADEON_CG_BLOCK_SDMA |
5661 			    RADEON_CG_BLOCK_BIF |
5662 			    RADEON_CG_BLOCK_HDP), false);
5663 }
5664 
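/*
 * Return the size, in dwords, of the clear state buffer that
 * si_get_csb_buffer() will generate for this ASIC.
 */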
5665 u32 si_get_csb_size(struct radeon_device *rdev)
5666 {
5667 	u32 count = 0;
5668 	const struct cs_section_def *sect = NULL;
5669 	const struct cs_extent_def *ext = NULL;
5670 
5671 	if (rdev->rlc.cs_data == NULL)
5672 		return 0;
5673 
5674 	/* begin clear state */
5675 	count += 2;
5676 	/* context control state */
5677 	count += 3;
5678 
5679 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5680 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5681 			if (sect->id == SECT_CONTEXT)
5682 				count += 2 + ext->reg_count;
5683 			else
5684 				return 0;
5685 		}
5686 	}
5687 	/* pa_sc_raster_config */
5688 	count += 3;
5689 	/* end clear state */
5690 	count += 2;
5691 	/* clear state */
5692 	count += 2;
5693 
5694 	return count;
5695 }
5696 
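/*
 * Fill @buffer with the PM4 packets that load the golden context state:
 * a clear state preamble, context control, the SECT_CONTEXT register
 * extents, a per-ASIC PA_SC_RASTER_CONFIG value and a final CLEAR_STATE.
 * @buffer must be at least si_get_csb_size() dwords.
 */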
5697 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5698 {
5699 	u32 count = 0, i;
5700 	const struct cs_section_def *sect = NULL;
5701 	const struct cs_extent_def *ext = NULL;
5702 
5703 	if (rdev->rlc.cs_data == NULL)
5704 		return;
5705 	if (buffer == NULL)
5706 		return;
5707 
5708 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5709 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5710 
5711 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5712 	buffer[count++] = cpu_to_le32(0x80000000);
5713 	buffer[count++] = cpu_to_le32(0x80000000);
5714 
5715 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5716 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5717 			if (sect->id == SECT_CONTEXT) {
5718 				buffer[count++] =
5719 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5720 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5721 				for (i = 0; i < ext->reg_count; i++)
5722 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5723 			} else {
5724 				return;
5725 			}
5726 		}
5727 	}
5728 
5729 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5730 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5731 	switch (rdev->family) {
5732 	case CHIP_TAHITI:
5733 	case CHIP_PITCAIRN:
5734 		buffer[count++] = cpu_to_le32(0x2a00126a);
5735 		break;
5736 	case CHIP_VERDE:
5737 		buffer[count++] = cpu_to_le32(0x0000124a);
5738 		break;
5739 	case CHIP_OLAND:
5740 		buffer[count++] = cpu_to_le32(0x00000082);
5741 		break;
5742 	case CHIP_HAINAN:
5743 		buffer[count++] = cpu_to_le32(0x00000000);
5744 		break;
5745 	default:
5746 		buffer[count++] = cpu_to_le32(0x00000000);
5747 		break;
5748 	}
5749 
5750 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5751 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5752 
5753 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5754 	buffer[count++] = cpu_to_le32(0);
5755 }
5756 
5757 static void si_init_pg(struct radeon_device *rdev)
5758 {
5759 	if (rdev->pg_flags) {
5760 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5761 			si_init_dma_pg(rdev);
5762 		}
5763 		si_init_ao_cu_mask(rdev);
5764 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5765 			si_init_gfx_cgpg(rdev);
5766 		} else {
5767 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5768 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5769 		}
5770 		si_enable_dma_pg(rdev, true);
5771 		si_enable_gfx_cgpg(rdev, true);
5772 	} else {
5773 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5774 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5775 	}
5776 }
5777 
5778 static void si_fini_pg(struct radeon_device *rdev)
5779 {
5780 	if (rdev->pg_flags) {
5781 		si_enable_dma_pg(rdev, false);
5782 		si_enable_gfx_cgpg(rdev, false);
5783 	}
5784 }
5785 
5786 /*
5787  * RLC
5788  */
5789 void si_rlc_reset(struct radeon_device *rdev)
5790 {
5791 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5792 
5793 	tmp |= SOFT_RESET_RLC;
5794 	WREG32(GRBM_SOFT_RESET, tmp);
5795 	udelay(50);
5796 	tmp &= ~SOFT_RESET_RLC;
5797 	WREG32(GRBM_SOFT_RESET, tmp);
5798 	udelay(50);
5799 }
5800 
5801 static void si_rlc_stop(struct radeon_device *rdev)
5802 {
5803 	WREG32(RLC_CNTL, 0);
5804 
5805 	si_enable_gui_idle_interrupt(rdev, false);
5806 
5807 	si_wait_for_rlc_serdes(rdev);
5808 }
5809 
5810 static void si_rlc_start(struct radeon_device *rdev)
5811 {
5812 	WREG32(RLC_CNTL, RLC_ENABLE);
5813 
5814 	si_enable_gui_idle_interrupt(rdev, true);
5815 
5816 	udelay(50);
5817 }
5818 
5819 static bool si_lbpw_supported(struct radeon_device *rdev)
5820 {
5821 	u32 tmp;
5822 
5823 	/* Enable LBPW only for DDR3 */
5824 	tmp = RREG32(MC_SEQ_MISC0);
5825 	if ((tmp & 0xF0000000) == 0xB0000000)
5826 		return true;
5827 	return false;
5828 }
5829 
5830 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5831 {
5832 	u32 tmp;
5833 
5834 	tmp = RREG32(RLC_LB_CNTL);
5835 	if (enable)
5836 		tmp |= LOAD_BALANCE_ENABLE;
5837 	else
5838 		tmp &= ~LOAD_BALANCE_ENABLE;
5839 	WREG32(RLC_LB_CNTL, tmp);
5840 
5841 	if (!enable) {
5842 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5843 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5844 	}
5845 }
5846 
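/*
 * Bring the RLC back up: stop and reset it, reinitialize power and
 * clock gating, load the RLC microcode (unified-header or legacy
 * layout) and restart it.
 */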
5847 static int si_rlc_resume(struct radeon_device *rdev)
5848 {
5849 	u32 i;
5850 
5851 	if (!rdev->rlc_fw)
5852 		return -EINVAL;
5853 
5854 	si_rlc_stop(rdev);
5855 
5856 	si_rlc_reset(rdev);
5857 
5858 	si_init_pg(rdev);
5859 
5860 	si_init_cg(rdev);
5861 
5862 	WREG32(RLC_RL_BASE, 0);
5863 	WREG32(RLC_RL_SIZE, 0);
5864 	WREG32(RLC_LB_CNTL, 0);
5865 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5866 	WREG32(RLC_LB_CNTR_INIT, 0);
5867 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5868 
5869 	WREG32(RLC_MC_CNTL, 0);
5870 	WREG32(RLC_UCODE_CNTL, 0);
5871 
5872 	if (rdev->new_fw) {
5873 		const struct rlc_firmware_header_v1_0 *hdr =
5874 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5875 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5876 		const __le32 *fw_data = (const __le32 *)
5877 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5878 
5879 		radeon_ucode_print_rlc_hdr(&hdr->header);
5880 
5881 		for (i = 0; i < fw_size; i++) {
5882 			WREG32(RLC_UCODE_ADDR, i);
5883 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5884 		}
5885 	} else {
5886 		const __be32 *fw_data =
5887 			(const __be32 *)rdev->rlc_fw->data;
5888 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5889 			WREG32(RLC_UCODE_ADDR, i);
5890 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5891 		}
5892 	}
5893 	WREG32(RLC_UCODE_ADDR, 0);
5894 
5895 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5896 
5897 	si_rlc_start(rdev);
5898 
5899 	return 0;
5900 }
5901 
5902 static void si_enable_interrupts(struct radeon_device *rdev)
5903 {
5904 	u32 ih_cntl = RREG32(IH_CNTL);
5905 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5906 
5907 	ih_cntl |= ENABLE_INTR;
5908 	ih_rb_cntl |= IH_RB_ENABLE;
5909 	WREG32(IH_CNTL, ih_cntl);
5910 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5911 	rdev->ih.enabled = true;
5912 }
5913 
5914 static void si_disable_interrupts(struct radeon_device *rdev)
5915 {
5916 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5917 	u32 ih_cntl = RREG32(IH_CNTL);
5918 
5919 	ih_rb_cntl &= ~IH_RB_ENABLE;
5920 	ih_cntl &= ~ENABLE_INTR;
5921 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5922 	WREG32(IH_CNTL, ih_cntl);
5923 	/* set rptr, wptr to 0 */
5924 	WREG32(IH_RB_RPTR, 0);
5925 	WREG32(IH_RB_WPTR, 0);
5926 	rdev->ih.enabled = false;
5927 	rdev->ih.rptr = 0;
5928 }
5929 
5930 static void si_disable_interrupt_state(struct radeon_device *rdev)
5931 {
5932 	u32 tmp;
5933 
5934 	tmp = RREG32(CP_INT_CNTL_RING0) &
5935 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5936 	WREG32(CP_INT_CNTL_RING0, tmp);
5937 	WREG32(CP_INT_CNTL_RING1, 0);
5938 	WREG32(CP_INT_CNTL_RING2, 0);
5939 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5940 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5941 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5942 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5943 	WREG32(GRBM_INT_CNTL, 0);
5944 	WREG32(SRBM_INT_CNTL, 0);
5945 	if (rdev->num_crtc >= 2) {
5946 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5947 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5948 	}
5949 	if (rdev->num_crtc >= 4) {
5950 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5951 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5952 	}
5953 	if (rdev->num_crtc >= 6) {
5954 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5955 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5956 	}
5957 
5958 	if (rdev->num_crtc >= 2) {
5959 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5960 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5961 	}
5962 	if (rdev->num_crtc >= 4) {
5963 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5964 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5965 	}
5966 	if (rdev->num_crtc >= 6) {
5967 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5968 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5969 	}
5970 
5971 	if (!ASIC_IS_NODCE(rdev)) {
5972 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5973 
5974 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5975 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5976 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5977 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5978 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5979 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5980 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5981 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5982 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5983 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5984 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5985 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5986 	}
5987 }
5988 
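/*
 * Allocate and program the IH ring, resume the RLC and leave every
 * interrupt source masked; individual sources are enabled later via
 * si_irq_set().
 */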
5989 static int si_irq_init(struct radeon_device *rdev)
5990 {
5991 	int ret = 0;
5992 	int rb_bufsz;
5993 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5994 
5995 	/* allocate ring */
5996 	ret = r600_ih_ring_alloc(rdev);
5997 	if (ret)
5998 		return ret;
5999 
6000 	/* disable irqs */
6001 	si_disable_interrupts(rdev);
6002 
6003 	/* init rlc */
6004 	ret = si_rlc_resume(rdev);
6005 	if (ret) {
6006 		r600_ih_ring_fini(rdev);
6007 		return ret;
6008 	}
6009 
6010 	/* setup interrupt control */
6011 	/* set dummy read address to ring address */
6012 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6013 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6014 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6015 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6016 	 */
6017 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6018 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6019 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6020 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6021 
6022 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6023 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6024 
6025 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6026 		      IH_WPTR_OVERFLOW_CLEAR |
6027 		      (rb_bufsz << 1));
6028 
6029 	if (rdev->wb.enabled)
6030 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6031 
6032 	/* set the writeback address whether it's enabled or not */
6033 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6034 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6035 
6036 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6037 
6038 	/* set rptr, wptr to 0 */
6039 	WREG32(IH_RB_RPTR, 0);
6040 	WREG32(IH_RB_WPTR, 0);
6041 
6042 	/* Default settings for IH_CNTL (disabled at first) */
6043 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6044 	/* RPTR_REARM only works if msi's are enabled */
6045 	if (rdev->msi_enabled)
6046 		ih_cntl |= RPTR_REARM;
6047 	WREG32(IH_CNTL, ih_cntl);
6048 
6049 	/* force the active interrupt state to all disabled */
6050 	si_disable_interrupt_state(rdev);
6051 
6052 	pci_set_master(rdev->pdev);
6053 
6054 	/* enable irqs */
6055 	si_enable_interrupts(rdev);
6056 
6057 	return ret;
6058 }
6059 
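/*
 * Program the interrupt enable registers from the sources currently
 * requested in rdev->irq: CP rings, DMA engines, vblank, page flip,
 * hotplug and thermal.
 */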
6060 int si_irq_set(struct radeon_device *rdev)
6061 {
6062 	u32 cp_int_cntl;
6063 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6064 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6065 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6066 	u32 grbm_int_cntl = 0;
6067 	u32 dma_cntl, dma_cntl1;
6068 	u32 thermal_int = 0;
6069 
6070 	if (!rdev->irq.installed) {
6071 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6072 		return -EINVAL;
6073 	}
6074 	/* don't enable anything if the ih is disabled */
6075 	if (!rdev->ih.enabled) {
6076 		si_disable_interrupts(rdev);
6077 		/* force the active interrupt state to all disabled */
6078 		si_disable_interrupt_state(rdev);
6079 		return 0;
6080 	}
6081 
6082 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6083 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6084 
6085 	if (!ASIC_IS_NODCE(rdev)) {
6086 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6087 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6088 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6089 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6090 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6091 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6092 	}
6093 
6094 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6095 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6096 
6097 	thermal_int = RREG32(CG_THERMAL_INT) &
6098 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6099 
6100 	/* enable CP interrupts on all rings */
6101 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6102 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6103 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6104 	}
6105 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6106 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6107 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6108 	}
6109 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6110 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6111 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6112 	}
6113 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6114 		DRM_DEBUG("si_irq_set: sw int dma\n");
6115 		dma_cntl |= TRAP_ENABLE;
6116 	}
6117 
6118 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6119 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6120 		dma_cntl1 |= TRAP_ENABLE;
6121 	}
6122 	if (rdev->irq.crtc_vblank_int[0] ||
6123 	    atomic_read(&rdev->irq.pflip[0])) {
6124 		DRM_DEBUG("si_irq_set: vblank 0\n");
6125 		crtc1 |= VBLANK_INT_MASK;
6126 	}
6127 	if (rdev->irq.crtc_vblank_int[1] ||
6128 	    atomic_read(&rdev->irq.pflip[1])) {
6129 		DRM_DEBUG("si_irq_set: vblank 1\n");
6130 		crtc2 |= VBLANK_INT_MASK;
6131 	}
6132 	if (rdev->irq.crtc_vblank_int[2] ||
6133 	    atomic_read(&rdev->irq.pflip[2])) {
6134 		DRM_DEBUG("si_irq_set: vblank 2\n");
6135 		crtc3 |= VBLANK_INT_MASK;
6136 	}
6137 	if (rdev->irq.crtc_vblank_int[3] ||
6138 	    atomic_read(&rdev->irq.pflip[3])) {
6139 		DRM_DEBUG("si_irq_set: vblank 3\n");
6140 		crtc4 |= VBLANK_INT_MASK;
6141 	}
6142 	if (rdev->irq.crtc_vblank_int[4] ||
6143 	    atomic_read(&rdev->irq.pflip[4])) {
6144 		DRM_DEBUG("si_irq_set: vblank 4\n");
6145 		crtc5 |= VBLANK_INT_MASK;
6146 	}
6147 	if (rdev->irq.crtc_vblank_int[5] ||
6148 	    atomic_read(&rdev->irq.pflip[5])) {
6149 		DRM_DEBUG("si_irq_set: vblank 5\n");
6150 		crtc6 |= VBLANK_INT_MASK;
6151 	}
6152 	if (rdev->irq.hpd[0]) {
6153 		DRM_DEBUG("si_irq_set: hpd 1\n");
6154 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6155 	}
6156 	if (rdev->irq.hpd[1]) {
6157 		DRM_DEBUG("si_irq_set: hpd 2\n");
6158 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6159 	}
6160 	if (rdev->irq.hpd[2]) {
6161 		DRM_DEBUG("si_irq_set: hpd 3\n");
6162 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6163 	}
6164 	if (rdev->irq.hpd[3]) {
6165 		DRM_DEBUG("si_irq_set: hpd 4\n");
6166 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6167 	}
6168 	if (rdev->irq.hpd[4]) {
6169 		DRM_DEBUG("si_irq_set: hpd 5\n");
6170 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6171 	}
6172 	if (rdev->irq.hpd[5]) {
6173 		DRM_DEBUG("si_irq_set: hpd 6\n");
6174 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6175 	}
6176 
6177 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6178 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6179 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6180 
6181 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6182 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6183 
6184 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6185 
6186 	if (rdev->irq.dpm_thermal) {
6187 		DRM_DEBUG("dpm thermal\n");
6188 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6189 	}
6190 
6191 	if (rdev->num_crtc >= 2) {
6192 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6193 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6194 	}
6195 	if (rdev->num_crtc >= 4) {
6196 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6197 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6198 	}
6199 	if (rdev->num_crtc >= 6) {
6200 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6201 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6202 	}
6203 
6204 	if (rdev->num_crtc >= 2) {
6205 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6206 		       GRPH_PFLIP_INT_MASK);
6207 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6208 		       GRPH_PFLIP_INT_MASK);
6209 	}
6210 	if (rdev->num_crtc >= 4) {
6211 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6212 		       GRPH_PFLIP_INT_MASK);
6213 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6214 		       GRPH_PFLIP_INT_MASK);
6215 	}
6216 	if (rdev->num_crtc >= 6) {
6217 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6218 		       GRPH_PFLIP_INT_MASK);
6219 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6220 		       GRPH_PFLIP_INT_MASK);
6221 	}
6222 
6223 	if (!ASIC_IS_NODCE(rdev)) {
6224 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6225 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6226 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6227 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6228 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6229 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6230 	}
6231 
6232 	WREG32(CG_THERMAL_INT, thermal_int);
6233 
6234 	/* posting read */
6235 	RREG32(SRBM_STATUS);
6236 
6237 	return 0;
6238 }
6239 
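/*
 * Latch the display interrupt status registers into rdev->irq.stat_regs
 * and acknowledge any pending pflip, vblank, vline, hpd and hpd-rx
 * interrupts.
 */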
6240 static inline void si_irq_ack(struct radeon_device *rdev)
6241 {
6242 	u32 tmp;
6243 
6244 	if (ASIC_IS_NODCE(rdev))
6245 		return;
6246 
6247 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6248 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6249 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6250 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6251 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6252 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6253 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6254 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6255 	if (rdev->num_crtc >= 4) {
6256 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6257 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6258 	}
6259 	if (rdev->num_crtc >= 6) {
6260 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6261 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6262 	}
6263 
6264 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6265 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6266 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6267 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6268 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6269 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6270 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6271 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6272 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6273 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6274 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6275 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6276 
6277 	if (rdev->num_crtc >= 4) {
6278 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6279 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6280 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6281 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6282 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6283 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6284 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6285 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6286 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6287 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6288 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6289 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6290 	}
6291 
6292 	if (rdev->num_crtc >= 6) {
6293 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6294 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6295 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6296 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6297 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6298 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6299 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6300 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6301 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6302 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6303 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6304 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6305 	}
6306 
6307 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6308 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6309 		tmp |= DC_HPDx_INT_ACK;
6310 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6311 	}
6312 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6313 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6314 		tmp |= DC_HPDx_INT_ACK;
6315 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6316 	}
6317 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6318 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6319 		tmp |= DC_HPDx_INT_ACK;
6320 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6321 	}
6322 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6323 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6324 		tmp |= DC_HPDx_INT_ACK;
6325 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6326 	}
6327 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6328 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6329 		tmp |= DC_HPDx_INT_ACK;
6330 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6331 	}
6332 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6333 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6334 		tmp |= DC_HPDx_INT_ACK;
6335 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6336 	}
6337 
6338 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6339 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6340 		tmp |= DC_HPDx_RX_INT_ACK;
6341 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6342 	}
6343 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6344 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6345 		tmp |= DC_HPDx_RX_INT_ACK;
6346 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6347 	}
6348 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6349 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6350 		tmp |= DC_HPDx_RX_INT_ACK;
6351 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6352 	}
6353 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6354 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6355 		tmp |= DC_HPDx_RX_INT_ACK;
6356 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6357 	}
6358 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6359 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6360 		tmp |= DC_HPDx_RX_INT_ACK;
6361 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6362 	}
6363 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6364 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6365 		tmp |= DC_HPDx_RX_INT_ACK;
6366 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6367 	}
6368 }
6369 
6370 static void si_irq_disable(struct radeon_device *rdev)
6371 {
6372 	si_disable_interrupts(rdev);
6373 	/* Wait and acknowledge irq */
6374 	mdelay(1);
6375 	si_irq_ack(rdev);
6376 	si_disable_interrupt_state(rdev);
6377 }
6378 
6379 static void si_irq_suspend(struct radeon_device *rdev)
6380 {
6381 	si_irq_disable(rdev);
6382 	si_rlc_stop(rdev);
6383 }
6384 
6385 static void si_irq_fini(struct radeon_device *rdev)
6386 {
6387 	si_irq_suspend(rdev);
6388 	r600_ih_ring_fini(rdev);
6389 }
6390 
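/*
 * Read the current IH ring write pointer (from the writeback buffer
 * when enabled, otherwise from the register) and handle ring overflow
 * by skipping ahead to the oldest vector that was not overwritten.
 */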
6391 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6392 {
6393 	u32 wptr, tmp;
6394 
6395 	if (rdev->wb.enabled)
6396 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6397 	else
6398 		wptr = RREG32(IH_RB_WPTR);
6399 
6400 	if (wptr & RB_OVERFLOW) {
6401 		wptr &= ~RB_OVERFLOW;
6402 		/* When a ring buffer overflow happens, start parsing interrupts
6403 		 * from the last vector that was not overwritten (wptr + 16).
6404 		 * Hopefully this allows us to catch up.
6405 		 */
6406 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6407 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6408 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6409 		tmp = RREG32(IH_RB_CNTL);
6410 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6411 		WREG32(IH_RB_CNTL, tmp);
6412 	}
6413 	return (wptr & rdev->ih.ptr_mask);
6414 }
6415 
6416 /* SI IV Ring
6417  * Each IV ring entry is 128 bits:
6418  * [7:0]    - interrupt source id
6419  * [31:8]   - reserved
6420  * [59:32]  - interrupt source data
6421  * [63:60]  - reserved
6422  * [71:64]  - RINGID
6423  * [79:72]  - VMID
6424  * [127:80] - reserved
6425  */
6426 int si_irq_process(struct radeon_device *rdev)
6427 {
6428 	u32 wptr;
6429 	u32 rptr;
6430 	u32 src_id, src_data, ring_id;
6431 	u32 ring_index;
6432 	bool queue_hotplug = false;
6433 	bool queue_dp = false;
6434 	bool queue_thermal = false;
6435 	u32 status, addr;
6436 
6437 	if (!rdev->ih.enabled || rdev->shutdown)
6438 		return IRQ_NONE;
6439 
6440 	wptr = si_get_ih_wptr(rdev);
6441 
6442 restart_ih:
6443 	/* is somebody else already processing irqs? */
6444 	if (atomic_xchg(&rdev->ih.lock, 1))
6445 		return IRQ_NONE;
6446 
6447 	rptr = rdev->ih.rptr;
6448 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6449 
6450 	/* Order reading of wptr vs. reading of IH ring data */
6451 	rmb();
6452 
6453 	/* display interrupts */
6454 	si_irq_ack(rdev);
6455 
6456 	while (rptr != wptr) {
6457 		/* wptr/rptr are in bytes! */
6458 		ring_index = rptr / 4;
6459 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6460 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6461 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6462 
6463 		switch (src_id) {
6464 		case 1: /* D1 vblank/vline */
6465 			switch (src_data) {
6466 			case 0: /* D1 vblank */
6467 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6468 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6469 
6470 				if (rdev->irq.crtc_vblank_int[0]) {
6471 					drm_handle_vblank(rdev->ddev, 0);
6472 					rdev->pm.vblank_sync = true;
6473 					wake_up(&rdev->irq.vblank_queue);
6474 				}
6475 				if (atomic_read(&rdev->irq.pflip[0]))
6476 					radeon_crtc_handle_vblank(rdev, 0);
6477 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6478 				DRM_DEBUG("IH: D1 vblank\n");
6479 
6480 				break;
6481 			case 1: /* D1 vline */
6482 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6483 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6484 
6485 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6486 				DRM_DEBUG("IH: D1 vline\n");
6487 
6488 				break;
6489 			default:
6490 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6491 				break;
6492 			}
6493 			break;
6494 		case 2: /* D2 vblank/vline */
6495 			switch (src_data) {
6496 			case 0: /* D2 vblank */
6497 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6498 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6499 
6500 				if (rdev->irq.crtc_vblank_int[1]) {
6501 					drm_handle_vblank(rdev->ddev, 1);
6502 					rdev->pm.vblank_sync = true;
6503 					wake_up(&rdev->irq.vblank_queue);
6504 				}
6505 				if (atomic_read(&rdev->irq.pflip[1]))
6506 					radeon_crtc_handle_vblank(rdev, 1);
6507 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6508 				DRM_DEBUG("IH: D2 vblank\n");
6509 
6510 				break;
6511 			case 1: /* D2 vline */
6512 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6513 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6514 
6515 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6516 				DRM_DEBUG("IH: D2 vline\n");
6517 
6518 				break;
6519 			default:
6520 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6521 				break;
6522 			}
6523 			break;
6524 		case 3: /* D3 vblank/vline */
6525 			switch (src_data) {
6526 			case 0: /* D3 vblank */
6527 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6528 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6529 
6530 				if (rdev->irq.crtc_vblank_int[2]) {
6531 					drm_handle_vblank(rdev->ddev, 2);
6532 					rdev->pm.vblank_sync = true;
6533 					wake_up(&rdev->irq.vblank_queue);
6534 				}
6535 				if (atomic_read(&rdev->irq.pflip[2]))
6536 					radeon_crtc_handle_vblank(rdev, 2);
6537 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6538 				DRM_DEBUG("IH: D3 vblank\n");
6539 
6540 				break;
6541 			case 1: /* D3 vline */
6542 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6543 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6544 
6545 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6546 				DRM_DEBUG("IH: D3 vline\n");
6547 
6548 				break;
6549 			default:
6550 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6551 				break;
6552 			}
6553 			break;
6554 		case 4: /* D4 vblank/vline */
6555 			switch (src_data) {
6556 			case 0: /* D4 vblank */
6557 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6558 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6559 
6560 				if (rdev->irq.crtc_vblank_int[3]) {
6561 					drm_handle_vblank(rdev->ddev, 3);
6562 					rdev->pm.vblank_sync = true;
6563 					wake_up(&rdev->irq.vblank_queue);
6564 				}
6565 				if (atomic_read(&rdev->irq.pflip[3]))
6566 					radeon_crtc_handle_vblank(rdev, 3);
6567 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6568 				DRM_DEBUG("IH: D4 vblank\n");
6569 
6570 				break;
6571 			case 1: /* D4 vline */
6572 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6573 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6574 
6575 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6576 				DRM_DEBUG("IH: D4 vline\n");
6577 
6578 				break;
6579 			default:
6580 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6581 				break;
6582 			}
6583 			break;
6584 		case 5: /* D5 vblank/vline */
6585 			switch (src_data) {
6586 			case 0: /* D5 vblank */
6587 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6588 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6589 
6590 				if (rdev->irq.crtc_vblank_int[4]) {
6591 					drm_handle_vblank(rdev->ddev, 4);
6592 					rdev->pm.vblank_sync = true;
6593 					wake_up(&rdev->irq.vblank_queue);
6594 				}
6595 				if (atomic_read(&rdev->irq.pflip[4]))
6596 					radeon_crtc_handle_vblank(rdev, 4);
6597 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6598 				DRM_DEBUG("IH: D5 vblank\n");
6599 
6600 				break;
6601 			case 1: /* D5 vline */
6602 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6603 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6604 
6605 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6606 				DRM_DEBUG("IH: D5 vline\n");
6607 
6608 				break;
6609 			default:
6610 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6611 				break;
6612 			}
6613 			break;
6614 		case 6: /* D6 vblank/vline */
6615 			switch (src_data) {
6616 			case 0: /* D6 vblank */
6617 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6618 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6619 
6620 				if (rdev->irq.crtc_vblank_int[5]) {
6621 					drm_handle_vblank(rdev->ddev, 5);
6622 					rdev->pm.vblank_sync = true;
6623 					wake_up(&rdev->irq.vblank_queue);
6624 				}
6625 				if (atomic_read(&rdev->irq.pflip[5]))
6626 					radeon_crtc_handle_vblank(rdev, 5);
6627 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6628 				DRM_DEBUG("IH: D6 vblank\n");
6629 
6630 				break;
6631 			case 1: /* D6 vline */
6632 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6633 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6634 
6635 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6636 				DRM_DEBUG("IH: D6 vline\n");
6637 
6638 				break;
6639 			default:
6640 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6641 				break;
6642 			}
6643 			break;
6644 		case 8: /* D1 page flip */
6645 		case 10: /* D2 page flip */
6646 		case 12: /* D3 page flip */
6647 		case 14: /* D4 page flip */
6648 		case 16: /* D5 page flip */
6649 		case 18: /* D6 page flip */
6650 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6651 			if (radeon_use_pflipirq > 0)
6652 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6653 			break;
6654 		case 42: /* HPD hotplug */
6655 			switch (src_data) {
6656 			case 0:
6657 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6658 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6659 
6660 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6661 				queue_hotplug = true;
6662 				DRM_DEBUG("IH: HPD1\n");
6663 
6664 				break;
6665 			case 1:
6666 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6667 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6668 
6669 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6670 				queue_hotplug = true;
6671 				DRM_DEBUG("IH: HPD2\n");
6672 
6673 				break;
6674 			case 2:
6675 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6676 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6677 
6678 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6679 				queue_hotplug = true;
6680 				DRM_DEBUG("IH: HPD3\n");
6681 
6682 				break;
6683 			case 3:
6684 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6685 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6686 
6687 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6688 				queue_hotplug = true;
6689 				DRM_DEBUG("IH: HPD4\n");
6690 
6691 				break;
6692 			case 4:
6693 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6694 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6695 
6696 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6697 				queue_hotplug = true;
6698 				DRM_DEBUG("IH: HPD5\n");
6699 
6700 				break;
6701 			case 5:
6702 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6703 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6704 
6705 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6706 				queue_hotplug = true;
6707 				DRM_DEBUG("IH: HPD6\n");
6708 
6709 				break;
6710 			case 6:
6711 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6712 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6713 
6714 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6715 				queue_dp = true;
6716 				DRM_DEBUG("IH: HPD_RX 1\n");
6717 
6718 				break;
6719 			case 7:
6720 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6721 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6722 
6723 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6724 				queue_dp = true;
6725 				DRM_DEBUG("IH: HPD_RX 2\n");
6726 
6727 				break;
6728 			case 8:
6729 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6730 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6731 
6732 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6733 				queue_dp = true;
6734 				DRM_DEBUG("IH: HPD_RX 3\n");
6735 
6736 				break;
6737 			case 9:
6738 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6739 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6740 
6741 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6742 				queue_dp = true;
6743 				DRM_DEBUG("IH: HPD_RX 4\n");
6744 
6745 				break;
6746 			case 10:
6747 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6748 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6749 
6750 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6751 				queue_dp = true;
6752 				DRM_DEBUG("IH: HPD_RX 5\n");
6753 
6754 				break;
6755 			case 11:
6756 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6757 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6758 
6759 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6760 				queue_dp = true;
6761 				DRM_DEBUG("IH: HPD_RX 6\n");
6762 
6763 				break;
6764 			default:
6765 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6766 				break;
6767 			}
6768 			break;
6769 		case 96:
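			/* SRBM read error: report the fault source and ack the interrupt */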
6770 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6771 			WREG32(SRBM_INT_ACK, 0x1);
6772 			break;
6773 		case 124: /* UVD */
6774 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6775 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6776 			break;
6777 		case 146:
6778 		case 147:
6779 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6780 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6781 			/* reset addr and status */
6782 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6783 			if (addr == 0x0 && status == 0x0)
6784 				break;
6785 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6786 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6787 				addr);
6788 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6789 				status);
6790 			si_vm_decode_fault(rdev, status, addr);
6791 			break;
6792 		case 176: /* RINGID0 CP_INT */
6793 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6794 			break;
6795 		case 177: /* RINGID1 CP_INT */
6796 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6797 			break;
6798 		case 178: /* RINGID2 CP_INT */
6799 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6800 			break;
6801 		case 181: /* CP EOP event */
6802 			DRM_DEBUG("IH: CP EOP\n");
6803 			switch (ring_id) {
6804 			case 0:
6805 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6806 				break;
6807 			case 1:
6808 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6809 				break;
6810 			case 2:
6811 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6812 				break;
6813 			}
6814 			break;
6815 		case 224: /* DMA trap event */
6816 			DRM_DEBUG("IH: DMA trap\n");
6817 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6818 			break;
6819 		case 230: /* thermal low to high */
6820 			DRM_DEBUG("IH: thermal low to high\n");
6821 			rdev->pm.dpm.thermal.high_to_low = false;
6822 			queue_thermal = true;
6823 			break;
6824 		case 231: /* thermal high to low */
6825 			DRM_DEBUG("IH: thermal high to low\n");
6826 			rdev->pm.dpm.thermal.high_to_low = true;
6827 			queue_thermal = true;
6828 			break;
6829 		case 233: /* GUI IDLE */
6830 			DRM_DEBUG("IH: GUI idle\n");
6831 			break;
6832 		case 244: /* DMA1 trap event */
6833 			DRM_DEBUG("IH: DMA1 trap\n");
6834 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6835 			break;
6836 		default:
6837 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6838 			break;
6839 		}
6840 
6841 		/* wptr/rptr are in bytes! */
6842 		rptr += 16;
6843 		rptr &= rdev->ih.ptr_mask;
6844 		WREG32(IH_RB_RPTR, rptr);
6845 	}
6846 	if (queue_dp)
6847 		schedule_work(&rdev->dp_work);
6848 	if (queue_hotplug)
6849 		schedule_delayed_work(&rdev->hotplug_work, 0);
6850 	if (queue_thermal && rdev->pm.dpm_enabled)
6851 		schedule_work(&rdev->pm.dpm.thermal.work);
6852 	rdev->ih.rptr = rptr;
6853 	atomic_set(&rdev->ih.lock, 0);
6854 
6855 	/* make sure wptr hasn't changed while processing */
6856 	wptr = si_get_ih_wptr(rdev);
6857 	if (wptr != rptr)
6858 		goto restart_ih;
6859 
6860 	return IRQ_HANDLED;
6861 }
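
#if 0
/*
 * Illustrative sketch only (not built): each IH ring entry on SI is
 * 16 bytes, so the processing loop above steps the read pointer in
 * 16-byte increments and wraps it with the power-of-two ring mask.
 * si_ih_advance_rptr() is a hypothetical helper, not driver code.
 */
static u32 si_ih_advance_rptr(u32 rptr, u32 ptr_mask)
{
	return (rptr + 16) & ptr_mask;	/* ptr_mask == ring_size - 1 */
}
#endif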
6862 
6863 /*
6864  * startup/shutdown callbacks
6865  */
6866 static void si_uvd_init(struct radeon_device *rdev)
6867 {
6868 	int r;
6869 
6870 	if (!rdev->has_uvd)
6871 		return;
6872 
6873 	r = radeon_uvd_init(rdev);
6874 	if (r) {
6875 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6876 		/*
6877 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6878 		 * uvd_v2_2_resume() fail early so that nothing happens
6879 		 * there. Going through that code is therefore pointless,
6880 		 * hence we disable UVD here.
6881 		 */
6882 		rdev->has_uvd = 0;
6883 		return;
6884 	}
6885 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6886 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6887 }
6888 
6889 static void si_uvd_start(struct radeon_device *rdev)
6890 {
6891 	int r;
6892 
6893 	if (!rdev->has_uvd)
6894 		return;
6895 
6896 	r = uvd_v2_2_resume(rdev);
6897 	if (r) {
6898 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6899 		goto error;
6900 	}
6901 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6902 	if (r) {
6903 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6904 		goto error;
6905 	}
6906 	return;
6907 
6908 error:
6909 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6910 }
6911 
6912 static void si_uvd_resume(struct radeon_device *rdev)
6913 {
6914 	struct radeon_ring *ring;
6915 	int r;
6916 
6917 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6918 		return;
6919 
6920 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6921 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6922 	if (r) {
6923 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6924 		return;
6925 	}
6926 	r = uvd_v1_0_init(rdev);
6927 	if (r) {
6928 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6929 		return;
6930 	}
6931 }
6932 
6933 static void si_vce_init(struct radeon_device *rdev)
6934 {
6935 	int r;
6936 
6937 	if (!rdev->has_vce)
6938 		return;
6939 
6940 	r = radeon_vce_init(rdev);
6941 	if (r) {
6942 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6943 		/*
6944 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6945 		 * si_vce_start() fail early so that nothing happens
6946 		 * there. Going through that code is therefore pointless,
6947 		 * hence we disable VCE here.
6948 		 */
6949 		rdev->has_vce = 0;
6950 		return;
6951 	}
6952 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6953 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6954 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6955 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6956 }
6957 
6958 static void si_vce_start(struct radeon_device *rdev)
6959 {
6960 	int r;
6961 
6962 	if (!rdev->has_vce)
6963 		return;
6964 
6965 	r = radeon_vce_resume(rdev);
6966 	if (r) {
6967 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6968 		goto error;
6969 	}
6970 	r = vce_v1_0_resume(rdev);
6971 	if (r) {
6972 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6973 		goto error;
6974 	}
6975 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6976 	if (r) {
6977 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6978 		goto error;
6979 	}
6980 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6981 	if (r) {
6982 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6983 		goto error;
6984 	}
6985 	return;
6986 
6987 error:
6988 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6989 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6990 }
6991 
6992 static void si_vce_resume(struct radeon_device *rdev)
6993 {
6994 	struct radeon_ring *ring;
6995 	int r;
6996 
6997 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6998 		return;
6999 
7000 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7001 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7002 	if (r) {
7003 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7004 		return;
7005 	}
7006 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7007 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7008 	if (r) {
7009 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
7010 		return;
7011 	}
7012 	r = vce_v1_0_init(rdev);
7013 	if (r) {
7014 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7015 		return;
7016 	}
7017 }
7018 
7019 static int si_startup(struct radeon_device *rdev)
7020 {
7021 	struct radeon_ring *ring;
7022 	int r;
7023 
7024 	/* enable pcie gen2/3 link */
7025 	si_pcie_gen3_enable(rdev);
7026 	/* enable aspm */
7027 	si_program_aspm(rdev);
7028 
7029 	/* scratch needs to be initialized before MC */
7030 	r = r600_vram_scratch_init(rdev);
7031 	if (r)
7032 		return r;
7033 
7034 	si_mc_program(rdev);
7035 
7036 	if (!rdev->pm.dpm_enabled) {
7037 		r = si_mc_load_microcode(rdev);
7038 		if (r) {
7039 			DRM_ERROR("Failed to load MC firmware!\n");
7040 			return r;
7041 		}
7042 	}
7043 
7044 	r = si_pcie_gart_enable(rdev);
7045 	if (r)
7046 		return r;
7047 	si_gpu_init(rdev);
7048 
7049 	/* allocate rlc buffers */
7050 	if (rdev->family == CHIP_VERDE) {
7051 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7052 		rdev->rlc.reg_list_size =
7053 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7054 	}
7055 	rdev->rlc.cs_data = si_cs_data;
7056 	r = sumo_rlc_init(rdev);
7057 	if (r) {
7058 		DRM_ERROR("Failed to init rlc BOs!\n");
7059 		return r;
7060 	}
7061 
7062 	/* allocate wb buffer */
7063 	r = radeon_wb_init(rdev);
7064 	if (r)
7065 		return r;
7066 
7067 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7068 	if (r) {
7069 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7070 		return r;
7071 	}
7072 
7073 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7074 	if (r) {
7075 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7076 		return r;
7077 	}
7078 
7079 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7080 	if (r) {
7081 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7082 		return r;
7083 	}
7084 
7085 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7086 	if (r) {
7087 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7088 		return r;
7089 	}
7090 
7091 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7092 	if (r) {
7093 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7094 		return r;
7095 	}
7096 
7097 	si_uvd_start(rdev);
7098 	si_vce_start(rdev);
7099 
7100 	/* Enable IRQ */
7101 	if (!rdev->irq.installed) {
7102 		r = radeon_irq_kms_init(rdev);
7103 		if (r)
7104 			return r;
7105 	}
7106 
7107 	r = si_irq_init(rdev);
7108 	if (r) {
7109 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7110 		radeon_irq_kms_fini(rdev);
7111 		return r;
7112 	}
7113 	si_irq_set(rdev);
7114 
7115 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7116 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7117 			     RADEON_CP_PACKET2);
7118 	if (r)
7119 		return r;
7120 
7121 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7122 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7123 			     RADEON_CP_PACKET2);
7124 	if (r)
7125 		return r;
7126 
7127 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7128 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7129 			     RADEON_CP_PACKET2);
7130 	if (r)
7131 		return r;
7132 
7133 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7134 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7135 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7136 	if (r)
7137 		return r;
7138 
7139 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7140 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7141 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7142 	if (r)
7143 		return r;
7144 
7145 	r = si_cp_load_microcode(rdev);
7146 	if (r)
7147 		return r;
7148 	r = si_cp_resume(rdev);
7149 	if (r)
7150 		return r;
7151 
7152 	r = cayman_dma_resume(rdev);
7153 	if (r)
7154 		return r;
7155 
7156 	si_uvd_resume(rdev);
7157 	si_vce_resume(rdev);
7158 
7159 	r = radeon_ib_pool_init(rdev);
7160 	if (r) {
7161 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7162 		return r;
7163 	}
7164 
7165 	r = radeon_vm_manager_init(rdev);
7166 	if (r) {
7167 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7168 		return r;
7169 	}
7170 
7171 	r = radeon_audio_init(rdev);
7172 	if (r)
7173 		return r;
7174 
7175 	return 0;
7176 }
7177 
7178 int si_resume(struct radeon_device *rdev)
7179 {
7180 	int r;
7181 
7182 	/* Do not reset the GPU before posting; on rv770 hw, unlike r500 hw,
7183 	 * posting performs the tasks necessary to bring the GPU back into
7184 	 * good shape.
7185 	 */
7186 	/* post card */
7187 	atom_asic_init(rdev->mode_info.atom_context);
7188 
7189 	/* init golden registers */
7190 	si_init_golden_registers(rdev);
7191 
7192 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7193 		radeon_pm_resume(rdev);
7194 
7195 	rdev->accel_working = true;
7196 	r = si_startup(rdev);
7197 	if (r) {
7198 		DRM_ERROR("si startup failed on resume\n");
7199 		rdev->accel_working = false;
7200 		return r;
7201 	}
7202 
7203 	return r;
7205 }
7206 
7207 int si_suspend(struct radeon_device *rdev)
7208 {
7209 	radeon_pm_suspend(rdev);
7210 	radeon_audio_fini(rdev);
7211 	radeon_vm_manager_fini(rdev);
7212 	si_cp_enable(rdev, false);
7213 	cayman_dma_stop(rdev);
7214 	if (rdev->has_uvd) {
7215 		uvd_v1_0_fini(rdev);
7216 		radeon_uvd_suspend(rdev);
7217 	}
7218 	if (rdev->has_vce)
7219 		radeon_vce_suspend(rdev);
7220 	si_fini_pg(rdev);
7221 	si_fini_cg(rdev);
7222 	si_irq_suspend(rdev);
7223 	radeon_wb_disable(rdev);
7224 	si_pcie_gart_disable(rdev);
7225 	return 0;
7226 }
7227 
7228 /* The plan is to move initialization into this function and use
7229  * helper functions so that radeon_device_init does pretty much
7230  * nothing more than call asic-specific functions. This should
7231  * also allow removing a bunch of callback functions, like
7232  * vram_info.
7233  */
7234 int si_init(struct radeon_device *rdev)
7235 {
7236 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7237 	int r;
7238 
7239 	/* Read BIOS */
7240 	if (!radeon_get_bios(rdev)) {
7241 		if (ASIC_IS_AVIVO(rdev))
7242 			return -EINVAL;
7243 	}
7244 	/* Must be an ATOMBIOS */
7245 	if (!rdev->is_atom_bios) {
7246 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7247 		return -EINVAL;
7248 	}
7249 	r = radeon_atombios_init(rdev);
7250 	if (r)
7251 		return r;
7252 
7253 	/* Post card if necessary */
7254 	if (!radeon_card_posted(rdev)) {
7255 		if (!rdev->bios) {
7256 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7257 			return -EINVAL;
7258 		}
7259 		DRM_INFO("GPU not posted. posting now...\n");
7260 		atom_asic_init(rdev->mode_info.atom_context);
7261 	}
7262 	/* init golden registers */
7263 	si_init_golden_registers(rdev);
7264 	/* Initialize scratch registers */
7265 	si_scratch_init(rdev);
7266 	/* Initialize surface registers */
7267 	radeon_surface_init(rdev);
7268 	/* Initialize clocks */
7269 	radeon_get_clock_info(rdev->ddev);
7270 
7271 	/* Fence driver */
7272 	r = radeon_fence_driver_init(rdev);
7273 	if (r)
7274 		return r;
7275 
7276 	/* initialize memory controller */
7277 	r = si_mc_init(rdev);
7278 	if (r)
7279 		return r;
7280 	/* Memory manager */
7281 	r = radeon_bo_init(rdev);
7282 	if (r)
7283 		return r;
7284 
7285 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7286 	    !rdev->rlc_fw || !rdev->mc_fw) {
7287 		r = si_init_microcode(rdev);
7288 		if (r) {
7289 			DRM_ERROR("Failed to load firmware!\n");
7290 			return r;
7291 		}
7292 	}
7293 
7294 	/* Initialize power management */
7295 	radeon_pm_init(rdev);
7296 
7297 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7298 	ring->ring_obj = NULL;
7299 	r600_ring_init(rdev, ring, 1024 * 1024);
7300 
7301 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7302 	ring->ring_obj = NULL;
7303 	r600_ring_init(rdev, ring, 1024 * 1024);
7304 
7305 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7306 	ring->ring_obj = NULL;
7307 	r600_ring_init(rdev, ring, 1024 * 1024);
7308 
7309 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7310 	ring->ring_obj = NULL;
7311 	r600_ring_init(rdev, ring, 64 * 1024);
7312 
7313 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7314 	ring->ring_obj = NULL;
7315 	r600_ring_init(rdev, ring, 64 * 1024);
7316 
7317 	si_uvd_init(rdev);
7318 	si_vce_init(rdev);
7319 
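	/* IH ring: r600_ih_ring_init() aligns the size to a power of two,
	 * so si_irq_process() can wrap the read pointer with ptr_mask. */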
7320 	rdev->ih.ring_obj = NULL;
7321 	r600_ih_ring_init(rdev, 64 * 1024);
7322 
7323 	r = r600_pcie_gart_init(rdev);
7324 	if (r)
7325 		return r;
7326 
7327 	rdev->accel_working = true;
7328 	r = si_startup(rdev);
7329 	if (r) {
7330 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7331 		si_cp_fini(rdev);
7332 		cayman_dma_fini(rdev);
7333 		si_irq_fini(rdev);
7334 		sumo_rlc_fini(rdev);
7335 		radeon_wb_fini(rdev);
7336 		radeon_ib_pool_fini(rdev);
7337 		radeon_vm_manager_fini(rdev);
7338 		radeon_irq_kms_fini(rdev);
7339 		si_pcie_gart_fini(rdev);
7340 		rdev->accel_working = false;
7341 	}
7342 
7343 	/* Don't start up if the MC ucode is missing.
7344 	 * The default clocks and voltages before the MC ucode
7345 	 * is loaded are not sufficient for advanced operations.
7346 	 */
7347 	if (!rdev->mc_fw) {
7348 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7349 		return -EINVAL;
7350 	}
7351 
7352 	return 0;
7353 }
7354 
7355 void si_fini(struct radeon_device *rdev)
7356 {
7357 	radeon_pm_fini(rdev);
7358 	si_cp_fini(rdev);
7359 	cayman_dma_fini(rdev);
7360 	si_fini_pg(rdev);
7361 	si_fini_cg(rdev);
7362 	si_irq_fini(rdev);
7363 	sumo_rlc_fini(rdev);
7364 	radeon_wb_fini(rdev);
7365 	radeon_vm_manager_fini(rdev);
7366 	radeon_ib_pool_fini(rdev);
7367 	radeon_irq_kms_fini(rdev);
7368 	if (rdev->has_uvd) {
7369 		uvd_v1_0_fini(rdev);
7370 		radeon_uvd_fini(rdev);
7371 	}
7372 	if (rdev->has_vce)
7373 		radeon_vce_fini(rdev);
7374 	si_pcie_gart_fini(rdev);
7375 	r600_vram_scratch_fini(rdev);
7376 	radeon_gem_fini(rdev);
7377 	radeon_fence_driver_fini(rdev);
7378 	radeon_bo_fini(rdev);
7379 	radeon_atombios_fini(rdev);
7380 	kfree(rdev->bios);
7381 	rdev->bios = NULL;
7382 }
7383 
7384 /**
7385  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7386  *
7387  * @rdev: radeon_device pointer
7388  *
7389  * Fetches a GPU clock counter snapshot (SI).
7390  * Returns the 64 bit clock counter snapshot.
7391  */
7392 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7393 {
7394 	uint64_t clock;
7395 
7396 	mutex_lock(&rdev->gpu_clock_mutex);
7397 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7398 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7399 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7400 	mutex_unlock(&rdev->gpu_clock_mutex);
7401 	return clock;
7402 }
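
#if 0
/*
 * Illustrative sketch only (not built): timing a workload in GPU
 * clocks with back-to-back counter snapshots. si_gpu_clock_delta()
 * is a hypothetical helper, not part of the driver.
 */
static uint64_t si_gpu_clock_delta(struct radeon_device *rdev)
{
	uint64_t start = si_get_gpu_clock_counter(rdev);
	/* ... run the workload to be measured ... */
	return si_get_gpu_clock_counter(rdev) - start;
}
#endif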
7403 
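/*
 * si_set_uvd_clocks - program the UPLL for the requested VCLK/DCLK
 *
 * The sequence below is the usual PLL reprogramming dance: route
 * VCLK/DCLK to the bypass clock, put the PLL in bypass, program the
 * feedback and post dividers, let the PLL settle, run the hardware
 * handshake, then switch the clock muxes back over. Passing
 * vclk == 0 or dclk == 0 simply leaves the PLL in bypass.
 */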
7404 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7405 {
7406 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7407 	int r;
7408 
7409 	/* bypass vclk and dclk with bclk */
7410 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7411 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7412 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7413 
7414 	/* put PLL in bypass mode */
7415 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7416 
7417 	if (!vclk || !dclk) {
7418 		/* keep the Bypass mode */
7419 		return 0;
7420 	}
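
	/*
	 * Note on the masked writes used throughout this sequence:
	 * WREG32_P(reg, val, mask) is a read-modify-write that preserves
	 * the register bits selected by @mask and installs @val in the
	 * rest; roughly equivalent to (placeholders, see the definition
	 * in radeon.h):
	 */
#if 0
	u32 tmp = RREG32(reg);
	tmp &= mask;		/* keep the bits covered by the mask */
	tmp |= val & ~mask;	/* install the new field */
	WREG32(reg, tmp);
#endif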
7421 
7422 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7423 					  16384, 0x03FFFFFF, 0, 128, 5,
7424 					  &fb_div, &vclk_div, &dclk_div);
7425 	if (r)
7426 		return r;
7427 
7428 	/* set RESET_ANTI_MUX to 0 */
7429 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7430 
7431 	/* set VCO_MODE to 1 */
7432 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7433 
7434 	/* disable sleep mode */
7435 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7436 
7437 	/* deassert UPLL_RESET */
7438 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7439 
7440 	mdelay(1);
7441 
7442 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7443 	if (r)
7444 		return r;
7445 
7446 	/* assert UPLL_RESET again */
7447 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7448 
7449 	/* disable spread spectrum. */
7450 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7451 
7452 	/* set feedback divider */
7453 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7454 
7455 	/* set ref divider to 0 */
7456 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7457 
7458 	if (fb_div < 307200)
7459 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7460 	else
7461 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7462 
7463 	/* set PDIV_A and PDIV_B */
7464 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7465 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7466 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7467 
7468 	/* give the PLL some time to settle */
7469 	mdelay(15);
7470 
7471 	/* deassert PLL_RESET */
7472 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7473 
7474 	mdelay(15);
7475 
7476 	/* switch from bypass mode to normal mode */
7477 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7478 
7479 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7480 	if (r)
7481 		return r;
7482 
7483 	/* switch VCLK and DCLK selection */
7484 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7485 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7486 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7487 
7488 	mdelay(100);
7489 
7490 	return 0;
7491 }
7492 
7493 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7494 {
7495 	struct pci_dev *root = rdev->pdev->bus->self;
7496 	int bridge_pos, gpu_pos;
7497 	u32 speed_cntl, mask, current_data_rate;
7498 	int ret, i;
7499 	u16 tmp16;
7500 
7501 	if (pci_is_root_bus(rdev->pdev->bus))
7502 		return;
7503 
7504 	if (radeon_pcie_gen2 == 0)
7505 		return;
7506 
7507 	if (rdev->flags & RADEON_IS_IGP)
7508 		return;
7509 
7510 	if (!(rdev->flags & RADEON_IS_PCIE))
7511 		return;
7512 
7513 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7514 	if (ret != 0)
7515 		return;
7516 
7517 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7518 		return;
7519 
7520 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7521 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7522 		LC_CURRENT_DATA_RATE_SHIFT;
7523 	if (mask & DRM_PCIE_SPEED_80) {
7524 		if (current_data_rate == 2) {
7525 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7526 			return;
7527 		}
7528 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7529 	} else if (mask & DRM_PCIE_SPEED_50) {
7530 		if (current_data_rate == 1) {
7531 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7532 			return;
7533 		}
7534 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7535 	}
7536 
7537 	bridge_pos = pci_pcie_cap(root);
7538 	if (!bridge_pos)
7539 		return;
7540 
7541 	gpu_pos = pci_pcie_cap(rdev->pdev);
7542 	if (!gpu_pos)
7543 		return;
7544 
7545 	if (mask & DRM_PCIE_SPEED_80) {
7546 		/* re-try equalization if gen3 is not already enabled */
7547 		if (current_data_rate != 2) {
7548 			u16 bridge_cfg, gpu_cfg;
7549 			u16 bridge_cfg2, gpu_cfg2;
7550 			u32 max_lw, current_lw, tmp;
7551 
7552 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7553 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7554 
7555 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7556 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7557 
7558 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7559 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7560 
7561 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7562 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7563 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7564 
7565 			if (current_lw < max_lw) {
7566 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7567 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7568 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7569 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7570 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7571 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7572 				}
7573 			}
7574 
7575 			for (i = 0; i < 10; i++) {
7576 				/* check status */
7577 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7578 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7579 					break;
7580 
7581 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7582 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7583 
7584 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7585 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7586 
7587 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7588 				tmp |= LC_SET_QUIESCE;
7589 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7590 
7591 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7592 				tmp |= LC_REDO_EQ;
7593 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7594 
7595 				mdelay(100);
7596 
7597 				/* linkctl */
7598 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7599 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7600 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7601 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7602 
7603 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7604 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7605 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7606 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7607 
7608 				/* linkctl2 */
7609 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7610 				tmp16 &= ~((1 << 4) | (7 << 9));
7611 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7612 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7613 
7614 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7615 				tmp16 &= ~((1 << 4) | (7 << 9));
7616 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7617 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7618 
7619 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7620 				tmp &= ~LC_SET_QUIESCE;
7621 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7622 			}
7623 		}
7624 	}
7625 
7626 	/* set the link speed */
7627 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7628 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7629 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7630 
7631 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7632 	tmp16 &= ~0xf;
7633 	if (mask & DRM_PCIE_SPEED_80)
7634 		tmp16 |= 3; /* gen3 */
7635 	else if (mask & DRM_PCIE_SPEED_50)
7636 		tmp16 |= 2; /* gen2 */
7637 	else
7638 		tmp16 |= 1; /* gen1 */
7639 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
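
	/*
	 * The target-link-speed values written above use the standard
	 * PCI_EXP_LNKCTL2 encoding (1 = 2.5 GT/s, 2 = 5.0 GT/s,
	 * 3 = 8.0 GT/s). A sketch of the same selection with the symbolic
	 * names from <uapi/linux/pci_regs.h>, assuming a kernel that
	 * provides them:
	 */
#if 0
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT;
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT;
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT;
#endif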
7640 
7641 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7642 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7643 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7644 
7645 	for (i = 0; i < rdev->usec_timeout; i++) {
7646 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7647 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7648 			break;
7649 		udelay(1);
7650 	}
7651 }
7652 
7653 static void si_program_aspm(struct radeon_device *rdev)
7654 {
7655 	u32 data, orig;
7656 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7657 	bool disable_clkreq = false;
7658 
7659 	if (radeon_aspm == 0)
7660 		return;
7661 
7662 	if (!(rdev->flags & RADEON_IS_PCIE))
7663 		return;
7664 
7665 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7666 	data &= ~LC_XMIT_N_FTS_MASK;
7667 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7668 	if (orig != data)
7669 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7670 
7671 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7672 	data |= LC_GO_TO_RECOVERY;
7673 	if (orig != data)
7674 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7675 
7676 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7677 	data |= P_IGNORE_EDB_ERR;
7678 	if (orig != data)
7679 		WREG32_PCIE(PCIE_P_CNTL, data);
7680 
7681 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7682 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7683 	data |= LC_PMI_TO_L1_DIS;
7684 	if (!disable_l0s)
7685 		data |= LC_L0S_INACTIVITY(7);
7686 
7687 	if (!disable_l1) {
7688 		data |= LC_L1_INACTIVITY(7);
7689 		data &= ~LC_PMI_TO_L1_DIS;
7690 		if (orig != data)
7691 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7692 
7693 		if (!disable_plloff_in_l1) {
7694 			bool clk_req_support;
7695 
7696 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7697 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7698 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7699 			if (orig != data)
7700 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7701 
7702 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7703 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7704 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7705 			if (orig != data)
7706 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7707 
7708 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7709 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7710 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7711 			if (orig != data)
7712 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7713 
7714 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7715 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7716 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7717 			if (orig != data)
7718 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7719 
7720 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7721 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7722 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7723 				if (orig != data)
7724 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7725 
7726 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7727 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7728 				if (orig != data)
7729 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7730 
7731 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7732 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7733 				if (orig != data)
7734 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7735 
7736 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7737 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7738 				if (orig != data)
7739 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7740 
7741 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7742 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7743 				if (orig != data)
7744 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7745 
7746 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7747 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7748 				if (orig != data)
7749 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7750 
7751 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7752 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7753 				if (orig != data)
7754 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7755 
7756 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7757 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7758 				if (orig != data)
7759 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7760 			}
7761 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7762 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7763 			data |= LC_DYN_LANES_PWR_STATE(3);
7764 			if (orig != data)
7765 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7766 
7767 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7768 			data &= ~LS2_EXIT_TIME_MASK;
7769 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7770 				data |= LS2_EXIT_TIME(5);
7771 			if (orig != data)
7772 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7773 
7774 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7775 			data &= ~LS2_EXIT_TIME_MASK;
7776 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7777 				data |= LS2_EXIT_TIME(5);
7778 			if (orig != data)
7779 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7780 
7781 			if (!disable_clkreq &&
7782 			    !pci_is_root_bus(rdev->pdev->bus)) {
7783 				struct pci_dev *root = rdev->pdev->bus->self;
7784 				u32 lnkcap;
7785 
7786 				clk_req_support = false;
7787 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7788 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7789 					clk_req_support = true;
7790 			} else {
7791 				clk_req_support = false;
7792 			}
7793 
7794 			if (clk_req_support) {
7795 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7796 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7797 				if (orig != data)
7798 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7799 
7800 				orig = data = RREG32(THM_CLK_CNTL);
7801 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7802 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7803 				if (orig != data)
7804 					WREG32(THM_CLK_CNTL, data);
7805 
7806 				orig = data = RREG32(MISC_CLK_CNTL);
7807 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7808 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7809 				if (orig != data)
7810 					WREG32(MISC_CLK_CNTL, data);
7811 
7812 				orig = data = RREG32(CG_CLKPIN_CNTL);
7813 				data &= ~BCLK_AS_XCLK;
7814 				if (orig != data)
7815 					WREG32(CG_CLKPIN_CNTL, data);
7816 
7817 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7818 				data &= ~FORCE_BIF_REFCLK_EN;
7819 				if (orig != data)
7820 					WREG32(CG_CLKPIN_CNTL_2, data);
7821 
7822 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7823 				data &= ~MPLL_CLKOUT_SEL_MASK;
7824 				data |= MPLL_CLKOUT_SEL(4);
7825 				if (orig != data)
7826 					WREG32(MPLL_BYPASSCLK_SEL, data);
7827 
7828 				orig = data = RREG32(SPLL_CNTL_MODE);
7829 				data &= ~SPLL_REFCLK_SEL_MASK;
7830 				if (orig != data)
7831 					WREG32(SPLL_CNTL_MODE, data);
7832 			}
7833 		}
7834 	} else {
7835 		if (orig != data)
7836 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7837 	}
7838 
7839 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7840 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7841 	if (orig != data)
7842 		WREG32_PCIE(PCIE_CNTL2, data);
7843 
7844 	if (!disable_l0s) {
7845 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7846 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7847 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7848 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7849 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7850 				data &= ~LC_L0S_INACTIVITY_MASK;
7851 				if (orig != data)
7852 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7853 			}
7854 		}
7855 	}
7856 }
7857 
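/*
 * si_vce_send_vcepll_ctlreq - CTLREQ/CTLACK handshake for the VCE PLL,
 * the VCEPLL counterpart of radeon_uvd_send_upll_ctlreq(): assert
 * CTLREQ, poll for both CTLACK bits, then deassert CTLREQ again.
 */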
7858 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7859 {
7860 	unsigned i;
7861 
7862 	/* make sure VCEPLL_CTLREQ is deasserted */
7863 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7864 
7865 	mdelay(10);
7866 
7867 	/* assert UPLL_CTLREQ */
7868 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7869 
7870 	/* wait for CTLACK and CTLACK2 to get asserted */
7871 	for (i = 0; i < 100; ++i) {
7872 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7873 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7874 			break;
7875 		mdelay(10);
7876 	}
7877 
7878 	/* deassert UPLL_CTLREQ */
7879 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7880 
7881 	if (i == 100) {
7882 		DRM_ERROR("Timeout setting VCE clocks!\n");
7883 		return -ETIMEDOUT;
7884 	}
7885 
7886 	return 0;
7887 }
7888 
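/*
 * si_set_vce_clocks - program the VCEPLL for the requested EVCLK/ECCLK
 *
 * Mirrors the UPLL sequence in si_set_uvd_clocks(), but goes through
 * the SMC register space (WREG32_SMC_P) and puts the PLL to sleep when
 * both clocks are zero.
 */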
7889 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7890 {
7891 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7892 	int r;
7893 
7894 	/* bypass evclk and ecclk with bclk */
7895 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7896 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7897 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7898 
7899 	/* put PLL in bypass mode */
7900 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7901 		     ~VCEPLL_BYPASS_EN_MASK);
7902 
7903 	if (!evclk || !ecclk) {
7904 		/* keep the Bypass mode, put PLL to sleep */
7905 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7906 			     ~VCEPLL_SLEEP_MASK);
7907 		return 0;
7908 	}
7909 
7910 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7911 					  16384, 0x03FFFFFF, 0, 128, 5,
7912 					  &fb_div, &evclk_div, &ecclk_div);
7913 	if (r)
7914 		return r;
7915 
7916 	/* set RESET_ANTI_MUX to 0 */
7917 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7918 
7919 	/* set VCO_MODE to 1 */
7920 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7921 		     ~VCEPLL_VCO_MODE_MASK);
7922 
7923 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7924 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7925 		     ~VCEPLL_SLEEP_MASK);
7926 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7927 
7928 	/* deassert VCEPLL_RESET */
7929 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7930 
7931 	mdelay(1);
7932 
7933 	r = si_vce_send_vcepll_ctlreq(rdev);
7934 	if (r)
7935 		return r;
7936 
7937 	/* assert VCEPLL_RESET again */
7938 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7939 
7940 	/* disable spread spectrum. */
7941 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7942 
7943 	/* set feedback divider */
7944 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7945 
7946 	/* set ref divider to 0 */
7947 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7948 
7949 	/* set PDIV_A and PDIV_B */
7950 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7951 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7952 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7953 
7954 	/* give the PLL some time to settle */
7955 	mdelay(15);
7956 
7957 	/* deassert PLL_RESET */
7958 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7959 
7960 	mdelay(15);
7961 
7962 	/* switch from bypass mode to normal mode */
7963 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7964 
7965 	r = si_vce_send_vcepll_ctlreq(rdev);
7966 	if (r)
7967 		return r;
7968 
7969 	/* switch EVCLK and ECCLK selection */
7970 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7971 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7972 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7973 
7974 	mdelay(100);
7975 
7976 	return 0;
7977 }
7978