/* xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision df3305156f989339529b3d6744b898d498fb1f7b) */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
/*
 * Firmware images that may be requested for Southern Islands (SI) ASICs.
 * MODULE_FIRMWARE() records each file name in the module info section so
 * userspace tooling (e.g. initramfs builders) knows to bundle them.
 *
 * Each ASIC appears twice: once with legacy uppercase file names and once
 * with newer lowercase names.  NOTE(review): which naming scheme is tried
 * first (and the fallback order) is decided by the microcode-loading code,
 * which is not visible in this chunk — confirm in si_init_microcode().
 * The lowercase sets have no *_mc2.bin entry; presumably the newer
 * releases ship a single MC image.
 */

/* Tahiti — legacy names */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

/* Tahiti — new names */
MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

/* Pitcairn — legacy names */
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

/* Pitcairn — new names */
MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

/* Verde — legacy names */
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

/* Verde — new names */
MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

/* Oland — legacy names */
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

/* Oland — new names */
MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

/* Hainan — legacy names */
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

/* Hainan — new names */
MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
113 
/* Forward declarations for helpers defined later in this file. */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Routines implemented in other radeon ASIC files (sumo/r600/evergreen)
 * that SI reuses; declared extern here rather than via a shared header.
 */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
/* More file-local helpers (interrupt, power-gating and clock-gating setup). */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
134 
135 static const u32 verde_rlc_save_restore_register_list[] =
136 {
137 	(0x8000 << 16) | (0x98f4 >> 2),
138 	0x00000000,
139 	(0x8040 << 16) | (0x98f4 >> 2),
140 	0x00000000,
141 	(0x8000 << 16) | (0xe80 >> 2),
142 	0x00000000,
143 	(0x8040 << 16) | (0xe80 >> 2),
144 	0x00000000,
145 	(0x8000 << 16) | (0x89bc >> 2),
146 	0x00000000,
147 	(0x8040 << 16) | (0x89bc >> 2),
148 	0x00000000,
149 	(0x8000 << 16) | (0x8c1c >> 2),
150 	0x00000000,
151 	(0x8040 << 16) | (0x8c1c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x98f0 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0xe7c >> 2),
156 	0x00000000,
157 	(0x8000 << 16) | (0x9148 >> 2),
158 	0x00000000,
159 	(0x8040 << 16) | (0x9148 >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9150 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x897c >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x8d8c >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0xac54 >> 2),
168 	0X00000000,
169 	0x3,
170 	(0x9c00 << 16) | (0x98f8 >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9910 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x9914 >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9918 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x991c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9920 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x9924 >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9928 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x992c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x9930 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x9934 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x9938 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x993c >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x9940 >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0x9944 >> 2),
199 	0x00000000,
200 	(0x9c00 << 16) | (0x9948 >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0x994c >> 2),
203 	0x00000000,
204 	(0x9c00 << 16) | (0x9950 >> 2),
205 	0x00000000,
206 	(0x9c00 << 16) | (0x9954 >> 2),
207 	0x00000000,
208 	(0x9c00 << 16) | (0x9958 >> 2),
209 	0x00000000,
210 	(0x9c00 << 16) | (0x995c >> 2),
211 	0x00000000,
212 	(0x9c00 << 16) | (0x9960 >> 2),
213 	0x00000000,
214 	(0x9c00 << 16) | (0x9964 >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x9968 >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x996c >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9970 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9974 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x9978 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x997c >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x9980 >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x9984 >> 2),
231 	0x00000000,
232 	(0x9c00 << 16) | (0x9988 >> 2),
233 	0x00000000,
234 	(0x9c00 << 16) | (0x998c >> 2),
235 	0x00000000,
236 	(0x9c00 << 16) | (0x8c00 >> 2),
237 	0x00000000,
238 	(0x9c00 << 16) | (0x8c14 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x8c04 >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x8c08 >> 2),
243 	0x00000000,
244 	(0x8000 << 16) | (0x9b7c >> 2),
245 	0x00000000,
246 	(0x8040 << 16) | (0x9b7c >> 2),
247 	0x00000000,
248 	(0x8000 << 16) | (0xe84 >> 2),
249 	0x00000000,
250 	(0x8040 << 16) | (0xe84 >> 2),
251 	0x00000000,
252 	(0x8000 << 16) | (0x89c0 >> 2),
253 	0x00000000,
254 	(0x8040 << 16) | (0x89c0 >> 2),
255 	0x00000000,
256 	(0x8000 << 16) | (0x914c >> 2),
257 	0x00000000,
258 	(0x8040 << 16) | (0x914c >> 2),
259 	0x00000000,
260 	(0x8000 << 16) | (0x8c20 >> 2),
261 	0x00000000,
262 	(0x8040 << 16) | (0x8c20 >> 2),
263 	0x00000000,
264 	(0x8000 << 16) | (0x9354 >> 2),
265 	0x00000000,
266 	(0x8040 << 16) | (0x9354 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0x9060 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x9364 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x9100 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x913c >> 2),
275 	0x00000000,
276 	(0x8000 << 16) | (0x90e0 >> 2),
277 	0x00000000,
278 	(0x8000 << 16) | (0x90e4 >> 2),
279 	0x00000000,
280 	(0x8000 << 16) | (0x90e8 >> 2),
281 	0x00000000,
282 	(0x8040 << 16) | (0x90e0 >> 2),
283 	0x00000000,
284 	(0x8040 << 16) | (0x90e4 >> 2),
285 	0x00000000,
286 	(0x8040 << 16) | (0x90e8 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x8bcc >> 2),
289 	0x00000000,
290 	(0x9c00 << 16) | (0x8b24 >> 2),
291 	0x00000000,
292 	(0x9c00 << 16) | (0x88c4 >> 2),
293 	0x00000000,
294 	(0x9c00 << 16) | (0x8e50 >> 2),
295 	0x00000000,
296 	(0x9c00 << 16) | (0x8c0c >> 2),
297 	0x00000000,
298 	(0x9c00 << 16) | (0x8e58 >> 2),
299 	0x00000000,
300 	(0x9c00 << 16) | (0x8e5c >> 2),
301 	0x00000000,
302 	(0x9c00 << 16) | (0x9508 >> 2),
303 	0x00000000,
304 	(0x9c00 << 16) | (0x950c >> 2),
305 	0x00000000,
306 	(0x9c00 << 16) | (0x9494 >> 2),
307 	0x00000000,
308 	(0x9c00 << 16) | (0xac0c >> 2),
309 	0x00000000,
310 	(0x9c00 << 16) | (0xac10 >> 2),
311 	0x00000000,
312 	(0x9c00 << 16) | (0xac14 >> 2),
313 	0x00000000,
314 	(0x9c00 << 16) | (0xae00 >> 2),
315 	0x00000000,
316 	(0x9c00 << 16) | (0xac08 >> 2),
317 	0x00000000,
318 	(0x9c00 << 16) | (0x88d4 >> 2),
319 	0x00000000,
320 	(0x9c00 << 16) | (0x88c8 >> 2),
321 	0x00000000,
322 	(0x9c00 << 16) | (0x88cc >> 2),
323 	0x00000000,
324 	(0x9c00 << 16) | (0x89b0 >> 2),
325 	0x00000000,
326 	(0x9c00 << 16) | (0x8b10 >> 2),
327 	0x00000000,
328 	(0x9c00 << 16) | (0x8a14 >> 2),
329 	0x00000000,
330 	(0x9c00 << 16) | (0x9830 >> 2),
331 	0x00000000,
332 	(0x9c00 << 16) | (0x9834 >> 2),
333 	0x00000000,
334 	(0x9c00 << 16) | (0x9838 >> 2),
335 	0x00000000,
336 	(0x9c00 << 16) | (0x9a10 >> 2),
337 	0x00000000,
338 	(0x8000 << 16) | (0x9870 >> 2),
339 	0x00000000,
340 	(0x8000 << 16) | (0x9874 >> 2),
341 	0x00000000,
342 	(0x8001 << 16) | (0x9870 >> 2),
343 	0x00000000,
344 	(0x8001 << 16) | (0x9874 >> 2),
345 	0x00000000,
346 	(0x8040 << 16) | (0x9870 >> 2),
347 	0x00000000,
348 	(0x8040 << 16) | (0x9874 >> 2),
349 	0x00000000,
350 	(0x8041 << 16) | (0x9870 >> 2),
351 	0x00000000,
352 	(0x8041 << 16) | (0x9874 >> 2),
353 	0x00000000,
354 	0x00000000
355 };
356 
/*
 * Tahiti RLC "golden" register settings: {offset, mask, value} triples.
 * NOTE(review): format inferred from the 3-word stride; presumably applied
 * via the common golden-register programming helper — confirm at call site.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
366 
/*
 * Tahiti "golden" register settings: {offset, mask, value} triples applied
 * during ASIC init.  NOTE(review): format inferred from the 3-word stride;
 * confirm against the golden-register programming helper before editing.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
401 
/* Second Tahiti golden-register set; same {offset, mask, value} format. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
406 
/* Pitcairn RLC golden-register settings: {offset, mask, value} triples. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
415 
/* Pitcairn golden-register settings: {offset, mask, value} triples. */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
446 
/* Verde RLC golden-register settings: {offset, mask, value} triples. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
455 
/*
 * Verde golden-register settings: {offset, mask, value} triples.
 * Several entries are listed two or three times (0xd030, 0x2ae4, 0x240c,
 * 0x8a14, 0x28350, ...).  Kept verbatim — presumably intentional repeated
 * writes inherited from the vendor table; do not de-duplicate without
 * hardware validation.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
511 
/* Oland RLC golden-register settings: {offset, mask, value} triples. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
520 
/* Oland golden-register settings: {offset, mask, value} triples. */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
551 
/* Hainan golden-register settings: {offset, mask, value} triples. */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
580 
/* Second Hainan golden-register set; same {offset, mask, value} format. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
585 
/*
 * Tahiti clock-gating init table: {offset, mask, value} triples.
 * Per the name, these program medium-grain (MGCG) and coarse-grain (CGCG)
 * clock-gating defaults.  NOTE(review): format inferred from the 3-word
 * stride; presumably consumed by the si_init_cg() path — confirm there.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
715 
/*
 * Pitcairn clock-gating init table: {offset, mask, value} triples
 * (MGCG/CGCG defaults per the name).  Same layout as the Tahiti table but
 * with fewer 0x91xx/0x92xx entries and no 0x264c/0x2648 rows.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
813 
/*
 * Verde clock-gating init table: {offset, mask, value} triples
 * (MGCG/CGCG defaults per the name).  Nearly identical to the Pitcairn
 * table, but includes the 0x264c/0x2648 rows like Tahiti.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
913 
/* Oland clock-gating (MGCG/CGCG) init sequence: {register offset, mask,
 * value} triplets consumed by radeon_program_register_sequence() from
 * si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
993 
/* Hainan clock-gating (MGCG/CGCG) init sequence: {register offset, mask,
 * value} triplets consumed by radeon_program_register_sequence() from
 * si_init_golden_registers().  Same shape as the oland table but without
 * the 0x102c/0x55e4/0x55e8 entries (Hainan has no display block).
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1070 
1071 static u32 verde_pg_init[] =
1072 {
1073 	0x353c, 0xffffffff, 0x40000,
1074 	0x3538, 0xffffffff, 0x200010ff,
1075 	0x353c, 0xffffffff, 0x0,
1076 	0x353c, 0xffffffff, 0x0,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x0,
1079 	0x353c, 0xffffffff, 0x0,
1080 	0x353c, 0xffffffff, 0x7007,
1081 	0x3538, 0xffffffff, 0x300010ff,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x0,
1087 	0x353c, 0xffffffff, 0x400000,
1088 	0x3538, 0xffffffff, 0x100010ff,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x0,
1094 	0x353c, 0xffffffff, 0x120200,
1095 	0x3538, 0xffffffff, 0x500010ff,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x0,
1101 	0x353c, 0xffffffff, 0x1e1e16,
1102 	0x3538, 0xffffffff, 0x600010ff,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x0,
1108 	0x353c, 0xffffffff, 0x171f1e,
1109 	0x3538, 0xffffffff, 0x700010ff,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x0,
1112 	0x353c, 0xffffffff, 0x0,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x3538, 0xffffffff, 0x9ff,
1117 	0x3500, 0xffffffff, 0x0,
1118 	0x3504, 0xffffffff, 0x10000800,
1119 	0x3504, 0xffffffff, 0xf,
1120 	0x3504, 0xffffffff, 0xf,
1121 	0x3500, 0xffffffff, 0x4,
1122 	0x3504, 0xffffffff, 0x1000051e,
1123 	0x3504, 0xffffffff, 0xffff,
1124 	0x3504, 0xffffffff, 0xffff,
1125 	0x3500, 0xffffffff, 0x8,
1126 	0x3504, 0xffffffff, 0x80500,
1127 	0x3500, 0xffffffff, 0x12,
1128 	0x3504, 0xffffffff, 0x9050c,
1129 	0x3500, 0xffffffff, 0x1d,
1130 	0x3504, 0xffffffff, 0xb052c,
1131 	0x3500, 0xffffffff, 0x2a,
1132 	0x3504, 0xffffffff, 0x1053e,
1133 	0x3500, 0xffffffff, 0x2d,
1134 	0x3504, 0xffffffff, 0x10546,
1135 	0x3500, 0xffffffff, 0x30,
1136 	0x3504, 0xffffffff, 0xa054e,
1137 	0x3500, 0xffffffff, 0x3c,
1138 	0x3504, 0xffffffff, 0x1055f,
1139 	0x3500, 0xffffffff, 0x3f,
1140 	0x3504, 0xffffffff, 0x10567,
1141 	0x3500, 0xffffffff, 0x42,
1142 	0x3504, 0xffffffff, 0x1056f,
1143 	0x3500, 0xffffffff, 0x45,
1144 	0x3504, 0xffffffff, 0x10572,
1145 	0x3500, 0xffffffff, 0x48,
1146 	0x3504, 0xffffffff, 0x20575,
1147 	0x3500, 0xffffffff, 0x4c,
1148 	0x3504, 0xffffffff, 0x190801,
1149 	0x3500, 0xffffffff, 0x67,
1150 	0x3504, 0xffffffff, 0x1082a,
1151 	0x3500, 0xffffffff, 0x6a,
1152 	0x3504, 0xffffffff, 0x1b082d,
1153 	0x3500, 0xffffffff, 0x87,
1154 	0x3504, 0xffffffff, 0x310851,
1155 	0x3500, 0xffffffff, 0xba,
1156 	0x3504, 0xffffffff, 0x891,
1157 	0x3500, 0xffffffff, 0xbc,
1158 	0x3504, 0xffffffff, 0x893,
1159 	0x3500, 0xffffffff, 0xbe,
1160 	0x3504, 0xffffffff, 0x20895,
1161 	0x3500, 0xffffffff, 0xc2,
1162 	0x3504, 0xffffffff, 0x20899,
1163 	0x3500, 0xffffffff, 0xc6,
1164 	0x3504, 0xffffffff, 0x2089d,
1165 	0x3500, 0xffffffff, 0xca,
1166 	0x3504, 0xffffffff, 0x8a1,
1167 	0x3500, 0xffffffff, 0xcc,
1168 	0x3504, 0xffffffff, 0x8a3,
1169 	0x3500, 0xffffffff, 0xce,
1170 	0x3504, 0xffffffff, 0x308a5,
1171 	0x3500, 0xffffffff, 0xd3,
1172 	0x3504, 0xffffffff, 0x6d08cd,
1173 	0x3500, 0xffffffff, 0x142,
1174 	0x3504, 0xffffffff, 0x2000095a,
1175 	0x3504, 0xffffffff, 0x1,
1176 	0x3500, 0xffffffff, 0x144,
1177 	0x3504, 0xffffffff, 0x301f095b,
1178 	0x3500, 0xffffffff, 0x165,
1179 	0x3504, 0xffffffff, 0xc094d,
1180 	0x3500, 0xffffffff, 0x173,
1181 	0x3504, 0xffffffff, 0xf096d,
1182 	0x3500, 0xffffffff, 0x184,
1183 	0x3504, 0xffffffff, 0x15097f,
1184 	0x3500, 0xffffffff, 0x19b,
1185 	0x3504, 0xffffffff, 0xc0998,
1186 	0x3500, 0xffffffff, 0x1a9,
1187 	0x3504, 0xffffffff, 0x409a7,
1188 	0x3500, 0xffffffff, 0x1af,
1189 	0x3504, 0xffffffff, 0xcdc,
1190 	0x3500, 0xffffffff, 0x1b1,
1191 	0x3504, 0xffffffff, 0x800,
1192 	0x3508, 0xffffffff, 0x6c9b2000,
1193 	0x3510, 0xfc00, 0x2000,
1194 	0x3544, 0xffffffff, 0xfc0,
1195 	0x28d4, 0x00000100, 0x100
1196 };
1197 
1198 static void si_init_golden_registers(struct radeon_device *rdev)
1199 {
1200 	switch (rdev->family) {
1201 	case CHIP_TAHITI:
1202 		radeon_program_register_sequence(rdev,
1203 						 tahiti_golden_registers,
1204 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1205 		radeon_program_register_sequence(rdev,
1206 						 tahiti_golden_rlc_registers,
1207 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1208 		radeon_program_register_sequence(rdev,
1209 						 tahiti_mgcg_cgcg_init,
1210 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1211 		radeon_program_register_sequence(rdev,
1212 						 tahiti_golden_registers2,
1213 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1214 		break;
1215 	case CHIP_PITCAIRN:
1216 		radeon_program_register_sequence(rdev,
1217 						 pitcairn_golden_registers,
1218 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1219 		radeon_program_register_sequence(rdev,
1220 						 pitcairn_golden_rlc_registers,
1221 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1222 		radeon_program_register_sequence(rdev,
1223 						 pitcairn_mgcg_cgcg_init,
1224 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1225 		break;
1226 	case CHIP_VERDE:
1227 		radeon_program_register_sequence(rdev,
1228 						 verde_golden_registers,
1229 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1230 		radeon_program_register_sequence(rdev,
1231 						 verde_golden_rlc_registers,
1232 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1233 		radeon_program_register_sequence(rdev,
1234 						 verde_mgcg_cgcg_init,
1235 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1236 		radeon_program_register_sequence(rdev,
1237 						 verde_pg_init,
1238 						 (const u32)ARRAY_SIZE(verde_pg_init));
1239 		break;
1240 	case CHIP_OLAND:
1241 		radeon_program_register_sequence(rdev,
1242 						 oland_golden_registers,
1243 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1244 		radeon_program_register_sequence(rdev,
1245 						 oland_golden_rlc_registers,
1246 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1247 		radeon_program_register_sequence(rdev,
1248 						 oland_mgcg_cgcg_init,
1249 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1250 		break;
1251 	case CHIP_HAINAN:
1252 		radeon_program_register_sequence(rdev,
1253 						 hainan_golden_registers,
1254 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1255 		radeon_program_register_sequence(rdev,
1256 						 hainan_golden_registers2,
1257 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1258 		radeon_program_register_sequence(rdev,
1259 						 hainan_mgcg_cgcg_init,
1260 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1261 		break;
1262 	default:
1263 		break;
1264 	}
1265 }
1266 
/* Fallback clock reported by si_get_xclk() when TCLK is muxed onto XCLK.
 * NOTE(review): presumably expressed in the driver's 10 kHz clock units
 * (TCLK evaluates to 1000) — confirm against other radeon clock users. */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1269 
1270 /**
1271  * si_get_xclk - get the xclk
1272  *
1273  * @rdev: radeon_device pointer
1274  *
1275  * Returns the reference clock used by the gfx engine
1276  * (SI).
1277  */
1278 u32 si_get_xclk(struct radeon_device *rdev)
1279 {
1280         u32 reference_clock = rdev->clock.spll.reference_freq;
1281 	u32 tmp;
1282 
1283 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1284 	if (tmp & MUX_TCLK_TO_XCLK)
1285 		return TCLK;
1286 
1287 	tmp = RREG32(CG_CLKPIN_CNTL);
1288 	if (tmp & XTALIN_DIVIDE)
1289 		return reference_clock / 4;
1290 
1291 	return reference_clock;
1292 }
1293 
1294 /* get temperature in millidegrees */
1295 int si_get_temp(struct radeon_device *rdev)
1296 {
1297 	u32 temp;
1298 	int actual_temp = 0;
1299 
1300 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1301 		CTF_TEMP_SHIFT;
1302 
1303 	if (temp & 0x200)
1304 		actual_temp = 255;
1305 	else
1306 		actual_temp = temp & 0x1ff;
1307 
1308 	actual_temp = (actual_temp * 1000);
1309 
1310 	return actual_temp;
1311 }
1312 
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written by
 * si_mc_load_microcode() when loading the old-style MC firmware image. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1353 
/* Pitcairn {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written by
 * si_mc_load_microcode() when loading the old-style MC firmware image.
 * Identical to the tahiti table except for the final 0x9f entry. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1392 
/* Verde {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written by
 * si_mc_load_microcode() when loading the old-style MC firmware image.
 * Identical to the tahiti table except for the final 0x9f entry. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1431 
/* Oland {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written by
 * si_mc_load_microcode() when loading the old-style MC firmware image.
 * Identical to the tahiti table except for the final 0x9f entry. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1470 
/* Hainan {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written by
 * si_mc_load_microcode() when loading the old-style MC firmware image.
 * Identical to the tahiti table except for the final 0x9f entry. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1509 
1510 /* ucode loading */
1511 int si_mc_load_microcode(struct radeon_device *rdev)
1512 {
1513 	const __be32 *fw_data = NULL;
1514 	const __le32 *new_fw_data = NULL;
1515 	u32 running, blackout = 0;
1516 	u32 *io_mc_regs = NULL;
1517 	const __le32 *new_io_mc_regs = NULL;
1518 	int i, regs_size, ucode_size;
1519 
1520 	if (!rdev->mc_fw)
1521 		return -EINVAL;
1522 
1523 	if (rdev->new_fw) {
1524 		const struct mc_firmware_header_v1_0 *hdr =
1525 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1526 
1527 		radeon_ucode_print_mc_hdr(&hdr->header);
1528 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1529 		new_io_mc_regs = (const __le32 *)
1530 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1531 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1532 		new_fw_data = (const __le32 *)
1533 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1534 	} else {
1535 		ucode_size = rdev->mc_fw->size / 4;
1536 
1537 		switch (rdev->family) {
1538 		case CHIP_TAHITI:
1539 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1540 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1541 			break;
1542 		case CHIP_PITCAIRN:
1543 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1544 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1545 			break;
1546 		case CHIP_VERDE:
1547 		default:
1548 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1549 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1550 			break;
1551 		case CHIP_OLAND:
1552 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1553 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1554 			break;
1555 		case CHIP_HAINAN:
1556 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1557 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1558 			break;
1559 		}
1560 		fw_data = (const __be32 *)rdev->mc_fw->data;
1561 	}
1562 
1563 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1564 
1565 	if (running == 0) {
1566 		if (running) {
1567 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1568 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1569 		}
1570 
1571 		/* reset the engine and set to writable */
1572 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1573 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1574 
1575 		/* load mc io regs */
1576 		for (i = 0; i < regs_size; i++) {
1577 			if (rdev->new_fw) {
1578 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1579 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1580 			} else {
1581 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1582 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1583 			}
1584 		}
1585 		/* load the MC ucode */
1586 		for (i = 0; i < ucode_size; i++) {
1587 			if (rdev->new_fw)
1588 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1589 			else
1590 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1591 		}
1592 
1593 		/* put the engine back into the active state */
1594 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1595 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1596 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1597 
1598 		/* wait for training to complete */
1599 		for (i = 0; i < rdev->usec_timeout; i++) {
1600 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1601 				break;
1602 			udelay(1);
1603 		}
1604 		for (i = 0; i < rdev->usec_timeout; i++) {
1605 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1606 				break;
1607 			udelay(1);
1608 		}
1609 
1610 		if (running)
1611 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1612 	}
1613 
1614 	return 0;
1615 }
1616 
1617 static int si_init_microcode(struct radeon_device *rdev)
1618 {
1619 	const char *chip_name;
1620 	const char *new_chip_name;
1621 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1622 	size_t smc_req_size, mc2_req_size;
1623 	char fw_name[30];
1624 	int err;
1625 	int new_fw = 0;
1626 
1627 	DRM_DEBUG("\n");
1628 
1629 	switch (rdev->family) {
1630 	case CHIP_TAHITI:
1631 		chip_name = "TAHITI";
1632 		new_chip_name = "tahiti";
1633 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1634 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1635 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1636 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1637 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1638 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1639 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1640 		break;
1641 	case CHIP_PITCAIRN:
1642 		chip_name = "PITCAIRN";
1643 		new_chip_name = "pitcairn";
1644 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1645 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1646 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1647 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1648 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1649 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1650 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1651 		break;
1652 	case CHIP_VERDE:
1653 		chip_name = "VERDE";
1654 		new_chip_name = "verde";
1655 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1656 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1657 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1658 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1659 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1660 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1661 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1662 		break;
1663 	case CHIP_OLAND:
1664 		chip_name = "OLAND";
1665 		new_chip_name = "oland";
1666 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1667 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1668 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1669 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1670 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1671 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1672 		break;
1673 	case CHIP_HAINAN:
1674 		chip_name = "HAINAN";
1675 		new_chip_name = "hainan";
1676 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1677 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1678 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1679 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1680 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1681 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1682 		break;
1683 	default: BUG();
1684 	}
1685 
1686 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1687 
1688 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1689 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1690 	if (err) {
1691 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1692 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1693 		if (err)
1694 			goto out;
1695 		if (rdev->pfp_fw->size != pfp_req_size) {
1696 			printk(KERN_ERR
1697 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1698 			       rdev->pfp_fw->size, fw_name);
1699 			err = -EINVAL;
1700 			goto out;
1701 		}
1702 	} else {
1703 		err = radeon_ucode_validate(rdev->pfp_fw);
1704 		if (err) {
1705 			printk(KERN_ERR
1706 			       "si_cp: validation failed for firmware \"%s\"\n",
1707 			       fw_name);
1708 			goto out;
1709 		} else {
1710 			new_fw++;
1711 		}
1712 	}
1713 
1714 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1715 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1716 	if (err) {
1717 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1718 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1719 		if (err)
1720 			goto out;
1721 		if (rdev->me_fw->size != me_req_size) {
1722 			printk(KERN_ERR
1723 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1724 			       rdev->me_fw->size, fw_name);
1725 			err = -EINVAL;
1726 		}
1727 	} else {
1728 		err = radeon_ucode_validate(rdev->me_fw);
1729 		if (err) {
1730 			printk(KERN_ERR
1731 			       "si_cp: validation failed for firmware \"%s\"\n",
1732 			       fw_name);
1733 			goto out;
1734 		} else {
1735 			new_fw++;
1736 		}
1737 	}
1738 
1739 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1740 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1741 	if (err) {
1742 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1743 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1744 		if (err)
1745 			goto out;
1746 		if (rdev->ce_fw->size != ce_req_size) {
1747 			printk(KERN_ERR
1748 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749 			       rdev->ce_fw->size, fw_name);
1750 			err = -EINVAL;
1751 		}
1752 	} else {
1753 		err = radeon_ucode_validate(rdev->ce_fw);
1754 		if (err) {
1755 			printk(KERN_ERR
1756 			       "si_cp: validation failed for firmware \"%s\"\n",
1757 			       fw_name);
1758 			goto out;
1759 		} else {
1760 			new_fw++;
1761 		}
1762 	}
1763 
1764 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1765 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1766 	if (err) {
1767 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1768 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1769 		if (err)
1770 			goto out;
1771 		if (rdev->rlc_fw->size != rlc_req_size) {
1772 			printk(KERN_ERR
1773 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1774 			       rdev->rlc_fw->size, fw_name);
1775 			err = -EINVAL;
1776 		}
1777 	} else {
1778 		err = radeon_ucode_validate(rdev->rlc_fw);
1779 		if (err) {
1780 			printk(KERN_ERR
1781 			       "si_cp: validation failed for firmware \"%s\"\n",
1782 			       fw_name);
1783 			goto out;
1784 		} else {
1785 			new_fw++;
1786 		}
1787 	}
1788 
1789 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1790 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1791 	if (err) {
1792 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1793 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1794 		if (err) {
1795 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1796 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1797 			if (err)
1798 				goto out;
1799 		}
1800 		if ((rdev->mc_fw->size != mc_req_size) &&
1801 		    (rdev->mc_fw->size != mc2_req_size)) {
1802 			printk(KERN_ERR
1803 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1804 			       rdev->mc_fw->size, fw_name);
1805 			err = -EINVAL;
1806 		}
1807 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1808 	} else {
1809 		err = radeon_ucode_validate(rdev->mc_fw);
1810 		if (err) {
1811 			printk(KERN_ERR
1812 			       "si_cp: validation failed for firmware \"%s\"\n",
1813 			       fw_name);
1814 			goto out;
1815 		} else {
1816 			new_fw++;
1817 		}
1818 	}
1819 
1820 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1821 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1822 	if (err) {
1823 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1824 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1825 		if (err) {
1826 			printk(KERN_ERR
1827 			       "smc: error loading firmware \"%s\"\n",
1828 			       fw_name);
1829 			release_firmware(rdev->smc_fw);
1830 			rdev->smc_fw = NULL;
1831 			err = 0;
1832 		} else if (rdev->smc_fw->size != smc_req_size) {
1833 			printk(KERN_ERR
1834 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1835 			       rdev->smc_fw->size, fw_name);
1836 			err = -EINVAL;
1837 		}
1838 	} else {
1839 		err = radeon_ucode_validate(rdev->smc_fw);
1840 		if (err) {
1841 			printk(KERN_ERR
1842 			       "si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	if (new_fw == 0) {
1851 		rdev->new_fw = false;
1852 	} else if (new_fw < 6) {
1853 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1854 		err = -EINVAL;
1855 	} else {
1856 		rdev->new_fw = true;
1857 	}
1858 out:
1859 	if (err) {
1860 		if (err != -EINVAL)
1861 			printk(KERN_ERR
1862 			       "si_cp: Failed to load firmware \"%s\"\n",
1863 			       fw_name);
1864 		release_firmware(rdev->pfp_fw);
1865 		rdev->pfp_fw = NULL;
1866 		release_firmware(rdev->me_fw);
1867 		rdev->me_fw = NULL;
1868 		release_firmware(rdev->ce_fw);
1869 		rdev->ce_fw = NULL;
1870 		release_firmware(rdev->rlc_fw);
1871 		rdev->rlc_fw = NULL;
1872 		release_firmware(rdev->mc_fw);
1873 		rdev->mc_fw = NULL;
1874 		release_firmware(rdev->smc_fw);
1875 		rdev->smc_fw = NULL;
1876 	}
1877 	return err;
1878 }
1879 
1880 /* watermark setup */
/**
 * dce6_line_buffer_adjust - program the line buffer split for a crtc
 * @rdev: radeon device
 * @radeon_crtc: crtc to configure
 * @mode: mode currently set on @radeon_crtc (NULL/disabled if none)
 * @other_mode: mode on the paired crtc sharing this line buffer
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for the
 * pipe, then waits for the hardware to acknowledge the DMIF allocation.
 *
 * Returns the line buffer size (in bytes) granted to this crtc, or 0
 * if the crtc is disabled.
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	/* per-pipe DMIF register block stride is 0x20 bytes */
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* disabled crtc gets no line buffer or DMIF buffers */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll (bounded by usec_timeout) until the hw latches the allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1939 
1940 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1941 {
1942 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1943 
1944 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1945 	case 0:
1946 	default:
1947 		return 1;
1948 	case 1:
1949 		return 2;
1950 	case 2:
1951 		return 4;
1952 	case 3:
1953 		return 8;
1954 	case 4:
1955 		return 3;
1956 	case 5:
1957 		return 6;
1958 	case 6:
1959 		return 10;
1960 	case 7:
1961 		return 12;
1962 	case 8:
1963 		return 16;
1964 	}
1965 }
1966 
/* Input parameters for the DCE6 display watermark calculations.
 * One instance is filled in per head and per clock level (high/low)
 * by dce6_program_watermarks().
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1982 
1983 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1984 {
1985 	/* Calculate raw DRAM Bandwidth */
1986 	fixed20_12 dram_efficiency; /* 0.7 */
1987 	fixed20_12 yclk, dram_channels, bandwidth;
1988 	fixed20_12 a;
1989 
1990 	a.full = dfixed_const(1000);
1991 	yclk.full = dfixed_const(wm->yclk);
1992 	yclk.full = dfixed_div(yclk, a);
1993 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1994 	a.full = dfixed_const(10);
1995 	dram_efficiency.full = dfixed_const(7);
1996 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1997 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1998 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1999 
2000 	return dfixed_trunc(bandwidth);
2001 }
2002 
2003 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2004 {
2005 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2006 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2007 	fixed20_12 yclk, dram_channels, bandwidth;
2008 	fixed20_12 a;
2009 
2010 	a.full = dfixed_const(1000);
2011 	yclk.full = dfixed_const(wm->yclk);
2012 	yclk.full = dfixed_div(yclk, a);
2013 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2014 	a.full = dfixed_const(10);
2015 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2016 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2017 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2018 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2019 
2020 	return dfixed_trunc(bandwidth);
2021 }
2022 
2023 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2024 {
2025 	/* Calculate the display Data return Bandwidth */
2026 	fixed20_12 return_efficiency; /* 0.8 */
2027 	fixed20_12 sclk, bandwidth;
2028 	fixed20_12 a;
2029 
2030 	a.full = dfixed_const(1000);
2031 	sclk.full = dfixed_const(wm->sclk);
2032 	sclk.full = dfixed_div(sclk, a);
2033 	a.full = dfixed_const(10);
2034 	return_efficiency.full = dfixed_const(8);
2035 	return_efficiency.full = dfixed_div(return_efficiency, a);
2036 	a.full = dfixed_const(32);
2037 	bandwidth.full = dfixed_mul(a, sclk);
2038 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2039 
2040 	return dfixed_trunc(bandwidth);
2041 }
2042 
/* DMIF request size is fixed at 32 bytes on DCE6; @wm is unused but
 * kept for symmetry with the other watermark helpers.
 */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2047 
2048 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2049 {
2050 	/* Calculate the DMIF Request Bandwidth */
2051 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2052 	fixed20_12 disp_clk, sclk, bandwidth;
2053 	fixed20_12 a, b1, b2;
2054 	u32 min_bandwidth;
2055 
2056 	a.full = dfixed_const(1000);
2057 	disp_clk.full = dfixed_const(wm->disp_clk);
2058 	disp_clk.full = dfixed_div(disp_clk, a);
2059 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2060 	b1.full = dfixed_mul(a, disp_clk);
2061 
2062 	a.full = dfixed_const(1000);
2063 	sclk.full = dfixed_const(wm->sclk);
2064 	sclk.full = dfixed_div(sclk, a);
2065 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2066 	b2.full = dfixed_mul(a, sclk);
2067 
2068 	a.full = dfixed_const(10);
2069 	disp_clk_request_efficiency.full = dfixed_const(8);
2070 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2071 
2072 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2073 
2074 	a.full = dfixed_const(min_bandwidth);
2075 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2076 
2077 	return dfixed_trunc(bandwidth);
2078 }
2079 
2080 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2081 {
2082 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2083 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2084 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2085 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2086 
2087 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2088 }
2089 
2090 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2091 {
2092 	/* Calculate the display mode Average Bandwidth
2093 	 * DisplayMode should contain the source and destination dimensions,
2094 	 * timing, etc.
2095 	 */
2096 	fixed20_12 bpp;
2097 	fixed20_12 line_time;
2098 	fixed20_12 src_width;
2099 	fixed20_12 bandwidth;
2100 	fixed20_12 a;
2101 
2102 	a.full = dfixed_const(1000);
2103 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2104 	line_time.full = dfixed_div(line_time, a);
2105 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2106 	src_width.full = dfixed_const(wm->src_width);
2107 	bandwidth.full = dfixed_mul(src_width, bpp);
2108 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2109 	bandwidth.full = dfixed_div(bandwidth, line_time);
2110 
2111 	return dfixed_trunc(bandwidth);
2112 }
2113 
/* Compute the latency watermark in ns for one head: the memory latency
 * plus the time other heads' outstanding requests may add, extended if
 * the line buffer cannot refill a line within the active display time.
 * NOTE(review): divides by available_bandwidth, disp_clk and lb_fill_bw;
 * callers are expected to provide non-zero clocks/bandwidth.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time to return one 512-byte chunk at 8x, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* extra delay contributed by the other active heads' traffic */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling / many vtaps / interlace needs up to 4 source
	 * lines per destination line, otherwise 2 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* per-head share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* bandwidth the DMIF buffer can sustain over the mc latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* display-clock-limited fill rate: disp_clk(MHz) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill a full source line span at lb_fill_bw, in ns */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the fill is slower than the active time, pad the latency */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2176 
2177 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2178 {
2179 	if (dce6_average_bandwidth(wm) <=
2180 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2181 		return true;
2182 	else
2183 		return false;
2184 };
2185 
2186 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2187 {
2188 	if (dce6_average_bandwidth(wm) <=
2189 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2190 		return true;
2191 	else
2192 		return false;
2193 };
2194 
2195 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2196 {
2197 	u32 lb_partitions = wm->lb_size / wm->src_width;
2198 	u32 line_time = wm->active_time + wm->blank_time;
2199 	u32 latency_tolerant_lines;
2200 	u32 latency_hiding;
2201 	fixed20_12 a;
2202 
2203 	a.full = dfixed_const(1);
2204 	if (wm->vsc.full > a.full)
2205 		latency_tolerant_lines = 1;
2206 	else {
2207 		if (lb_partitions <= (wm->vtaps + 1))
2208 			latency_tolerant_lines = 1;
2209 		else
2210 			latency_tolerant_lines = 2;
2211 	}
2212 
2213 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2214 
2215 	if (dce6_latency_watermark(wm) <= latency_hiding)
2216 		return true;
2217 	else
2218 		return false;
2219 }
2220 
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 * @rdev: radeon device
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer bytes allocated to this crtc
 * @num_heads: number of active crtcs
 *
 * Computes the latency watermarks for the high (wm A) and low (wm B)
 * clock levels, forces display priority high if either level cannot
 * sustain the mode, then writes the watermarks and priority marks to
 * the DPG/PRIORITY registers.  Also caches line_time and the two
 * watermarks on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line time clamped to the 16-bit field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* ARUBA (TN) uses the evergreen memory controller layout */
		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * clock(MHz) * hsc / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2380 
2381 void dce6_bandwidth_update(struct radeon_device *rdev)
2382 {
2383 	struct drm_display_mode *mode0 = NULL;
2384 	struct drm_display_mode *mode1 = NULL;
2385 	u32 num_heads = 0, lb_size;
2386 	int i;
2387 
2388 	if (!rdev->mode_info.mode_config_initialized)
2389 		return;
2390 
2391 	radeon_update_display_priority(rdev);
2392 
2393 	for (i = 0; i < rdev->num_crtc; i++) {
2394 		if (rdev->mode_info.crtcs[i]->base.enabled)
2395 			num_heads++;
2396 	}
2397 	for (i = 0; i < rdev->num_crtc; i += 2) {
2398 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2399 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2400 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2401 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2402 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2403 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2404 	}
2405 }
2406 
2407 /*
2408  * Core functions
2409  */
2410 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2411 {
2412 	const u32 num_tile_mode_states = 32;
2413 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2414 
2415 	switch (rdev->config.si.mem_row_size_in_kb) {
2416 	case 1:
2417 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2418 		break;
2419 	case 2:
2420 	default:
2421 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2422 		break;
2423 	case 4:
2424 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2425 		break;
2426 	}
2427 
2428 	if ((rdev->family == CHIP_TAHITI) ||
2429 	    (rdev->family == CHIP_PITCAIRN)) {
2430 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2431 			switch (reg_offset) {
2432 			case 0:  /* non-AA compressed depth or any compressed stencil */
2433 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2435 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2437 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2438 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2441 				break;
2442 			case 1:  /* 2xAA/4xAA compressed depth only */
2443 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2445 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2447 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2448 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451 				break;
2452 			case 2:  /* 8xAA compressed depth only */
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2458 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461 				break;
2462 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2463 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2467 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2468 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471 				break;
2472 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2473 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2475 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2477 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2478 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481 				break;
2482 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2483 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486 						 TILE_SPLIT(split_equal_to_row_size) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2488 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491 				break;
2492 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2493 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2495 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496 						 TILE_SPLIT(split_equal_to_row_size) |
2497 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2498 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2501 				break;
2502 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2503 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506 						 TILE_SPLIT(split_equal_to_row_size) |
2507 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2508 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511 				break;
2512 			case 8:  /* 1D and 1D Array Surfaces */
2513 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2514 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2517 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2518 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521 				break;
2522 			case 9:  /* Displayable maps. */
2523 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2527 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2528 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531 				break;
2532 			case 10:  /* Display 8bpp. */
2533 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2538 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541 				break;
2542 			case 11:  /* Display 16bpp. */
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2547 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2548 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551 				break;
2552 			case 12:  /* Display 32bpp. */
2553 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2557 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2558 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2560 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2561 				break;
2562 			case 13:  /* Thin. */
2563 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2565 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2567 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2568 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571 				break;
2572 			case 14:  /* Thin 8 bpp. */
2573 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2575 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2578 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2581 				break;
2582 			case 15:  /* Thin 16 bpp. */
2583 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2585 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2587 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2588 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2591 				break;
2592 			case 16:  /* Thin 32 bpp. */
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2598 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601 				break;
2602 			case 17:  /* Thin 64 bpp. */
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606 						 TILE_SPLIT(split_equal_to_row_size) |
2607 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2608 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2611 				break;
2612 			case 21:  /* 8 bpp PRT. */
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2618 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2619 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621 				break;
2622 			case 22:  /* 16 bpp PRT */
2623 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2627 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2628 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2630 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2631 				break;
2632 			case 23:  /* 32 bpp PRT */
2633 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2637 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2638 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641 				break;
2642 			case 24:  /* 64 bpp PRT */
2643 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2646 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2648 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651 				break;
2652 			case 25:  /* 128 bpp PRT */
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2657 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2658 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661 				break;
2662 			default:
2663 				gb_tile_moden = 0;
2664 				break;
2665 			}
2666 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2667 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2668 		}
2669 	} else if ((rdev->family == CHIP_VERDE) ||
2670 		   (rdev->family == CHIP_OLAND) ||
2671 		   (rdev->family == CHIP_HAINAN)) {
2672 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2673 			switch (reg_offset) {
2674 			case 0:  /* non-AA compressed depth or any compressed stencil */
2675 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2677 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2679 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2680 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683 				break;
2684 			case 1:  /* 2xAA/4xAA compressed depth only */
2685 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2687 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2689 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2690 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2693 				break;
2694 			case 2:  /* 8xAA compressed depth only */
2695 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2700 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703 				break;
2704 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2709 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2710 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713 				break;
2714 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2715 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2717 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2719 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2720 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723 				break;
2724 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2725 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728 						 TILE_SPLIT(split_equal_to_row_size) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2730 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2732 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2733 				break;
2734 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2735 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738 						 TILE_SPLIT(split_equal_to_row_size) |
2739 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2740 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2743 				break;
2744 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2745 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 						 TILE_SPLIT(split_equal_to_row_size) |
2749 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2750 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753 				break;
2754 			case 8:  /* 1D and 1D Array Surfaces */
2755 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2757 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2759 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2760 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2762 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2763 				break;
2764 			case 9:  /* Displayable maps. */
2765 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2766 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2767 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2769 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2770 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773 				break;
2774 			case 10:  /* Display 8bpp. */
2775 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2777 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2780 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2783 				break;
2784 			case 11:  /* Display 16bpp. */
2785 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2790 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793 				break;
2794 			case 12:  /* Display 32bpp. */
2795 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2800 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2802 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803 				break;
2804 			case 13:  /* Thin. */
2805 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2807 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2810 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813 				break;
2814 			case 14:  /* Thin 8 bpp. */
2815 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2817 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2819 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2820 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2822 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823 				break;
2824 			case 15:  /* Thin 16 bpp. */
2825 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2827 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2830 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2832 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833 				break;
2834 			case 16:  /* Thin 32 bpp. */
2835 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2839 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2840 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843 				break;
2844 			case 17:  /* Thin 64 bpp. */
2845 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848 						 TILE_SPLIT(split_equal_to_row_size) |
2849 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2850 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853 				break;
2854 			case 21:  /* 8 bpp PRT. */
2855 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2858 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2860 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863 				break;
2864 			case 22:  /* 16 bpp PRT */
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2868 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2870 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2872 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2873 				break;
2874 			case 23:  /* 32 bpp PRT */
2875 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2878 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2879 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2880 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883 				break;
2884 			case 24:  /* 64 bpp PRT */
2885 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2887 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2888 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2889 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2890 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2893 				break;
2894 			case 25:  /* 128 bpp PRT */
2895 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2899 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2900 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2903 				break;
2904 			default:
2905 				gb_tile_moden = 0;
2906 				break;
2907 			}
2908 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2909 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2910 		}
2911 	} else
2912 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2913 }
2914 
2915 static void si_select_se_sh(struct radeon_device *rdev,
2916 			    u32 se_num, u32 sh_num)
2917 {
2918 	u32 data = INSTANCE_BROADCAST_WRITES;
2919 
2920 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2921 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2922 	else if (se_num == 0xffffffff)
2923 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2924 	else if (sh_num == 0xffffffff)
2925 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2926 	else
2927 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2928 	WREG32(GRBM_GFX_INDEX, data);
2929 }
2930 
2931 static u32 si_create_bitmask(u32 bit_width)
2932 {
2933 	u32 i, mask = 0;
2934 
2935 	for (i = 0; i < bit_width; i++) {
2936 		mask <<= 1;
2937 		mask |= 1;
2938 	}
2939 	return mask;
2940 }
2941 
2942 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2943 {
2944 	u32 data, mask;
2945 
2946 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2947 	if (data & 1)
2948 		data &= INACTIVE_CUS_MASK;
2949 	else
2950 		data = 0;
2951 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2952 
2953 	data >>= INACTIVE_CUS_SHIFT;
2954 
2955 	mask = si_create_bitmask(cu_per_sh);
2956 
2957 	return ~data & mask;
2958 }
2959 
2960 static void si_setup_spi(struct radeon_device *rdev,
2961 			 u32 se_num, u32 sh_per_se,
2962 			 u32 cu_per_sh)
2963 {
2964 	int i, j, k;
2965 	u32 data, mask, active_cu;
2966 
2967 	for (i = 0; i < se_num; i++) {
2968 		for (j = 0; j < sh_per_se; j++) {
2969 			si_select_se_sh(rdev, i, j);
2970 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2971 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2972 
2973 			mask = 1;
2974 			for (k = 0; k < 16; k++) {
2975 				mask <<= k;
2976 				if (active_cu & mask) {
2977 					data &= ~mask;
2978 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2979 					break;
2980 				}
2981 			}
2982 		}
2983 	}
2984 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2985 }
2986 
2987 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2988 			      u32 max_rb_num_per_se,
2989 			      u32 sh_per_se)
2990 {
2991 	u32 data, mask;
2992 
2993 	data = RREG32(CC_RB_BACKEND_DISABLE);
2994 	if (data & 1)
2995 		data &= BACKEND_DISABLE_MASK;
2996 	else
2997 		data = 0;
2998 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2999 
3000 	data >>= BACKEND_DISABLE_SHIFT;
3001 
3002 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3003 
3004 	return data & mask;
3005 }
3006 
/**
 * si_setup_rb - collect the render-backend enable state and program
 * the raster configuration
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num_per_se: maximum render backends per shader engine
 *
 * Reads the per-SE/SH RB disable bitmaps, stores the resulting global
 * enable mask in rdev->config.si.backend_enable_mask, and programs
 * PA_SC_RASTER_CONFIG for each shader engine based on which RBs in
 * each 2-bit group are enabled.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the disabled-RB bits from every SE/SH into one bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into an enabled-RB bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program the raster config per SE; each SH consumes two bits of
	 * enabled_rbs, selecting an RB_MAP value for that pair of RBs */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3056 
/**
 * si_gpu_init - asic-specific graphics block initialization
 * @rdev: radeon_device pointer
 *
 * Looks up the per-family shader/pipe/backend limits, derives the
 * tiling configuration from the memory controller settings, programs
 * the address-config registers and tiling tables, sets up the RB and
 * SPI state, counts the active CUs, and writes the 3D-engine hardware
 * defaults.  Called once during hardware bring-up.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family hardware limits and golden address configuration */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not otherwise used here —
	 * presumably kept for parity with other asics; verify before
	 * removing */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, capped at 4) from the number
	 * of columns reported by the memory controller */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config is mirrored into every block that
	 * addresses VRAM (display, HDP, DMA, UVD) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the active CUs across all SE/SH pairs */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before the CP starts */
	udelay(50);
}
3320 
3321 /*
3322  * GPU scratch registers helpers function.
3323  */
3324 static void si_scratch_init(struct radeon_device *rdev)
3325 {
3326 	int i;
3327 
3328 	rdev->scratch.num_reg = 7;
3329 	rdev->scratch.reg_base = SCRATCH_REG0;
3330 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3331 		rdev->scratch.free[i] = true;
3332 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3333 	}
3334 }
3335 
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a SURFACE_SYNC to flush the read caches over gart, then an
 * EVENT_WRITE_EOP packet that flushes the caches, writes the fence
 * sequence number to the fence address, and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit fence value; INT_SEL(2): interrupt
	 * after the write completes */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3362 
3363 /*
3364  * IB stuff
3365  */
/**
 * si_ring_ib_execute - emit an indirect buffer on the gfx ring
 * @rdev: radeon_device pointer
 * @ib: the IB to schedule
 *
 * Emits the INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST) packet that
 * makes the CP fetch and execute the IB, preceded by the next-rptr
 * bookkeeping write and, for non-const IBs, followed by a read-cache
 * flush over gart for the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet
			 * + 8 for the trailing flush = rptr after this IB */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet
			 * + 8 for the trailing flush = rptr after this IB */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3422 
3423 /*
3424  * CP.
3425  */
3426 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3427 {
3428 	if (enable)
3429 		WREG32(CP_ME_CNTL, 0);
3430 	else {
3431 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3432 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3433 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3434 		WREG32(SCRATCH_UMSK, 0);
3435 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3436 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3437 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3438 	}
3439 	udelay(50);
3440 }
3441 
3442 static int si_cp_load_microcode(struct radeon_device *rdev)
3443 {
3444 	int i;
3445 
3446 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3447 		return -EINVAL;
3448 
3449 	si_cp_enable(rdev, false);
3450 
3451 	if (rdev->new_fw) {
3452 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3453 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3454 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3455 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3456 		const struct gfx_firmware_header_v1_0 *me_hdr =
3457 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3458 		const __le32 *fw_data;
3459 		u32 fw_size;
3460 
3461 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3462 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3463 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3464 
3465 		/* PFP */
3466 		fw_data = (const __le32 *)
3467 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3468 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3469 		WREG32(CP_PFP_UCODE_ADDR, 0);
3470 		for (i = 0; i < fw_size; i++)
3471 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3472 		WREG32(CP_PFP_UCODE_ADDR, 0);
3473 
3474 		/* CE */
3475 		fw_data = (const __le32 *)
3476 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3477 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3478 		WREG32(CP_CE_UCODE_ADDR, 0);
3479 		for (i = 0; i < fw_size; i++)
3480 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3481 		WREG32(CP_CE_UCODE_ADDR, 0);
3482 
3483 		/* ME */
3484 		fw_data = (const __be32 *)
3485 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3486 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3487 		WREG32(CP_ME_RAM_WADDR, 0);
3488 		for (i = 0; i < fw_size; i++)
3489 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3490 		WREG32(CP_ME_RAM_WADDR, 0);
3491 	} else {
3492 		const __be32 *fw_data;
3493 
3494 		/* PFP */
3495 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3496 		WREG32(CP_PFP_UCODE_ADDR, 0);
3497 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3498 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3499 		WREG32(CP_PFP_UCODE_ADDR, 0);
3500 
3501 		/* CE */
3502 		fw_data = (const __be32 *)rdev->ce_fw->data;
3503 		WREG32(CP_CE_UCODE_ADDR, 0);
3504 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3505 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3506 		WREG32(CP_CE_UCODE_ADDR, 0);
3507 
3508 		/* ME */
3509 		fw_data = (const __be32 *)rdev->me_fw->data;
3510 		WREG32(CP_ME_RAM_WADDR, 0);
3511 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3512 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3513 		WREG32(CP_ME_RAM_WADDR, 0);
3514 	}
3515 
3516 	WREG32(CP_PFP_UCODE_ADDR, 0);
3517 	WREG32(CP_CE_UCODE_ADDR, 0);
3518 	WREG32(CP_ME_RAM_WADDR, 0);
3519 	WREG32(CP_ME_RAM_RADDR, 0);
3520 	return 0;
3521 }
3522 
3523 static int si_cp_start(struct radeon_device *rdev)
3524 {
3525 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3526 	int r, i;
3527 
3528 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3529 	if (r) {
3530 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3531 		return r;
3532 	}
3533 	/* init the CP */
3534 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3535 	radeon_ring_write(ring, 0x1);
3536 	radeon_ring_write(ring, 0x0);
3537 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3538 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3539 	radeon_ring_write(ring, 0);
3540 	radeon_ring_write(ring, 0);
3541 
3542 	/* init the CE partitions */
3543 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3544 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3545 	radeon_ring_write(ring, 0xc000);
3546 	radeon_ring_write(ring, 0xe000);
3547 	radeon_ring_unlock_commit(rdev, ring, false);
3548 
3549 	si_cp_enable(rdev, true);
3550 
3551 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3552 	if (r) {
3553 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3554 		return r;
3555 	}
3556 
3557 	/* setup clear context state */
3558 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3559 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3560 
3561 	for (i = 0; i < si_default_size; i++)
3562 		radeon_ring_write(ring, si_default_state[i]);
3563 
3564 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3565 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3566 
3567 	/* set clear context state */
3568 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3569 	radeon_ring_write(ring, 0);
3570 
3571 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3572 	radeon_ring_write(ring, 0x00000316);
3573 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3574 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3575 
3576 	radeon_ring_unlock_commit(rdev, ring, false);
3577 
3578 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3579 		ring = &rdev->ring[i];
3580 		r = radeon_ring_lock(rdev, ring, 2);
3581 
3582 		/* clear the compute context state */
3583 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3584 		radeon_ring_write(ring, 0);
3585 
3586 		radeon_ring_unlock_commit(rdev, ring, false);
3587 	}
3588 
3589 	return 0;
3590 }
3591 
3592 static void si_cp_fini(struct radeon_device *rdev)
3593 {
3594 	struct radeon_ring *ring;
3595 	si_cp_enable(rdev, false);
3596 
3597 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3598 	radeon_ring_fini(rdev, ring);
3599 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3600 
3601 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3602 	radeon_ring_fini(rdev, ring);
3603 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3604 
3605 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3606 	radeon_ring_fini(rdev, ring);
3607 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3608 }
3609 
/**
 * si_cp_resume - program and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the three CP ring buffers (gfx ring 0, compute rings 1/2):
 * ring size, write pointer, writeback rptr address and base address,
 * then starts the CP via si_cp_start() and ring-tests each ring.
 * Returns 0 on success, negative error code if the gfx ring test fails
 * (compute ring test failures only mark that ring not ready).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: CP must not update rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	/* let the RPTR_WR latch settle before re-arming the ring */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: nothing can run */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3735 
/**
 * si_gpu_check_soft_reset - check which engines appear busy or hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and builds a
 * RADEON_RESET_* bitmask of the blocks that report busy.  An MC busy
 * indication is deliberately dropped from the mask (see below).
 * Returns the accumulated reset mask (0 means the GPU looks idle).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3816 
/**
 * si_gpu_soft_reset - soft-reset the engines named in the mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask of blocks to reset
 *
 * Quiesces the GPU (disables PG/CG, stops the RLC, halts the CP and the
 * DMA engines, stops the MC), translates the reset mask into
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET bits, pulses those reset bits, and
 * restores the MC.  The write/read/delay pattern around the reset
 * registers is order-sensitive; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate RADEON_RESET_* bits into per-register soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert, hold, then deassert the GRBM reset bits; the extra reads
	 * flush the writes over the register bus */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same assert/deassert pulse for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3948 
/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass clocks
 *
 * @rdev: radeon_device pointer
 *
 * Puts the SPLL into bypass, requests the clock switch and polls
 * SPLL_STATUS (up to usec_timeout) for it to take effect, then clears
 * the change request and deselects the MPLL as the mclk source.
 * Used to run the chip from safe clocks before a PCI config reset.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the clock-change request to be acknowledged */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3975 
/**
 * si_spll_powerdown - put the SPLL into reset/sleep
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts reset and sleep, then
 * releases software control.  Called after switching to bypass clocks
 * and before a PCI config reset.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3996 
/**
 * si_gpu_pci_config_reset - full-asic reset via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier fallback used when a soft reset did not clear the hang:
 * quiesces CP/DMA/RLC and the MC, drops to bypass clocks, powers down
 * the SPLL, disables bus mastering and triggers the PCI config reset,
 * then polls CONFIG_MEMSIZE (up to usec_timeout) for the asic to come
 * back.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads 0xffffffff while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4048 
/**
 * si_asic_reset - attempt to recover a hung GPU
 *
 * @rdev: radeon_device pointer
 *
 * Escalating reset sequence: first a targeted soft reset of whatever
 * engines report busy, then (if still hung and radeon_hard_reset is
 * set) a full PCI config reset.  The BIOS scratch "engine hung" flag
 * is set while recovery is in progress and cleared on success.
 * Always returns 0; the final state is reflected in the scratch flag.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4074 
4075 /**
4076  * si_gfx_is_lockup - Check if the GFX engine is locked up
4077  *
4078  * @rdev: radeon_device pointer
4079  * @ring: radeon_ring structure holding ring information
4080  *
4081  * Check if the GFX engine is locked up.
4082  * Returns true if the engine appears to be locked up, false if not.
4083  */
4084 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4085 {
4086 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4087 
4088 	if (!(reset_mask & (RADEON_RESET_GFX |
4089 			    RADEON_RESET_COMPUTE |
4090 			    RADEON_RESET_CP))) {
4091 		radeon_ring_lockup_update(rdev, ring);
4092 		return false;
4093 	}
4094 	return radeon_ring_test_lockup(rdev, ring);
4095 }
4096 
4097 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP host-path registers, stops the MC clients, programs
 * the system/VRAM aperture ranges and the FB location, sets up the
 * HDP non-surface range, disables the AGP aperture, and resumes the
 * MC.  Also locks out VGA access on asics with display hardware.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the 16-bit top and base (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4148 
/**
 * si_vram_gtt_location - lay out VRAM and GTT in the MC address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to lay out
 *
 * Caps VRAM so that at least 1024M of MC address space is left for
 * the GTT, then places the VRAM aperture at offset 0 and the GTT
 * after it.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): this passes &rdev->mc rather than the mc argument;
	 * harmless only while every caller passes &rdev->mc — confirm. */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
4162 
4163 static int si_mc_init(struct radeon_device *rdev)
4164 {
4165 	u32 tmp;
4166 	int chansize, numchan;
4167 
4168 	/* Get VRAM informations */
4169 	rdev->mc.vram_is_ddr = true;
4170 	tmp = RREG32(MC_ARB_RAMCFG);
4171 	if (tmp & CHANSIZE_OVERRIDE) {
4172 		chansize = 16;
4173 	} else if (tmp & CHANSIZE_MASK) {
4174 		chansize = 64;
4175 	} else {
4176 		chansize = 32;
4177 	}
4178 	tmp = RREG32(MC_SHARED_CHMAP);
4179 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4180 	case 0:
4181 	default:
4182 		numchan = 1;
4183 		break;
4184 	case 1:
4185 		numchan = 2;
4186 		break;
4187 	case 2:
4188 		numchan = 4;
4189 		break;
4190 	case 3:
4191 		numchan = 8;
4192 		break;
4193 	case 4:
4194 		numchan = 3;
4195 		break;
4196 	case 5:
4197 		numchan = 6;
4198 		break;
4199 	case 6:
4200 		numchan = 10;
4201 		break;
4202 	case 7:
4203 		numchan = 12;
4204 		break;
4205 	case 8:
4206 		numchan = 16;
4207 		break;
4208 	}
4209 	rdev->mc.vram_width = numchan * chansize;
4210 	/* Could aper size report 0 ? */
4211 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4212 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4213 	/* size in MB on si */
4214 	tmp = RREG32(CONFIG_MEMSIZE);
4215 	/* some boards may have garbage in the upper 16 bits */
4216 	if (tmp & 0xffff0000) {
4217 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4218 		if (tmp & 0xffff)
4219 			tmp &= 0xffff;
4220 	}
4221 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4222 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4223 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4224 	si_vram_gtt_location(rdev, &rdev->mc);
4225 	radeon_update_bandwidth_info(rdev);
4226 
4227 	return 0;
4228 }
4229 
4230 /*
4231  * GART
4232  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM context0 TLB
 *
 * @rdev: radeon_device pointer
 *
 * Makes CPU-written page-table updates visible to the GPU by flushing
 * the HDP cache, then invalidates the TLB for VM context 0 (the GART).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4241 
/**
 * si_pcie_gart_enable - set up and enable the GART (VM context 0)
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * configures VM context 0 as the system GART, restores the saved page
 * directory addresses for contexts 1-15, enables those contexts with
 * full fault reporting, and flushes the TLB.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, purpose unclear — carried
	 * over from earlier asics; do not remove without verification */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4329 
/**
 * si_pcie_gart_disable - disable the GART and all VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Saves the page-directory base address of contexts 1-15 (restored by
 * si_pcie_gart_enable() on resume), disables all VM contexts, puts the
 * TLB and L2 cache into pass-through mode, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4359 
/**
 * si_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees its page table and releases the GART
 * bookkeeping (driver unload path).
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4366 
4367 /* vm parser */
4368 static bool si_vm_reg_valid(u32 reg)
4369 {
4370 	/* context regs are fine */
4371 	if (reg >= 0x28000)
4372 		return true;
4373 
4374 	/* check config regs */
4375 	switch (reg) {
4376 	case GRBM_GFX_INDEX:
4377 	case CP_STRMOUT_CNTL:
4378 	case VGT_VTX_VECT_EJECT_REG:
4379 	case VGT_CACHE_INVALIDATION:
4380 	case VGT_ESGS_RING_SIZE:
4381 	case VGT_GSVS_RING_SIZE:
4382 	case VGT_GS_VERTEX_REUSE:
4383 	case VGT_PRIMITIVE_TYPE:
4384 	case VGT_INDEX_TYPE:
4385 	case VGT_NUM_INDICES:
4386 	case VGT_NUM_INSTANCES:
4387 	case VGT_TF_RING_SIZE:
4388 	case VGT_HS_OFFCHIP_PARAM:
4389 	case VGT_TF_MEMORY_BASE:
4390 	case PA_CL_ENHANCE:
4391 	case PA_SU_LINE_STIPPLE_VALUE:
4392 	case PA_SC_LINE_STIPPLE_STATE:
4393 	case PA_SC_ENHANCE:
4394 	case SQC_CACHES:
4395 	case SPI_STATIC_THREAD_MGMT_1:
4396 	case SPI_STATIC_THREAD_MGMT_2:
4397 	case SPI_STATIC_THREAD_MGMT_3:
4398 	case SPI_PS_MAX_WAVE_ID:
4399 	case SPI_CONFIG_CNTL:
4400 	case SPI_CONFIG_CNTL_1:
4401 	case TA_CNTL_AUX:
4402 		return true;
4403 	default:
4404 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4405 		return false;
4406 	}
4407 }
4408 
4409 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4410 				  u32 *ib, struct radeon_cs_packet *pkt)
4411 {
4412 	switch (pkt->opcode) {
4413 	case PACKET3_NOP:
4414 	case PACKET3_SET_BASE:
4415 	case PACKET3_SET_CE_DE_COUNTERS:
4416 	case PACKET3_LOAD_CONST_RAM:
4417 	case PACKET3_WRITE_CONST_RAM:
4418 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4419 	case PACKET3_DUMP_CONST_RAM:
4420 	case PACKET3_INCREMENT_CE_COUNTER:
4421 	case PACKET3_WAIT_ON_DE_COUNTER:
4422 	case PACKET3_CE_WRITE:
4423 		break;
4424 	default:
4425 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4426 		return -EINVAL;
4427 	}
4428 	return 0;
4429 }
4430 
4431 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4432 {
4433 	u32 start_reg, reg, i;
4434 	u32 command = ib[idx + 4];
4435 	u32 info = ib[idx + 1];
4436 	u32 idx_value = ib[idx];
4437 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4438 		/* src address space is register */
4439 		if (((info & 0x60000000) >> 29) == 0) {
4440 			start_reg = idx_value << 2;
4441 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4442 				reg = start_reg;
4443 				if (!si_vm_reg_valid(reg)) {
4444 					DRM_ERROR("CP DMA Bad SRC register\n");
4445 					return -EINVAL;
4446 				}
4447 			} else {
4448 				for (i = 0; i < (command & 0x1fffff); i++) {
4449 					reg = start_reg + (4 * i);
4450 					if (!si_vm_reg_valid(reg)) {
4451 						DRM_ERROR("CP DMA Bad SRC register\n");
4452 						return -EINVAL;
4453 					}
4454 				}
4455 			}
4456 		}
4457 	}
4458 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4459 		/* dst address space is register */
4460 		if (((info & 0x00300000) >> 20) == 0) {
4461 			start_reg = ib[idx + 2];
4462 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4463 				reg = start_reg;
4464 				if (!si_vm_reg_valid(reg)) {
4465 					DRM_ERROR("CP DMA Bad DST register\n");
4466 					return -EINVAL;
4467 				}
4468 			} else {
4469 				for (i = 0; i < (command & 0x1fffff); i++) {
4470 					reg = start_reg + (4 * i);
4471 				if (!si_vm_reg_valid(reg)) {
4472 						DRM_ERROR("CP DMA Bad DST register\n");
4473 						return -EINVAL;
4474 					}
4475 				}
4476 			}
4477 		}
4478 	}
4479 	return 0;
4480 }
4481 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 for the gfx ring in VM mode
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer contents
 * @pkt: parsed packet descriptor
 *
 * Most draw/state opcodes are allowed as-is; opcodes that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) additionally have their register targets
 * checked against si_vm_reg_valid().
 * Returns 0 if the packet is acceptable, negative error code otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes carry no register writes and need no further checks */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-sel 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel 0 means register destination */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no-increment: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set means register write */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst bit set means register destination */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* the whole written range must stay inside the config space */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4599 
/**
 * si_vm_packet3_compute_check - validate a type-3 packet for a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header (idx/count/opcode)
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller
 * opcode whitelist (no draw packets), the same register-destination
 * checks via si_vm_reg_valid().  Returns 0 on success, -EINVAL on a
 * disallowed packet or register.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that cannot target arbitrary registers: allow as-is */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-sel field 0: destination is a register at dword idx+3 */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest-sel field 0: register destination */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register (one_reg_wr) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential write: validate each register */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: destination register at dword idx+5 */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst bit set: destination register at dword idx+3 */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4687 
/**
 * si_ib_parse - validate an indirect buffer for a VM-enabled ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet and dispatches each type-3 packet to
 * the checker matching its destination (CE for const IBs, otherwise
 * GFX or compute depending on the ring).  Type-0 packets (raw register
 * writes) are rejected outright and the full IB is dumped for
 * debugging.  Returns 0 on success, -EINVAL at the first bad packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* direct register writes from userspace are not allowed */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are single-dword no-ops/padding */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* skip header dword plus count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4745 
4746 /*
4747  * vm
4748  */
/**
 * si_vm_init - initialize the VM manager parameters (SI)
 *
 * @rdev: radeon_device pointer
 *
 * SI supports 16 VM contexts; VRAM pages start at offset 0.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4758 
/* Nothing to tear down: si_vm_init() only set fixed parameters. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4762 
4763 /**
4764  * si_vm_decode_fault - print human readable fault info
4765  *
4766  * @rdev: radeon_device pointer
4767  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4768  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4769  *
4770  * Print human readable fault information (SI).
4771  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the memory-client id, faulting VMID and protection bits
	 * from the VM_CONTEXT1_PROTECTION_FAULT_STATUS value
	 */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* Tahiti has its own memory-client-id to block-name mapping;
	 * all other SI parts share the second table below.
	 */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
5026 
/**
 * si_vm_flush - flush the TLB for a VM context via the GFX/compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id (0-15)
 * @pd_addr: GPU address of the page directory
 *
 * Emits WRITE_DATA packets that (1) point the context's page-table base
 * at @pd_addr, (2) flush the HDP cache, and (3) request a TLB invalidate
 * for @vm_id, then waits for the invalidate to complete.  The packet
 * order is significant; do not reorder.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5075 
5076 /*
5077  *  Power and clock gating
5078  */
5079 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5080 {
5081 	int i;
5082 
5083 	for (i = 0; i < rdev->usec_timeout; i++) {
5084 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5085 			break;
5086 		udelay(1);
5087 	}
5088 
5089 	for (i = 0; i < rdev->usec_timeout; i++) {
5090 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5091 			break;
5092 		udelay(1);
5093 	}
5094 }
5095 
/**
 * si_enable_gui_idle_interrupt - toggle the context busy/empty (GUI idle)
 * interrupts on CP ring 0
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable the interrupts, false to disable
 *
 * When disabling, also performs a dummy GFX register read and polls
 * RLC_STAT until the GFX block reports clocked and powered up with the
 * RLC idle, bounded by rdev->usec_timeout.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		/* wait until GFX is clocked and powered with RLC idle */
		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5121 
5122 static void si_set_uvd_dcm(struct radeon_device *rdev,
5123 			   bool sw_mode)
5124 {
5125 	u32 tmp, tmp2;
5126 
5127 	tmp = RREG32(UVD_CGC_CTRL);
5128 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5129 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5130 
5131 	if (sw_mode) {
5132 		tmp &= ~0x7ffff800;
5133 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5134 	} else {
5135 		tmp |= 0x7ffff800;
5136 		tmp2 = 0;
5137 	}
5138 
5139 	WREG32(UVD_CGC_CTRL, tmp);
5140 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5141 }
5142 
5143 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5144 {
5145 	bool hw_mode = true;
5146 
5147 	if (hw_mode) {
5148 		si_set_uvd_dcm(rdev, false);
5149 	} else {
5150 		u32 tmp = RREG32(UVD_CGC_CTRL);
5151 		tmp &= ~DCM;
5152 		WREG32(UVD_CGC_CTRL, tmp);
5153 	}
5154 }
5155 
5156 static u32 si_halt_rlc(struct radeon_device *rdev)
5157 {
5158 	u32 data, orig;
5159 
5160 	orig = data = RREG32(RLC_CNTL);
5161 
5162 	if (data & RLC_ENABLE) {
5163 		data &= ~RLC_ENABLE;
5164 		WREG32(RLC_CNTL, data);
5165 
5166 		si_wait_for_rlc_serdes(rdev);
5167 	}
5168 
5169 	return orig;
5170 }
5171 
5172 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5173 {
5174 	u32 tmp;
5175 
5176 	tmp = RREG32(RLC_CNTL);
5177 	if (tmp != rlc)
5178 		WREG32(RLC_CNTL, rlc);
5179 }
5180 
5181 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5182 {
5183 	u32 data, orig;
5184 
5185 	orig = data = RREG32(DMA_PG);
5186 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5187 		data |= PG_CNTL_ENABLE;
5188 	else
5189 		data &= ~PG_CNTL_ENABLE;
5190 	if (orig != data)
5191 		WREG32(DMA_PG, data);
5192 }
5193 
5194 static void si_init_dma_pg(struct radeon_device *rdev)
5195 {
5196 	u32 tmp;
5197 
5198 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5199 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5200 
5201 	for (tmp = 0; tmp < 5; tmp++)
5202 		WREG32(DMA_PGFSM_WRITE, 0);
5203 }
5204 
/**
 * si_enable_gfx_cgpg - enable/disable automatic GFX powergating via the RLC
 *
 * @rdev: radeon_device pointer
 * @enable: true to turn gating on (requires RADEON_PG_SUPPORT_GFX_PG)
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* RLC threshold values (0x10 each, per the SI init sequence) */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy GFX register read; result intentionally discarded —
		 * presumably wakes/ungates the GFX block (TODO confirm) */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5229 
/**
 * si_init_gfx_cgpg - program RLC save/restore and clear-state buffer
 * addresses and the auto-powergating idle threshold
 *
 * @rdev: radeon_device pointer
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* buffer addresses are programmed in 256-byte units (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold 0x700; clear the PG_AFTER_GRBM_REG_ST field */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5249 
5250 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5251 {
5252 	u32 mask = 0, tmp, tmp1;
5253 	int i;
5254 
5255 	si_select_se_sh(rdev, se, sh);
5256 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5257 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5258 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5259 
5260 	tmp &= 0xffff0000;
5261 
5262 	tmp |= tmp1;
5263 	tmp >>= 16;
5264 
5265 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5266 		mask <<= 1;
5267 		mask |= 1;
5268 	}
5269 
5270 	return (~tmp) & mask;
5271 }
5272 
5273 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5274 {
5275 	u32 i, j, k, active_cu_number = 0;
5276 	u32 mask, counter, cu_bitmap;
5277 	u32 tmp = 0;
5278 
5279 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5280 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5281 			mask = 1;
5282 			cu_bitmap = 0;
5283 			counter  = 0;
5284 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5285 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5286 					if (counter < 2)
5287 						cu_bitmap |= mask;
5288 					counter++;
5289 				}
5290 				mask <<= 1;
5291 			}
5292 
5293 			active_cu_number += counter;
5294 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5295 		}
5296 	}
5297 
5298 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5299 
5300 	tmp = RREG32(RLC_MAX_PG_CU);
5301 	tmp &= ~MAX_PU_CU_MASK;
5302 	tmp |= MAX_PU_CU(active_cu_number);
5303 	WREG32(RLC_MAX_PG_CU, tmp);
5304 }
5305 
/**
 * si_enable_cgcg - enable/disable coarse-grain GFX clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable (requires RADEON_CG_SUPPORT_GFX_CGCG)
 *
 * Halts the RLC, programs the serdes write masks/control, then restores
 * the RLC before flipping CGCG_EN/CGLS_EN.  The register sequence is
 * order-sensitive; do not reorder.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to all SE/SH */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads; results intentionally discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5345 
/**
 * si_enable_mgcg - enable/disable medium-grain GFX clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable (requires RADEON_CG_SUPPORT_GFX_MGCG)
 *
 * Programs CGTS/CP memory light sleep and the RLC MGCG override, then
 * halts the RLC to push the serdes configuration.  Order-sensitive.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to let MGCG operate */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the override bits on to defeat MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5401 
/**
 * si_enable_uvd_mgcg - enable/disable UVD medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable (requires RADEON_CG_SUPPORT_UVD_MGCG)
 *
 * Toggles the UVD CGC memory-gating bits, the DCM bit in UVD_CGC_CTRL,
 * and the SMC-side CG_CGTT_LOCAL overrides (all-zero when gating, all-ones
 * when not).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5433 
/* Memory-controller registers carrying the MC_LS_ENABLE/MC_CG_ENABLE
 * bits; iterated by si_enable_mc_ls() and si_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5446 
5447 static void si_enable_mc_ls(struct radeon_device *rdev,
5448 			    bool enable)
5449 {
5450 	int i;
5451 	u32 orig, data;
5452 
5453 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5454 		orig = data = RREG32(mc_cg_registers[i]);
5455 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5456 			data |= MC_LS_ENABLE;
5457 		else
5458 			data &= ~MC_LS_ENABLE;
5459 		if (data != orig)
5460 			WREG32(mc_cg_registers[i], data);
5461 	}
5462 }
5463 
5464 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5465 			       bool enable)
5466 {
5467 	int i;
5468 	u32 orig, data;
5469 
5470 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5471 		orig = data = RREG32(mc_cg_registers[i]);
5472 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5473 			data |= MC_CG_ENABLE;
5474 		else
5475 			data &= ~MC_CG_ENABLE;
5476 		if (data != orig)
5477 			WREG32(mc_cg_registers[i], data);
5478 	}
5479 }
5480 
5481 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5482 			       bool enable)
5483 {
5484 	u32 orig, data, offset;
5485 	int i;
5486 
5487 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5488 		for (i = 0; i < 2; i++) {
5489 			if (i == 0)
5490 				offset = DMA0_REGISTER_OFFSET;
5491 			else
5492 				offset = DMA1_REGISTER_OFFSET;
5493 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5494 			data &= ~MEM_POWER_OVERRIDE;
5495 			if (data != orig)
5496 				WREG32(DMA_POWER_CNTL + offset, data);
5497 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5498 		}
5499 	} else {
5500 		for (i = 0; i < 2; i++) {
5501 			if (i == 0)
5502 				offset = DMA0_REGISTER_OFFSET;
5503 			else
5504 				offset = DMA1_REGISTER_OFFSET;
5505 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5506 			data |= MEM_POWER_OVERRIDE;
5507 			if (data != orig)
5508 				WREG32(DMA_POWER_CNTL + offset, data);
5509 
5510 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5511 			data = 0xff000000;
5512 			if (data != orig)
5513 				WREG32(DMA_CLK_CTRL + offset, data);
5514 		}
5515 	}
5516 }
5517 
5518 static void si_enable_bif_mgls(struct radeon_device *rdev,
5519 			       bool enable)
5520 {
5521 	u32 orig, data;
5522 
5523 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5524 
5525 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5526 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5527 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5528 	else
5529 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5530 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5531 
5532 	if (orig != data)
5533 		WREG32_PCIE(PCIE_CNTL2, data);
5534 }
5535 
5536 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5537 			       bool enable)
5538 {
5539 	u32 orig, data;
5540 
5541 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5542 
5543 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5544 		data &= ~CLOCK_GATING_DIS;
5545 	else
5546 		data |= CLOCK_GATING_DIS;
5547 
5548 	if (orig != data)
5549 		WREG32(HDP_HOST_PATH_CNTL, data);
5550 }
5551 
5552 static void si_enable_hdp_ls(struct radeon_device *rdev,
5553 			     bool enable)
5554 {
5555 	u32 orig, data;
5556 
5557 	orig = data = RREG32(HDP_MEM_POWER_LS);
5558 
5559 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5560 		data |= HDP_LS_ENABLE;
5561 	else
5562 		data &= ~HDP_LS_ENABLE;
5563 
5564 	if (orig != data)
5565 		WREG32(HDP_MEM_POWER_LS, data);
5566 }
5567 
/**
 * si_update_cg - update clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* flags selecting which blocks to touch
 * @enable: true to enable gating, false to disable
 *
 * For GFX, MGCG must be enabled before CGCG and disabled after it, with
 * the GUI idle interrupt held off around the transition.
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* UVD gating is only touched on parts that have UVD */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5608 
/* Enable clock gating on all supported blocks; UVD is handled last,
 * followed by its internal clock-gating setup.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5621 
/* Disable clock gating on all blocks, UVD first (reverse of si_init_cg()). */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5633 
5634 u32 si_get_csb_size(struct radeon_device *rdev)
5635 {
5636 	u32 count = 0;
5637 	const struct cs_section_def *sect = NULL;
5638 	const struct cs_extent_def *ext = NULL;
5639 
5640 	if (rdev->rlc.cs_data == NULL)
5641 		return 0;
5642 
5643 	/* begin clear state */
5644 	count += 2;
5645 	/* context control state */
5646 	count += 3;
5647 
5648 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5649 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5650 			if (sect->id == SECT_CONTEXT)
5651 				count += 2 + ext->reg_count;
5652 			else
5653 				return 0;
5654 		}
5655 	}
5656 	/* pa_sc_raster_config */
5657 	count += 3;
5658 	/* end clear state */
5659 	count += 2;
5660 	/* clear state */
5661 	count += 2;
5662 
5663 	return count;
5664 }
5665 
/**
 * si_get_csb_buffer - fill the clear-state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the clear-state preamble, a SET_CONTEXT_REG packet per context
 * extent from rlc.cs_data, the family-specific PA_SC_RASTER_CONFIG
 * value, and the closing CLEAR_STATE packet.  The layout must match the
 * size computed by si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				/* register offsets are relative to the context space */
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* family-specific raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state, then the CLEAR_STATE packet itself */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5725 
/* Initialize powergating: set up the DMA PG FSM and always-on CU mask
 * when supported, program the RLC save/restore and clear-state buffer
 * addresses (always required), then enable DMA and GFX powergating.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* buffer addresses still need programming without GFX PG */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5746 
5747 static void si_fini_pg(struct radeon_device *rdev)
5748 {
5749 	if (rdev->pg_flags) {
5750 		si_enable_dma_pg(rdev, false);
5751 		si_enable_gfx_cgpg(rdev, false);
5752 	}
5753 }
5754 
5755 /*
5756  * RLC
5757  */
5758 void si_rlc_reset(struct radeon_device *rdev)
5759 {
5760 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5761 
5762 	tmp |= SOFT_RESET_RLC;
5763 	WREG32(GRBM_SOFT_RESET, tmp);
5764 	udelay(50);
5765 	tmp &= ~SOFT_RESET_RLC;
5766 	WREG32(GRBM_SOFT_RESET, tmp);
5767 	udelay(50);
5768 }
5769 
5770 static void si_rlc_stop(struct radeon_device *rdev)
5771 {
5772 	WREG32(RLC_CNTL, 0);
5773 
5774 	si_enable_gui_idle_interrupt(rdev, false);
5775 
5776 	si_wait_for_rlc_serdes(rdev);
5777 }
5778 
5779 static void si_rlc_start(struct radeon_device *rdev)
5780 {
5781 	WREG32(RLC_CNTL, RLC_ENABLE);
5782 
5783 	si_enable_gui_idle_interrupt(rdev, true);
5784 
5785 	udelay(50);
5786 }
5787 
5788 static bool si_lbpw_supported(struct radeon_device *rdev)
5789 {
5790 	u32 tmp;
5791 
5792 	/* Enable LBPW only for DDR3 */
5793 	tmp = RREG32(MC_SEQ_MISC0);
5794 	if ((tmp & 0xF0000000) == 0xB0000000)
5795 		return true;
5796 	return false;
5797 }
5798 
5799 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5800 {
5801 	u32 tmp;
5802 
5803 	tmp = RREG32(RLC_LB_CNTL);
5804 	if (enable)
5805 		tmp |= LOAD_BALANCE_ENABLE;
5806 	else
5807 		tmp &= ~LOAD_BALANCE_ENABLE;
5808 	WREG32(RLC_LB_CNTL, tmp);
5809 
5810 	if (!enable) {
5811 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5813 	}
5814 }
5815 
/**
 * si_rlc_resume - stop, reset and reload the RLC, then restart it
 *
 * @rdev: radeon_device pointer
 *
 * Resets the RLC, re-initializes powergating and clockgating state,
 * clears the RLC ring/load-balance registers, uploads the RLC
 * microcode (either the new header-described format or the legacy
 * big-endian blob) and restarts the RLC with LBPW configured.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear RLC ring and load-balance state before the ucode load */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: size and payload offset come from the
		 * rlc header, data is little-endian
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: fixed size, big-endian words */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5870 
5871 static void si_enable_interrupts(struct radeon_device *rdev)
5872 {
5873 	u32 ih_cntl = RREG32(IH_CNTL);
5874 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5875 
5876 	ih_cntl |= ENABLE_INTR;
5877 	ih_rb_cntl |= IH_RB_ENABLE;
5878 	WREG32(IH_CNTL, ih_cntl);
5879 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5880 	rdev->ih.enabled = true;
5881 }
5882 
5883 static void si_disable_interrupts(struct radeon_device *rdev)
5884 {
5885 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5886 	u32 ih_cntl = RREG32(IH_CNTL);
5887 
5888 	ih_rb_cntl &= ~IH_RB_ENABLE;
5889 	ih_cntl &= ~ENABLE_INTR;
5890 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5891 	WREG32(IH_CNTL, ih_cntl);
5892 	/* set rptr, wptr to 0 */
5893 	WREG32(IH_RB_RPTR, 0);
5894 	WREG32(IH_RB_WPTR, 0);
5895 	rdev->ih.enabled = false;
5896 	rdev->ih.rptr = 0;
5897 }
5898 
/**
 * si_disable_interrupt_state - program all interrupt sources to disabled
 *
 * @rdev: radeon_device pointer
 *
 * Clears the enable bits for the CP rings, both DMA engines, GRBM,
 * per-crtc vblank/pageflip interrupts and (on ASICs with a display
 * engine) the HPD interrupts.  HPD polarity bits are preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* ring 0: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline interrupts off on every populated crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts off on every populated crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD enables but keep the polarity bits intact */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5956 
/**
 * si_irq_init - set up the interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring buffer, brings up the RLC, programs the IH
 * ring registers (base, size, writeback address, rptr/wptr), forces
 * all interrupt sources to disabled and then enables the IH.
 *
 * Returns 0 on success, error code from ring allocation or RLC
 * resume on failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6027 
/**
 * si_irq_set - program the interrupt enable registers
 *
 * @rdev: radeon_device pointer
 *
 * Builds the enable masks for the CP rings, the two DMA engines,
 * per-crtc vblank/pageflip, HPD hotplug and the thermal interrupt
 * from the software state in rdev->irq, then writes them to the
 * hardware.  If the IH is disabled, all sources are forced off.
 *
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current hw state with the enable bits cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank on any crtc with either a vblank client or a pending flip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write the assembled masks back to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always unmasked on populated crtcs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
6204 
6205 static inline void si_irq_ack(struct radeon_device *rdev)
6206 {
6207 	u32 tmp;
6208 
6209 	if (ASIC_IS_NODCE(rdev))
6210 		return;
6211 
6212 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6213 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6214 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6215 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6216 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6217 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6218 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6219 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6220 	if (rdev->num_crtc >= 4) {
6221 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6222 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6223 	}
6224 	if (rdev->num_crtc >= 6) {
6225 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6226 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6227 	}
6228 
6229 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6230 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6231 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6232 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6233 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6234 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6235 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6236 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6237 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6238 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6239 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6240 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6241 
6242 	if (rdev->num_crtc >= 4) {
6243 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6244 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6245 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6246 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6247 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6248 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6249 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6250 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6251 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6252 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6253 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6254 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6255 	}
6256 
6257 	if (rdev->num_crtc >= 6) {
6258 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6259 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6260 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6261 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6262 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6263 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6264 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6265 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6266 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6267 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6268 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6269 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6270 	}
6271 
6272 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6273 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6274 		tmp |= DC_HPDx_INT_ACK;
6275 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6276 	}
6277 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6278 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6279 		tmp |= DC_HPDx_INT_ACK;
6280 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6281 	}
6282 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6283 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6284 		tmp |= DC_HPDx_INT_ACK;
6285 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6286 	}
6287 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6288 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6289 		tmp |= DC_HPDx_INT_ACK;
6290 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6291 	}
6292 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6293 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6294 		tmp |= DC_HPDx_INT_ACK;
6295 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6296 	}
6297 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6298 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6299 		tmp |= DC_HPDx_INT_ACK;
6300 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6301 	}
6302 }
6303 
/**
 * si_irq_disable - disable interrupts and quiesce the hardware
 *
 * @rdev: radeon_device pointer
 *
 * Disables the IH, waits for in-flight interrupts to land, acks
 * anything pending and forces all sources to the disabled state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6312 
/* Disable interrupts and stop the RLC in preparation for suspend. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6318 
/* Tear down the interrupt handler: quiesce the hw and free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6324 
6325 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6326 {
6327 	u32 wptr, tmp;
6328 
6329 	if (rdev->wb.enabled)
6330 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6331 	else
6332 		wptr = RREG32(IH_RB_WPTR);
6333 
6334 	if (wptr & RB_OVERFLOW) {
6335 		wptr &= ~RB_OVERFLOW;
6336 		/* When a ring buffer overflow happen start parsing interrupt
6337 		 * from the last not overwritten vector (wptr + 16). Hopefully
6338 		 * this should allow us to catchup.
6339 		 */
6340 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6341 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6342 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6343 		tmp = RREG32(IH_RB_CNTL);
6344 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6345 		WREG32(IH_RB_CNTL, tmp);
6346 	}
6347 	return (wptr & rdev->ih.ptr_mask);
6348 }
6349 
6350 /*        SI IV Ring
6351  * Each IV ring entry is 128 bits:
6352  * [7:0]    - interrupt source id
6353  * [31:8]   - reserved
6354  * [59:32]  - interrupt source data
6355  * [63:60]  - reserved
6356  * [71:64]  - RINGID
6357  * [79:72]  - VMID
6358  * [127:80] - reserved
6359  */
6360 int si_irq_process(struct radeon_device *rdev)
6361 {
6362 	u32 wptr;
6363 	u32 rptr;
6364 	u32 src_id, src_data, ring_id;
6365 	u32 ring_index;
6366 	bool queue_hotplug = false;
6367 	bool queue_thermal = false;
6368 	u32 status, addr;
6369 
6370 	if (!rdev->ih.enabled || rdev->shutdown)
6371 		return IRQ_NONE;
6372 
6373 	wptr = si_get_ih_wptr(rdev);
6374 
6375 restart_ih:
6376 	/* is somebody else already processing irqs? */
6377 	if (atomic_xchg(&rdev->ih.lock, 1))
6378 		return IRQ_NONE;
6379 
6380 	rptr = rdev->ih.rptr;
6381 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6382 
6383 	/* Order reading of wptr vs. reading of IH ring data */
6384 	rmb();
6385 
6386 	/* display interrupts */
6387 	si_irq_ack(rdev);
6388 
6389 	while (rptr != wptr) {
6390 		/* wptr/rptr are in bytes! */
6391 		ring_index = rptr / 4;
6392 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6393 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6394 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6395 
6396 		switch (src_id) {
6397 		case 1: /* D1 vblank/vline */
6398 			switch (src_data) {
6399 			case 0: /* D1 vblank */
6400 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6401 					if (rdev->irq.crtc_vblank_int[0]) {
6402 						drm_handle_vblank(rdev->ddev, 0);
6403 						rdev->pm.vblank_sync = true;
6404 						wake_up(&rdev->irq.vblank_queue);
6405 					}
6406 					if (atomic_read(&rdev->irq.pflip[0]))
6407 						radeon_crtc_handle_vblank(rdev, 0);
6408 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6409 					DRM_DEBUG("IH: D1 vblank\n");
6410 				}
6411 				break;
6412 			case 1: /* D1 vline */
6413 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6414 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6415 					DRM_DEBUG("IH: D1 vline\n");
6416 				}
6417 				break;
6418 			default:
6419 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6420 				break;
6421 			}
6422 			break;
6423 		case 2: /* D2 vblank/vline */
6424 			switch (src_data) {
6425 			case 0: /* D2 vblank */
6426 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6427 					if (rdev->irq.crtc_vblank_int[1]) {
6428 						drm_handle_vblank(rdev->ddev, 1);
6429 						rdev->pm.vblank_sync = true;
6430 						wake_up(&rdev->irq.vblank_queue);
6431 					}
6432 					if (atomic_read(&rdev->irq.pflip[1]))
6433 						radeon_crtc_handle_vblank(rdev, 1);
6434 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6435 					DRM_DEBUG("IH: D2 vblank\n");
6436 				}
6437 				break;
6438 			case 1: /* D2 vline */
6439 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6440 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6441 					DRM_DEBUG("IH: D2 vline\n");
6442 				}
6443 				break;
6444 			default:
6445 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6446 				break;
6447 			}
6448 			break;
6449 		case 3: /* D3 vblank/vline */
6450 			switch (src_data) {
6451 			case 0: /* D3 vblank */
6452 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6453 					if (rdev->irq.crtc_vblank_int[2]) {
6454 						drm_handle_vblank(rdev->ddev, 2);
6455 						rdev->pm.vblank_sync = true;
6456 						wake_up(&rdev->irq.vblank_queue);
6457 					}
6458 					if (atomic_read(&rdev->irq.pflip[2]))
6459 						radeon_crtc_handle_vblank(rdev, 2);
6460 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6461 					DRM_DEBUG("IH: D3 vblank\n");
6462 				}
6463 				break;
6464 			case 1: /* D3 vline */
6465 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6466 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6467 					DRM_DEBUG("IH: D3 vline\n");
6468 				}
6469 				break;
6470 			default:
6471 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6472 				break;
6473 			}
6474 			break;
6475 		case 4: /* D4 vblank/vline */
6476 			switch (src_data) {
6477 			case 0: /* D4 vblank */
6478 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6479 					if (rdev->irq.crtc_vblank_int[3]) {
6480 						drm_handle_vblank(rdev->ddev, 3);
6481 						rdev->pm.vblank_sync = true;
6482 						wake_up(&rdev->irq.vblank_queue);
6483 					}
6484 					if (atomic_read(&rdev->irq.pflip[3]))
6485 						radeon_crtc_handle_vblank(rdev, 3);
6486 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6487 					DRM_DEBUG("IH: D4 vblank\n");
6488 				}
6489 				break;
6490 			case 1: /* D4 vline */
6491 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6492 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6493 					DRM_DEBUG("IH: D4 vline\n");
6494 				}
6495 				break;
6496 			default:
6497 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6498 				break;
6499 			}
6500 			break;
6501 		case 5: /* D5 vblank/vline */
6502 			switch (src_data) {
6503 			case 0: /* D5 vblank */
6504 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6505 					if (rdev->irq.crtc_vblank_int[4]) {
6506 						drm_handle_vblank(rdev->ddev, 4);
6507 						rdev->pm.vblank_sync = true;
6508 						wake_up(&rdev->irq.vblank_queue);
6509 					}
6510 					if (atomic_read(&rdev->irq.pflip[4]))
6511 						radeon_crtc_handle_vblank(rdev, 4);
6512 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6513 					DRM_DEBUG("IH: D5 vblank\n");
6514 				}
6515 				break;
6516 			case 1: /* D5 vline */
6517 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6518 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6519 					DRM_DEBUG("IH: D5 vline\n");
6520 				}
6521 				break;
6522 			default:
6523 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6524 				break;
6525 			}
6526 			break;
6527 		case 6: /* D6 vblank/vline */
6528 			switch (src_data) {
6529 			case 0: /* D6 vblank */
6530 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6531 					if (rdev->irq.crtc_vblank_int[5]) {
6532 						drm_handle_vblank(rdev->ddev, 5);
6533 						rdev->pm.vblank_sync = true;
6534 						wake_up(&rdev->irq.vblank_queue);
6535 					}
6536 					if (atomic_read(&rdev->irq.pflip[5]))
6537 						radeon_crtc_handle_vblank(rdev, 5);
6538 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6539 					DRM_DEBUG("IH: D6 vblank\n");
6540 				}
6541 				break;
6542 			case 1: /* D6 vline */
6543 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6544 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6545 					DRM_DEBUG("IH: D6 vline\n");
6546 				}
6547 				break;
6548 			default:
6549 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6550 				break;
6551 			}
6552 			break;
6553 		case 8: /* D1 page flip */
6554 		case 10: /* D2 page flip */
6555 		case 12: /* D3 page flip */
6556 		case 14: /* D4 page flip */
6557 		case 16: /* D5 page flip */
6558 		case 18: /* D6 page flip */
6559 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6560 			if (radeon_use_pflipirq > 0)
6561 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6562 			break;
6563 		case 42: /* HPD hotplug */
6564 			switch (src_data) {
6565 			case 0:
6566 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6567 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6568 					queue_hotplug = true;
6569 					DRM_DEBUG("IH: HPD1\n");
6570 				}
6571 				break;
6572 			case 1:
6573 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6574 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6575 					queue_hotplug = true;
6576 					DRM_DEBUG("IH: HPD2\n");
6577 				}
6578 				break;
6579 			case 2:
6580 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6581 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6582 					queue_hotplug = true;
6583 					DRM_DEBUG("IH: HPD3\n");
6584 				}
6585 				break;
6586 			case 3:
6587 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6588 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6589 					queue_hotplug = true;
6590 					DRM_DEBUG("IH: HPD4\n");
6591 				}
6592 				break;
6593 			case 4:
6594 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6595 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6596 					queue_hotplug = true;
6597 					DRM_DEBUG("IH: HPD5\n");
6598 				}
6599 				break;
6600 			case 5:
6601 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6602 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6603 					queue_hotplug = true;
6604 					DRM_DEBUG("IH: HPD6\n");
6605 				}
6606 				break;
6607 			default:
6608 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6609 				break;
6610 			}
6611 			break;
6612 		case 124: /* UVD */
6613 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6614 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6615 			break;
6616 		case 146:
6617 		case 147:
6618 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6619 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6620 			/* reset addr and status */
6621 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6622 			if (addr == 0x0 && status == 0x0)
6623 				break;
6624 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6625 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6626 				addr);
6627 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6628 				status);
6629 			si_vm_decode_fault(rdev, status, addr);
6630 			break;
6631 		case 176: /* RINGID0 CP_INT */
6632 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6633 			break;
6634 		case 177: /* RINGID1 CP_INT */
6635 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6636 			break;
6637 		case 178: /* RINGID2 CP_INT */
6638 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6639 			break;
6640 		case 181: /* CP EOP event */
6641 			DRM_DEBUG("IH: CP EOP\n");
6642 			switch (ring_id) {
6643 			case 0:
6644 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6645 				break;
6646 			case 1:
6647 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6648 				break;
6649 			case 2:
6650 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6651 				break;
6652 			}
6653 			break;
6654 		case 224: /* DMA trap event */
6655 			DRM_DEBUG("IH: DMA trap\n");
6656 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6657 			break;
6658 		case 230: /* thermal low to high */
6659 			DRM_DEBUG("IH: thermal low to high\n");
6660 			rdev->pm.dpm.thermal.high_to_low = false;
6661 			queue_thermal = true;
6662 			break;
6663 		case 231: /* thermal high to low */
6664 			DRM_DEBUG("IH: thermal high to low\n");
6665 			rdev->pm.dpm.thermal.high_to_low = true;
6666 			queue_thermal = true;
6667 			break;
6668 		case 233: /* GUI IDLE */
6669 			DRM_DEBUG("IH: GUI idle\n");
6670 			break;
6671 		case 244: /* DMA trap event */
6672 			DRM_DEBUG("IH: DMA1 trap\n");
6673 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6674 			break;
6675 		default:
6676 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6677 			break;
6678 		}
6679 
6680 		/* wptr/rptr are in bytes! */
6681 		rptr += 16;
6682 		rptr &= rdev->ih.ptr_mask;
6683 		WREG32(IH_RB_RPTR, rptr);
6684 	}
6685 	if (queue_hotplug)
6686 		schedule_work(&rdev->hotplug_work);
6687 	if (queue_thermal && rdev->pm.dpm_enabled)
6688 		schedule_work(&rdev->pm.dpm.thermal.work);
6689 	rdev->ih.rptr = rptr;
6690 	atomic_set(&rdev->ih.lock, 0);
6691 
6692 	/* make sure wptr hasn't changed while processing */
6693 	wptr = si_get_ih_wptr(rdev);
6694 	if (wptr != rptr)
6695 		goto restart_ih;
6696 
6697 	return IRQ_HANDLED;
6698 }
6699 
6700 /*
6701  * startup/shutdown callbacks
6702  */
/*
 * si_startup - bring the SI asic into a fully working state.
 *
 * Shared between first time init (si_init) and resume (si_resume).
 * The ordering below matters: scratch memory before the MC, the MC
 * before the GART, fence drivers before the rings, and IRQs before
 * the CP/DMA engines come up.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* NOTE(review): the MC microcode is only loaded here when dpm is
	 * disabled -- presumably the dpm code loads it otherwise; confirm
	 * before relying on this.
	 */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		/* only Verde gets an explicit save/restore register list */
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on all five rings (GFX, two compute CP
	 * rings, two DMA rings) before the rings themselves are set up */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: a failure here only disables the UVD ring
	 * (ring_size = 0) instead of failing the whole startup */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* init the rings: CP rings pad with CP_PACKET2 NOPs, DMA rings
	 * with DMA NOP packets */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring was sized earlier only if uvd_v2_2_resume succeeded */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6879 
6880 int si_resume(struct radeon_device *rdev)
6881 {
6882 	int r;
6883 
6884 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6885 	 * posting will perform necessary task to bring back GPU into good
6886 	 * shape.
6887 	 */
6888 	/* post card */
6889 	atom_asic_init(rdev->mode_info.atom_context);
6890 
6891 	/* init golden registers */
6892 	si_init_golden_registers(rdev);
6893 
6894 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6895 		radeon_pm_resume(rdev);
6896 
6897 	rdev->accel_working = true;
6898 	r = si_startup(rdev);
6899 	if (r) {
6900 		DRM_ERROR("si startup failed on resume\n");
6901 		rdev->accel_working = false;
6902 		return r;
6903 	}
6904 
6905 	return r;
6906 
6907 }
6908 
/*
 * si_suspend - tear the asic down for suspend.
 *
 * Stops the engines roughly in the reverse order of si_startup():
 * power management and audio first, then the CP and DMA engines, UVD,
 * power/clock gating, IRQs and finally writeback and the GART.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6927 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than calling asic specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
6934 int si_init(struct radeon_device *rdev)
6935 {
6936 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6937 	int r;
6938 
6939 	/* Read BIOS */
6940 	if (!radeon_get_bios(rdev)) {
6941 		if (ASIC_IS_AVIVO(rdev))
6942 			return -EINVAL;
6943 	}
6944 	/* Must be an ATOMBIOS */
6945 	if (!rdev->is_atom_bios) {
6946 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6947 		return -EINVAL;
6948 	}
6949 	r = radeon_atombios_init(rdev);
6950 	if (r)
6951 		return r;
6952 
6953 	/* Post card if necessary */
6954 	if (!radeon_card_posted(rdev)) {
6955 		if (!rdev->bios) {
6956 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6957 			return -EINVAL;
6958 		}
6959 		DRM_INFO("GPU not posted. posting now...\n");
6960 		atom_asic_init(rdev->mode_info.atom_context);
6961 	}
6962 	/* init golden registers */
6963 	si_init_golden_registers(rdev);
6964 	/* Initialize scratch registers */
6965 	si_scratch_init(rdev);
6966 	/* Initialize surface registers */
6967 	radeon_surface_init(rdev);
6968 	/* Initialize clocks */
6969 	radeon_get_clock_info(rdev->ddev);
6970 
6971 	/* Fence driver */
6972 	r = radeon_fence_driver_init(rdev);
6973 	if (r)
6974 		return r;
6975 
6976 	/* initialize memory controller */
6977 	r = si_mc_init(rdev);
6978 	if (r)
6979 		return r;
6980 	/* Memory manager */
6981 	r = radeon_bo_init(rdev);
6982 	if (r)
6983 		return r;
6984 
6985 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6986 	    !rdev->rlc_fw || !rdev->mc_fw) {
6987 		r = si_init_microcode(rdev);
6988 		if (r) {
6989 			DRM_ERROR("Failed to load firmware!\n");
6990 			return r;
6991 		}
6992 	}
6993 
6994 	/* Initialize power management */
6995 	radeon_pm_init(rdev);
6996 
6997 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6998 	ring->ring_obj = NULL;
6999 	r600_ring_init(rdev, ring, 1024 * 1024);
7000 
7001 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7002 	ring->ring_obj = NULL;
7003 	r600_ring_init(rdev, ring, 1024 * 1024);
7004 
7005 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7006 	ring->ring_obj = NULL;
7007 	r600_ring_init(rdev, ring, 1024 * 1024);
7008 
7009 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7010 	ring->ring_obj = NULL;
7011 	r600_ring_init(rdev, ring, 64 * 1024);
7012 
7013 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7014 	ring->ring_obj = NULL;
7015 	r600_ring_init(rdev, ring, 64 * 1024);
7016 
7017 	if (rdev->has_uvd) {
7018 		r = radeon_uvd_init(rdev);
7019 		if (!r) {
7020 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7021 			ring->ring_obj = NULL;
7022 			r600_ring_init(rdev, ring, 4096);
7023 		}
7024 	}
7025 
7026 	rdev->ih.ring_obj = NULL;
7027 	r600_ih_ring_init(rdev, 64 * 1024);
7028 
7029 	r = r600_pcie_gart_init(rdev);
7030 	if (r)
7031 		return r;
7032 
7033 	rdev->accel_working = true;
7034 	r = si_startup(rdev);
7035 	if (r) {
7036 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7037 		si_cp_fini(rdev);
7038 		cayman_dma_fini(rdev);
7039 		si_irq_fini(rdev);
7040 		sumo_rlc_fini(rdev);
7041 		radeon_wb_fini(rdev);
7042 		radeon_ib_pool_fini(rdev);
7043 		radeon_vm_manager_fini(rdev);
7044 		radeon_irq_kms_fini(rdev);
7045 		si_pcie_gart_fini(rdev);
7046 		rdev->accel_working = false;
7047 	}
7048 
7049 	/* Don't start up if the MC ucode is missing.
7050 	 * The default clocks and voltages before the MC ucode
7051 	 * is loaded are not suffient for advanced operations.
7052 	 */
7053 	if (!rdev->mc_fw) {
7054 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7055 		return -EINVAL;
7056 	}
7057 
7058 	return 0;
7059 }
7060 
/*
 * si_fini - final asic teardown on driver unload.
 *
 * Undoes everything si_init()/si_startup() set up, including freeing
 * the BIOS copy allocated by radeon_get_bios().
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the pointer to guard against use after free */
	rdev->bios = NULL;
}
7087 
7088 /**
7089  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7090  *
7091  * @rdev: radeon_device pointer
7092  *
7093  * Fetches a GPU clock counter snapshot (SI).
7094  * Returns the 64 bit clock counter snapshot.
7095  */
7096 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7097 {
7098 	uint64_t clock;
7099 
7100 	mutex_lock(&rdev->gpu_clock_mutex);
7101 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7102 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7103 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7104 	mutex_unlock(&rdev->gpu_clock_mutex);
7105 	return clock;
7106 }
7107 
/**
 * si_set_uvd_clocks - program the UPLL to generate the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 means power the PLL down)
 * @dclk: requested UVD decoder clock (0 means power the PLL down)
 *
 * Switches the UPLL into bypass mode, computes and programs the
 * feedback and post dividers for the requested clocks, then switches
 * back to normal mode.  If either requested clock is zero the PLL is
 * left bypassed and put to sleep.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* find dividers satisfying the VCO and reference clock constraints */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7198 
/*
 * si_pcie_gen3_enable - try to bring the PCIE link up to gen2/gen3 speed.
 *
 * No-op for devices on the root bus, IGPs, non-PCIE parts, when the
 * user disabled it via radeon.pcie_gen2=0, or when neither 5.0 nor
 * 8.0 GT/s is supported by both ends of the link.  For a gen3 target
 * the link equalization is redone first; finally a speed change is
 * requested and the function polls until the hardware acknowledges it
 * (or usec_timeout expires).
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* bail out early if the link already runs at the target speed */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both the bridge and the GPU need a PCIe capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) state
			 * on both ends, then force it on during retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the maximum detected width if the
			 * link currently runs narrower and hw supports it */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 -- restore selected saved bits on both
				 * ends.  NOTE(review): the raw (1 << 4) | (7 << 9)
				 * masks predate the PCI_EXP_LNKCTL2_* defines;
				 * verify against the PCIe spec before changing.
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in the low nibble of LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for hw to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7358 
/*
 * si_program_aspm - program the ASPM (active state power management)
 * related PCIE registers.
 *
 * No-op when disabled via radeon.aspm=0 or on non-PCIE parts.  The
 * disable_* locals act as policy knobs and are currently all false,
 * i.e. L0s, L1, PLL-off-in-L1 and CLKREQ-based clocking are all
 * attempted.  Each register is only written when its value actually
 * changes (orig != data).
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF and
			 * TXS2 power states, on both PIF blocks */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp up times on everything except
			 * Oland and Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time is only programmed on Oland/Hainan */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ is only usable when the upstream bridge
			 * advertises clock power management */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* reroute various internal clocks so the
				 * reference clock can be gated */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write back the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* with L0s requested, drop the L0s inactivity timer again if the
	 * link is reversed on both sides and N_FTS is saturated */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7563