xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision 7b6d864b)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 
/* Expected sizes of the per-block microcode images for SI-family ASICs
 * (PFP/PM4(ME)/CE command processors, RLC, and memory controller).
 * NOTE(review): presumably sizes in 32-bit dwords, matched against the
 * loaded firmware blob lengths — confirm against si_init_microcode(). */
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42 
/* Declare every firmware blob this driver may request, per SI ASIC
 * (Tahiti, Pitcairn, Verde, Oland, Hainan): pfp/me/ce command-processor
 * microcode, mc (memory controller) and rlc microcode.  MODULE_FIRMWARE
 * records the names in module metadata so userspace tooling can bundle
 * the files (e.g. into an initramfs). */
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68 
/* Helpers implemented in the r600/evergreen code that SI reuses
 * (interrupt ring management, MC save/restore around mode changes,
 * PCIe tuning, and GPU-hang diagnostics).
 * NOTE(review): kernel style would normally put these declarations in a
 * shared header instead of repeating externs per .c file. */
69 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
70 extern void r600_ih_ring_fini(struct radeon_device *rdev);
71 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
75 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
76 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
77 
/* Tahiti RLC "golden" settings as {register offset, mask, value} triplets,
 * applied at init by radeon_program_register_sequence() (see
 * si_init_golden_registers() below). */
78 static const u32 tahiti_golden_rlc_registers[] =
79 {
80 	0xc424, 0xffffffff, 0x00601005,
81 	0xc47c, 0xffffffff, 0x10104040,
82 	0xc488, 0xffffffff, 0x0100000a,
83 	0xc314, 0xffffffff, 0x00000800,
84 	0xc30c, 0xffffffff, 0x800000f4,
85 	0xf4a8, 0xffffffff, 0x00000000
86 };
87 
/* Tahiti general "golden" register overrides, {offset, mask, value}
 * triplets consumed by radeon_program_register_sequence() in
 * si_init_golden_registers(). */
88 static const u32 tahiti_golden_registers[] =
89 {
90 	0x9a10, 0x00010000, 0x00018208,
91 	0x9830, 0xffffffff, 0x00000000,
92 	0x9834, 0xf00fffff, 0x00000400,
93 	0x9838, 0x0002021c, 0x00020200,
94 	0xc78, 0x00000080, 0x00000000,
95 	0xd030, 0x000300c0, 0x00800040,
96 	0xd830, 0x000300c0, 0x00800040,
97 	0x5bb0, 0x000000f0, 0x00000070,
98 	0x5bc0, 0x00200000, 0x50100000,
99 	0x7030, 0x31000311, 0x00000011,
100 	0x277c, 0x00000003, 0x000007ff,
101 	0x240c, 0x000007ff, 0x00000000,
102 	0x8a14, 0xf000001f, 0x00000007,
103 	0x8b24, 0xffffffff, 0x00ffffff,
104 	0x8b10, 0x0000ff0f, 0x00000000,
105 	0x28a4c, 0x07ffffff, 0x4e000000,
106 	0x28350, 0x3f3f3fff, 0x2a00126a,
107 	0x30, 0x000000ff, 0x0040,
108 	0x34, 0x00000040, 0x00004040,
109 	0x9100, 0x07ffffff, 0x03000000,
110 	0x8e88, 0x01ff1f3f, 0x00000000,
111 	0x8e84, 0x01ff1f3f, 0x00000000,
112 	0x9060, 0x0000007f, 0x00000020,
113 	0x9508, 0x00010000, 0x00010000,
114 	0xac14, 0x00000200, 0x000002fb,
115 	0xac10, 0xffffffff, 0x0000543b,
116 	0xac0c, 0xffffffff, 0xa9210876,
117 	0x88d0, 0xffffffff, 0x000fff40,
118 	0x88d4, 0x0000001f, 0x00000010,
119 	0x1410, 0x20000000, 0x20fffed8,
120 	0x15c0, 0x000c0fc0, 0x000c0400
121 };
122 
/* Additional Tahiti-only golden override, same {offset, mask, value}
 * triplet format; applied last in si_init_golden_registers(). */
123 static const u32 tahiti_golden_registers2[] =
124 {
125 	0xc64, 0x00000001, 0x00000001
126 };
127 
/* Pitcairn RLC golden settings, {offset, mask, value} triplets
 * programmed via radeon_program_register_sequence(). */
128 static const u32 pitcairn_golden_rlc_registers[] =
129 {
130 	0xc424, 0xffffffff, 0x00601004,
131 	0xc47c, 0xffffffff, 0x10102020,
132 	0xc488, 0xffffffff, 0x01000020,
133 	0xc314, 0xffffffff, 0x00000800,
134 	0xc30c, 0xffffffff, 0x800000a4
135 };
136 
/* Pitcairn general golden register overrides, {offset, mask, value}
 * triplets consumed by radeon_program_register_sequence(). */
137 static const u32 pitcairn_golden_registers[] =
138 {
139 	0x9a10, 0x00010000, 0x00018208,
140 	0x9830, 0xffffffff, 0x00000000,
141 	0x9834, 0xf00fffff, 0x00000400,
142 	0x9838, 0x0002021c, 0x00020200,
143 	0xc78, 0x00000080, 0x00000000,
144 	0xd030, 0x000300c0, 0x00800040,
145 	0xd830, 0x000300c0, 0x00800040,
146 	0x5bb0, 0x000000f0, 0x00000070,
147 	0x5bc0, 0x00200000, 0x50100000,
148 	0x7030, 0x31000311, 0x00000011,
149 	0x2ae4, 0x00073ffe, 0x000022a2,
150 	0x240c, 0x000007ff, 0x00000000,
151 	0x8a14, 0xf000001f, 0x00000007,
152 	0x8b24, 0xffffffff, 0x00ffffff,
153 	0x8b10, 0x0000ff0f, 0x00000000,
154 	0x28a4c, 0x07ffffff, 0x4e000000,
155 	0x28350, 0x3f3f3fff, 0x2a00126a,
156 	0x30, 0x000000ff, 0x0040,
157 	0x34, 0x00000040, 0x00004040,
158 	0x9100, 0x07ffffff, 0x03000000,
159 	0x9060, 0x0000007f, 0x00000020,
160 	0x9508, 0x00010000, 0x00010000,
161 	0xac14, 0x000003ff, 0x000000f7,
162 	0xac10, 0xffffffff, 0x00000000,
163 	0xac0c, 0xffffffff, 0x32761054,
164 	0x88d4, 0x0000001f, 0x00000010,
165 	0x15c0, 0x000c0fc0, 0x000c0400
166 };
167 
/* Verde RLC golden settings, {offset, mask, value} triplets
 * programmed via radeon_program_register_sequence(). */
168 static const u32 verde_golden_rlc_registers[] =
169 {
170 	0xc424, 0xffffffff, 0x033f1005,
171 	0xc47c, 0xffffffff, 0x10808020,
172 	0xc488, 0xffffffff, 0x00800008,
173 	0xc314, 0xffffffff, 0x00001000,
174 	0xc30c, 0xffffffff, 0x80010014
175 };
176 
/* Verde general golden register overrides, {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 * NOTE(review): several triplets appear verbatim two or three times
 * (0xd030, 0xd830, 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x8e88, 0x8e84,
 * 0xac14/0xac10/0xac0c, 0x88d4).  The repeated writes are presumably
 * harmless but look like copy/paste duplication — confirm against the
 * vendor-provided golden settings before de-duplicating. */
177 static const u32 verde_golden_registers[] =
178 {
179 	0x9a10, 0x00010000, 0x00018208,
180 	0x9830, 0xffffffff, 0x00000000,
181 	0x9834, 0xf00fffff, 0x00000400,
182 	0x9838, 0x0002021c, 0x00020200,
183 	0xc78, 0x00000080, 0x00000000,
184 	0xd030, 0x000300c0, 0x00800040,
185 	0xd030, 0x000300c0, 0x00800040,
186 	0xd830, 0x000300c0, 0x00800040,
187 	0xd830, 0x000300c0, 0x00800040,
188 	0x5bb0, 0x000000f0, 0x00000070,
189 	0x5bc0, 0x00200000, 0x50100000,
190 	0x7030, 0x31000311, 0x00000011,
191 	0x2ae4, 0x00073ffe, 0x000022a2,
192 	0x2ae4, 0x00073ffe, 0x000022a2,
193 	0x2ae4, 0x00073ffe, 0x000022a2,
194 	0x240c, 0x000007ff, 0x00000000,
195 	0x240c, 0x000007ff, 0x00000000,
196 	0x240c, 0x000007ff, 0x00000000,
197 	0x8a14, 0xf000001f, 0x00000007,
198 	0x8a14, 0xf000001f, 0x00000007,
199 	0x8a14, 0xf000001f, 0x00000007,
200 	0x8b24, 0xffffffff, 0x00ffffff,
201 	0x8b10, 0x0000ff0f, 0x00000000,
202 	0x28a4c, 0x07ffffff, 0x4e000000,
203 	0x28350, 0x3f3f3fff, 0x0000124a,
204 	0x28350, 0x3f3f3fff, 0x0000124a,
205 	0x28350, 0x3f3f3fff, 0x0000124a,
206 	0x30, 0x000000ff, 0x0040,
207 	0x34, 0x00000040, 0x00004040,
208 	0x9100, 0x07ffffff, 0x03000000,
209 	0x9100, 0x07ffffff, 0x03000000,
210 	0x8e88, 0x01ff1f3f, 0x00000000,
211 	0x8e88, 0x01ff1f3f, 0x00000000,
212 	0x8e88, 0x01ff1f3f, 0x00000000,
213 	0x8e84, 0x01ff1f3f, 0x00000000,
214 	0x8e84, 0x01ff1f3f, 0x00000000,
215 	0x8e84, 0x01ff1f3f, 0x00000000,
216 	0x9060, 0x0000007f, 0x00000020,
217 	0x9508, 0x00010000, 0x00010000,
218 	0xac14, 0x000003ff, 0x00000003,
219 	0xac14, 0x000003ff, 0x00000003,
220 	0xac14, 0x000003ff, 0x00000003,
221 	0xac10, 0xffffffff, 0x00000000,
222 	0xac10, 0xffffffff, 0x00000000,
223 	0xac10, 0xffffffff, 0x00000000,
224 	0xac0c, 0xffffffff, 0x00001032,
225 	0xac0c, 0xffffffff, 0x00001032,
226 	0xac0c, 0xffffffff, 0x00001032,
227 	0x88d4, 0x0000001f, 0x00000010,
228 	0x88d4, 0x0000001f, 0x00000010,
229 	0x88d4, 0x0000001f, 0x00000010,
230 	0x15c0, 0x000c0fc0, 0x000c0400
231 };
232 
/* Oland RLC golden settings, {offset, mask, value} triplets; same
 * values as the Tahiti RLC table minus the trailing 0xf4a8 entry. */
233 static const u32 oland_golden_rlc_registers[] =
234 {
235 	0xc424, 0xffffffff, 0x00601005,
236 	0xc47c, 0xffffffff, 0x10104040,
237 	0xc488, 0xffffffff, 0x0100000a,
238 	0xc314, 0xffffffff, 0x00000800,
239 	0xc30c, 0xffffffff, 0x800000f4
240 };
241 
/* Oland general golden register overrides, {offset, mask, value}
 * triplets; presumably programmed via radeon_program_register_sequence()
 * like the other tables (the CHIP_OLAND case is outside this view). */
242 static const u32 oland_golden_registers[] =
243 {
244 	0x9a10, 0x00010000, 0x00018208,
245 	0x9830, 0xffffffff, 0x00000000,
246 	0x9834, 0xf00fffff, 0x00000400,
247 	0x9838, 0x0002021c, 0x00020200,
248 	0xc78, 0x00000080, 0x00000000,
249 	0xd030, 0x000300c0, 0x00800040,
250 	0xd830, 0x000300c0, 0x00800040,
251 	0x5bb0, 0x000000f0, 0x00000070,
252 	0x5bc0, 0x00200000, 0x50100000,
253 	0x7030, 0x31000311, 0x00000011,
254 	0x2ae4, 0x00073ffe, 0x000022a2,
255 	0x240c, 0x000007ff, 0x00000000,
256 	0x8a14, 0xf000001f, 0x00000007,
257 	0x8b24, 0xffffffff, 0x00ffffff,
258 	0x8b10, 0x0000ff0f, 0x00000000,
259 	0x28a4c, 0x07ffffff, 0x4e000000,
260 	0x28350, 0x3f3f3fff, 0x00000082,
261 	0x30, 0x000000ff, 0x0040,
262 	0x34, 0x00000040, 0x00004040,
263 	0x9100, 0x07ffffff, 0x03000000,
264 	0x9060, 0x0000007f, 0x00000020,
265 	0x9508, 0x00010000, 0x00010000,
266 	0xac14, 0x000003ff, 0x000000f3,
267 	0xac10, 0xffffffff, 0x00000000,
268 	0xac0c, 0xffffffff, 0x00003210,
269 	0x88d4, 0x0000001f, 0x00000010,
270 	0x15c0, 0x000c0fc0, 0x000c0400
271 };
272 
/* Hainan general golden register overrides, {offset, mask, value}
 * triplets; presumably programmed via radeon_program_register_sequence()
 * like the other tables (the CHIP_HAINAN case is outside this view). */
273 static const u32 hainan_golden_registers[] =
274 {
275 	0x9a10, 0x00010000, 0x00018208,
276 	0x9830, 0xffffffff, 0x00000000,
277 	0x9834, 0xf00fffff, 0x00000400,
278 	0x9838, 0x0002021c, 0x00020200,
279 	0xd0c0, 0xff000fff, 0x00000100,
280 	0xd030, 0x000300c0, 0x00800040,
281 	0xd8c0, 0xff000fff, 0x00000100,
282 	0xd830, 0x000300c0, 0x00800040,
283 	0x2ae4, 0x00073ffe, 0x000022a2,
284 	0x240c, 0x000007ff, 0x00000000,
285 	0x8a14, 0xf000001f, 0x00000007,
286 	0x8b24, 0xffffffff, 0x00ffffff,
287 	0x8b10, 0x0000ff0f, 0x00000000,
288 	0x28a4c, 0x07ffffff, 0x4e000000,
289 	0x28350, 0x3f3f3fff, 0x00000000,
290 	0x30, 0x000000ff, 0x0040,
291 	0x34, 0x00000040, 0x00004040,
292 	0x9100, 0x03e00000, 0x03600000,
293 	0x9060, 0x0000007f, 0x00000020,
294 	0x9508, 0x00010000, 0x00010000,
295 	0xac14, 0x000003ff, 0x000000f1,
296 	0xac10, 0xffffffff, 0x00000000,
297 	0xac0c, 0xffffffff, 0x00003210,
298 	0x88d4, 0x0000001f, 0x00000010,
299 	0x15c0, 0x000c0fc0, 0x000c0400
300 };
301 
/* Additional Hainan-only golden override, same triplet format. */
302 static const u32 hainan_golden_registers2[] =
303 {
304 	0x98f8, 0xffffffff, 0x02010001
305 };
306 
/* Tahiti clock-gating init values, {offset, mask, value} triplets
 * applied by radeon_program_register_sequence() in
 * si_init_golden_registers().  NOTE(review): per the identifier, these
 * presumably configure medium-grain/coarse-grain clock gating
 * (MGCG/CGCG) — confirm against the SI register reference. */
307 static const u32 tahiti_mgcg_cgcg_init[] =
308 {
309 	0xc400, 0xffffffff, 0xfffffffc,
310 	0x802c, 0xffffffff, 0xe0000000,
311 	0x9a60, 0xffffffff, 0x00000100,
312 	0x92a4, 0xffffffff, 0x00000100,
313 	0xc164, 0xffffffff, 0x00000100,
314 	0x9774, 0xffffffff, 0x00000100,
315 	0x8984, 0xffffffff, 0x06000100,
316 	0x8a18, 0xffffffff, 0x00000100,
317 	0x92a0, 0xffffffff, 0x00000100,
318 	0xc380, 0xffffffff, 0x00000100,
319 	0x8b28, 0xffffffff, 0x00000100,
320 	0x9144, 0xffffffff, 0x00000100,
321 	0x8d88, 0xffffffff, 0x00000100,
322 	0x8d8c, 0xffffffff, 0x00000100,
323 	0x9030, 0xffffffff, 0x00000100,
324 	0x9034, 0xffffffff, 0x00000100,
325 	0x9038, 0xffffffff, 0x00000100,
326 	0x903c, 0xffffffff, 0x00000100,
327 	0xad80, 0xffffffff, 0x00000100,
328 	0xac54, 0xffffffff, 0x00000100,
329 	0x897c, 0xffffffff, 0x06000100,
330 	0x9868, 0xffffffff, 0x00000100,
331 	0x9510, 0xffffffff, 0x00000100,
332 	0xaf04, 0xffffffff, 0x00000100,
333 	0xae04, 0xffffffff, 0x00000100,
334 	0x949c, 0xffffffff, 0x00000100,
335 	0x802c, 0xffffffff, 0xe0000000,
336 	0x9160, 0xffffffff, 0x00010000,
337 	0x9164, 0xffffffff, 0x00030002,
338 	0x9168, 0xffffffff, 0x00040007,
339 	0x916c, 0xffffffff, 0x00060005,
340 	0x9170, 0xffffffff, 0x00090008,
341 	0x9174, 0xffffffff, 0x00020001,
342 	0x9178, 0xffffffff, 0x00040003,
343 	0x917c, 0xffffffff, 0x00000007,
344 	0x9180, 0xffffffff, 0x00060005,
345 	0x9184, 0xffffffff, 0x00090008,
346 	0x9188, 0xffffffff, 0x00030002,
347 	0x918c, 0xffffffff, 0x00050004,
348 	0x9190, 0xffffffff, 0x00000008,
349 	0x9194, 0xffffffff, 0x00070006,
350 	0x9198, 0xffffffff, 0x000a0009,
351 	0x919c, 0xffffffff, 0x00040003,
352 	0x91a0, 0xffffffff, 0x00060005,
353 	0x91a4, 0xffffffff, 0x00000009,
354 	0x91a8, 0xffffffff, 0x00080007,
355 	0x91ac, 0xffffffff, 0x000b000a,
356 	0x91b0, 0xffffffff, 0x00050004,
357 	0x91b4, 0xffffffff, 0x00070006,
358 	0x91b8, 0xffffffff, 0x0008000b,
359 	0x91bc, 0xffffffff, 0x000a0009,
360 	0x91c0, 0xffffffff, 0x000d000c,
361 	0x91c4, 0xffffffff, 0x00060005,
362 	0x91c8, 0xffffffff, 0x00080007,
363 	0x91cc, 0xffffffff, 0x0000000b,
364 	0x91d0, 0xffffffff, 0x000a0009,
365 	0x91d4, 0xffffffff, 0x000d000c,
366 	0x91d8, 0xffffffff, 0x00070006,
367 	0x91dc, 0xffffffff, 0x00090008,
368 	0x91e0, 0xffffffff, 0x0000000c,
369 	0x91e4, 0xffffffff, 0x000b000a,
370 	0x91e8, 0xffffffff, 0x000e000d,
371 	0x91ec, 0xffffffff, 0x00080007,
372 	0x91f0, 0xffffffff, 0x000a0009,
373 	0x91f4, 0xffffffff, 0x0000000d,
374 	0x91f8, 0xffffffff, 0x000c000b,
375 	0x91fc, 0xffffffff, 0x000f000e,
376 	0x9200, 0xffffffff, 0x00090008,
377 	0x9204, 0xffffffff, 0x000b000a,
378 	0x9208, 0xffffffff, 0x000c000f,
379 	0x920c, 0xffffffff, 0x000e000d,
380 	0x9210, 0xffffffff, 0x00110010,
381 	0x9214, 0xffffffff, 0x000a0009,
382 	0x9218, 0xffffffff, 0x000c000b,
383 	0x921c, 0xffffffff, 0x0000000f,
384 	0x9220, 0xffffffff, 0x000e000d,
385 	0x9224, 0xffffffff, 0x00110010,
386 	0x9228, 0xffffffff, 0x000b000a,
387 	0x922c, 0xffffffff, 0x000d000c,
388 	0x9230, 0xffffffff, 0x00000010,
389 	0x9234, 0xffffffff, 0x000f000e,
390 	0x9238, 0xffffffff, 0x00120011,
391 	0x923c, 0xffffffff, 0x000c000b,
392 	0x9240, 0xffffffff, 0x000e000d,
393 	0x9244, 0xffffffff, 0x00000011,
394 	0x9248, 0xffffffff, 0x0010000f,
395 	0x924c, 0xffffffff, 0x00130012,
396 	0x9250, 0xffffffff, 0x000d000c,
397 	0x9254, 0xffffffff, 0x000f000e,
398 	0x9258, 0xffffffff, 0x00100013,
399 	0x925c, 0xffffffff, 0x00120011,
400 	0x9260, 0xffffffff, 0x00150014,
401 	0x9264, 0xffffffff, 0x000e000d,
402 	0x9268, 0xffffffff, 0x0010000f,
403 	0x926c, 0xffffffff, 0x00000013,
404 	0x9270, 0xffffffff, 0x00120011,
405 	0x9274, 0xffffffff, 0x00150014,
406 	0x9278, 0xffffffff, 0x000f000e,
407 	0x927c, 0xffffffff, 0x00110010,
408 	0x9280, 0xffffffff, 0x00000014,
409 	0x9284, 0xffffffff, 0x00130012,
410 	0x9288, 0xffffffff, 0x00160015,
411 	0x928c, 0xffffffff, 0x0010000f,
412 	0x9290, 0xffffffff, 0x00120011,
413 	0x9294, 0xffffffff, 0x00000015,
414 	0x9298, 0xffffffff, 0x00140013,
415 	0x929c, 0xffffffff, 0x00170016,
416 	0x9150, 0xffffffff, 0x96940200,
417 	0x8708, 0xffffffff, 0x00900100,
418 	0xc478, 0xffffffff, 0x00000080,
419 	0xc404, 0xffffffff, 0x0020003f,
420 	0x30, 0xffffffff, 0x0000001c,
421 	0x34, 0x000f0000, 0x000f0000,
422 	0x160c, 0xffffffff, 0x00000100,
423 	0x1024, 0xffffffff, 0x00000100,
424 	0x102c, 0x00000101, 0x00000000,
425 	0x20a8, 0xffffffff, 0x00000104,
426 	0x264c, 0x000c0000, 0x000c0000,
427 	0x2648, 0x000c0000, 0x000c0000,
428 	0x55e4, 0xff000fff, 0x00000100,
429 	0x55e8, 0x00000001, 0x00000001,
430 	0x2f50, 0x00000001, 0x00000001,
431 	0x30cc, 0xc0000fff, 0x00000104,
432 	0xc1e4, 0x00000001, 0x00000001,
433 	0xd0c0, 0xfffffff0, 0x00000100,
434 	0xd8c0, 0xfffffff0, 0x00000100
435 };
436 
/* Pitcairn clock-gating init values, {offset, mask, value} triplets
 * applied by radeon_program_register_sequence() in
 * si_init_golden_registers().  Same layout as the Tahiti table but with
 * fewer 0x91xx entries and no 0x264c/0x2648 pair. */
437 static const u32 pitcairn_mgcg_cgcg_init[] =
438 {
439 	0xc400, 0xffffffff, 0xfffffffc,
440 	0x802c, 0xffffffff, 0xe0000000,
441 	0x9a60, 0xffffffff, 0x00000100,
442 	0x92a4, 0xffffffff, 0x00000100,
443 	0xc164, 0xffffffff, 0x00000100,
444 	0x9774, 0xffffffff, 0x00000100,
445 	0x8984, 0xffffffff, 0x06000100,
446 	0x8a18, 0xffffffff, 0x00000100,
447 	0x92a0, 0xffffffff, 0x00000100,
448 	0xc380, 0xffffffff, 0x00000100,
449 	0x8b28, 0xffffffff, 0x00000100,
450 	0x9144, 0xffffffff, 0x00000100,
451 	0x8d88, 0xffffffff, 0x00000100,
452 	0x8d8c, 0xffffffff, 0x00000100,
453 	0x9030, 0xffffffff, 0x00000100,
454 	0x9034, 0xffffffff, 0x00000100,
455 	0x9038, 0xffffffff, 0x00000100,
456 	0x903c, 0xffffffff, 0x00000100,
457 	0xad80, 0xffffffff, 0x00000100,
458 	0xac54, 0xffffffff, 0x00000100,
459 	0x897c, 0xffffffff, 0x06000100,
460 	0x9868, 0xffffffff, 0x00000100,
461 	0x9510, 0xffffffff, 0x00000100,
462 	0xaf04, 0xffffffff, 0x00000100,
463 	0xae04, 0xffffffff, 0x00000100,
464 	0x949c, 0xffffffff, 0x00000100,
465 	0x802c, 0xffffffff, 0xe0000000,
466 	0x9160, 0xffffffff, 0x00010000,
467 	0x9164, 0xffffffff, 0x00030002,
468 	0x9168, 0xffffffff, 0x00040007,
469 	0x916c, 0xffffffff, 0x00060005,
470 	0x9170, 0xffffffff, 0x00090008,
471 	0x9174, 0xffffffff, 0x00020001,
472 	0x9178, 0xffffffff, 0x00040003,
473 	0x917c, 0xffffffff, 0x00000007,
474 	0x9180, 0xffffffff, 0x00060005,
475 	0x9184, 0xffffffff, 0x00090008,
476 	0x9188, 0xffffffff, 0x00030002,
477 	0x918c, 0xffffffff, 0x00050004,
478 	0x9190, 0xffffffff, 0x00000008,
479 	0x9194, 0xffffffff, 0x00070006,
480 	0x9198, 0xffffffff, 0x000a0009,
481 	0x919c, 0xffffffff, 0x00040003,
482 	0x91a0, 0xffffffff, 0x00060005,
483 	0x91a4, 0xffffffff, 0x00000009,
484 	0x91a8, 0xffffffff, 0x00080007,
485 	0x91ac, 0xffffffff, 0x000b000a,
486 	0x91b0, 0xffffffff, 0x00050004,
487 	0x91b4, 0xffffffff, 0x00070006,
488 	0x91b8, 0xffffffff, 0x0008000b,
489 	0x91bc, 0xffffffff, 0x000a0009,
490 	0x91c0, 0xffffffff, 0x000d000c,
491 	0x9200, 0xffffffff, 0x00090008,
492 	0x9204, 0xffffffff, 0x000b000a,
493 	0x9208, 0xffffffff, 0x000c000f,
494 	0x920c, 0xffffffff, 0x000e000d,
495 	0x9210, 0xffffffff, 0x00110010,
496 	0x9214, 0xffffffff, 0x000a0009,
497 	0x9218, 0xffffffff, 0x000c000b,
498 	0x921c, 0xffffffff, 0x0000000f,
499 	0x9220, 0xffffffff, 0x000e000d,
500 	0x9224, 0xffffffff, 0x00110010,
501 	0x9228, 0xffffffff, 0x000b000a,
502 	0x922c, 0xffffffff, 0x000d000c,
503 	0x9230, 0xffffffff, 0x00000010,
504 	0x9234, 0xffffffff, 0x000f000e,
505 	0x9238, 0xffffffff, 0x00120011,
506 	0x923c, 0xffffffff, 0x000c000b,
507 	0x9240, 0xffffffff, 0x000e000d,
508 	0x9244, 0xffffffff, 0x00000011,
509 	0x9248, 0xffffffff, 0x0010000f,
510 	0x924c, 0xffffffff, 0x00130012,
511 	0x9250, 0xffffffff, 0x000d000c,
512 	0x9254, 0xffffffff, 0x000f000e,
513 	0x9258, 0xffffffff, 0x00100013,
514 	0x925c, 0xffffffff, 0x00120011,
515 	0x9260, 0xffffffff, 0x00150014,
516 	0x9150, 0xffffffff, 0x96940200,
517 	0x8708, 0xffffffff, 0x00900100,
518 	0xc478, 0xffffffff, 0x00000080,
519 	0xc404, 0xffffffff, 0x0020003f,
520 	0x30, 0xffffffff, 0x0000001c,
521 	0x34, 0x000f0000, 0x000f0000,
522 	0x160c, 0xffffffff, 0x00000100,
523 	0x1024, 0xffffffff, 0x00000100,
524 	0x102c, 0x00000101, 0x00000000,
525 	0x20a8, 0xffffffff, 0x00000104,
526 	0x55e4, 0xff000fff, 0x00000100,
527 	0x55e8, 0x00000001, 0x00000001,
528 	0x2f50, 0x00000001, 0x00000001,
529 	0x30cc, 0xc0000fff, 0x00000104,
530 	0xc1e4, 0x00000001, 0x00000001,
531 	0xd0c0, 0xfffffff0, 0x00000100,
532 	0xd8c0, 0xfffffff0, 0x00000100
533 };
534 
/* Verde clock-gating init values, {offset, mask, value} triplets;
 * passed to radeon_program_register_sequence() from the CHIP_VERDE
 * branch of si_init_golden_registers(). */
535 static const u32 verde_mgcg_cgcg_init[] =
536 {
537 	0xc400, 0xffffffff, 0xfffffffc,
538 	0x802c, 0xffffffff, 0xe0000000,
539 	0x9a60, 0xffffffff, 0x00000100,
540 	0x92a4, 0xffffffff, 0x00000100,
541 	0xc164, 0xffffffff, 0x00000100,
542 	0x9774, 0xffffffff, 0x00000100,
543 	0x8984, 0xffffffff, 0x06000100,
544 	0x8a18, 0xffffffff, 0x00000100,
545 	0x92a0, 0xffffffff, 0x00000100,
546 	0xc380, 0xffffffff, 0x00000100,
547 	0x8b28, 0xffffffff, 0x00000100,
548 	0x9144, 0xffffffff, 0x00000100,
549 	0x8d88, 0xffffffff, 0x00000100,
550 	0x8d8c, 0xffffffff, 0x00000100,
551 	0x9030, 0xffffffff, 0x00000100,
552 	0x9034, 0xffffffff, 0x00000100,
553 	0x9038, 0xffffffff, 0x00000100,
554 	0x903c, 0xffffffff, 0x00000100,
555 	0xad80, 0xffffffff, 0x00000100,
556 	0xac54, 0xffffffff, 0x00000100,
557 	0x897c, 0xffffffff, 0x06000100,
558 	0x9868, 0xffffffff, 0x00000100,
559 	0x9510, 0xffffffff, 0x00000100,
560 	0xaf04, 0xffffffff, 0x00000100,
561 	0xae04, 0xffffffff, 0x00000100,
562 	0x949c, 0xffffffff, 0x00000100,
563 	0x802c, 0xffffffff, 0xe0000000,
564 	0x9160, 0xffffffff, 0x00010000,
565 	0x9164, 0xffffffff, 0x00030002,
566 	0x9168, 0xffffffff, 0x00040007,
567 	0x916c, 0xffffffff, 0x00060005,
568 	0x9170, 0xffffffff, 0x00090008,
569 	0x9174, 0xffffffff, 0x00020001,
570 	0x9178, 0xffffffff, 0x00040003,
571 	0x917c, 0xffffffff, 0x00000007,
572 	0x9180, 0xffffffff, 0x00060005,
573 	0x9184, 0xffffffff, 0x00090008,
574 	0x9188, 0xffffffff, 0x00030002,
575 	0x918c, 0xffffffff, 0x00050004,
576 	0x9190, 0xffffffff, 0x00000008,
577 	0x9194, 0xffffffff, 0x00070006,
578 	0x9198, 0xffffffff, 0x000a0009,
579 	0x919c, 0xffffffff, 0x00040003,
580 	0x91a0, 0xffffffff, 0x00060005,
581 	0x91a4, 0xffffffff, 0x00000009,
582 	0x91a8, 0xffffffff, 0x00080007,
583 	0x91ac, 0xffffffff, 0x000b000a,
584 	0x91b0, 0xffffffff, 0x00050004,
585 	0x91b4, 0xffffffff, 0x00070006,
586 	0x91b8, 0xffffffff, 0x0008000b,
587 	0x91bc, 0xffffffff, 0x000a0009,
588 	0x91c0, 0xffffffff, 0x000d000c,
589 	0x9200, 0xffffffff, 0x00090008,
590 	0x9204, 0xffffffff, 0x000b000a,
591 	0x9208, 0xffffffff, 0x000c000f,
592 	0x920c, 0xffffffff, 0x000e000d,
593 	0x9210, 0xffffffff, 0x00110010,
594 	0x9214, 0xffffffff, 0x000a0009,
595 	0x9218, 0xffffffff, 0x000c000b,
596 	0x921c, 0xffffffff, 0x0000000f,
597 	0x9220, 0xffffffff, 0x000e000d,
598 	0x9224, 0xffffffff, 0x00110010,
599 	0x9228, 0xffffffff, 0x000b000a,
600 	0x922c, 0xffffffff, 0x000d000c,
601 	0x9230, 0xffffffff, 0x00000010,
602 	0x9234, 0xffffffff, 0x000f000e,
603 	0x9238, 0xffffffff, 0x00120011,
604 	0x923c, 0xffffffff, 0x000c000b,
605 	0x9240, 0xffffffff, 0x000e000d,
606 	0x9244, 0xffffffff, 0x00000011,
607 	0x9248, 0xffffffff, 0x0010000f,
608 	0x924c, 0xffffffff, 0x00130012,
609 	0x9250, 0xffffffff, 0x000d000c,
610 	0x9254, 0xffffffff, 0x000f000e,
611 	0x9258, 0xffffffff, 0x00100013,
612 	0x925c, 0xffffffff, 0x00120011,
613 	0x9260, 0xffffffff, 0x00150014,
614 	0x9150, 0xffffffff, 0x96940200,
615 	0x8708, 0xffffffff, 0x00900100,
616 	0xc478, 0xffffffff, 0x00000080,
617 	0xc404, 0xffffffff, 0x0020003f,
618 	0x30, 0xffffffff, 0x0000001c,
619 	0x34, 0x000f0000, 0x000f0000,
620 	0x160c, 0xffffffff, 0x00000100,
621 	0x1024, 0xffffffff, 0x00000100,
622 	0x102c, 0x00000101, 0x00000000,
623 	0x20a8, 0xffffffff, 0x00000104,
624 	0x264c, 0x000c0000, 0x000c0000,
625 	0x2648, 0x000c0000, 0x000c0000,
626 	0x55e4, 0xff000fff, 0x00000100,
627 	0x55e8, 0x00000001, 0x00000001,
628 	0x2f50, 0x00000001, 0x00000001,
629 	0x30cc, 0xc0000fff, 0x00000104,
630 	0xc1e4, 0x00000001, 0x00000001,
631 	0xd0c0, 0xfffffff0, 0x00000100,
632 	0xd8c0, 0xfffffff0, 0x00000100
633 };
634 
/* Oland clock-gating init values, {offset, mask, value} triplets;
 * presumably applied via radeon_program_register_sequence() like the
 * other mgcg_cgcg tables (the CHIP_OLAND case is outside this view). */
635 static const u32 oland_mgcg_cgcg_init[] =
636 {
637 	0xc400, 0xffffffff, 0xfffffffc,
638 	0x802c, 0xffffffff, 0xe0000000,
639 	0x9a60, 0xffffffff, 0x00000100,
640 	0x92a4, 0xffffffff, 0x00000100,
641 	0xc164, 0xffffffff, 0x00000100,
642 	0x9774, 0xffffffff, 0x00000100,
643 	0x8984, 0xffffffff, 0x06000100,
644 	0x8a18, 0xffffffff, 0x00000100,
645 	0x92a0, 0xffffffff, 0x00000100,
646 	0xc380, 0xffffffff, 0x00000100,
647 	0x8b28, 0xffffffff, 0x00000100,
648 	0x9144, 0xffffffff, 0x00000100,
649 	0x8d88, 0xffffffff, 0x00000100,
650 	0x8d8c, 0xffffffff, 0x00000100,
651 	0x9030, 0xffffffff, 0x00000100,
652 	0x9034, 0xffffffff, 0x00000100,
653 	0x9038, 0xffffffff, 0x00000100,
654 	0x903c, 0xffffffff, 0x00000100,
655 	0xad80, 0xffffffff, 0x00000100,
656 	0xac54, 0xffffffff, 0x00000100,
657 	0x897c, 0xffffffff, 0x06000100,
658 	0x9868, 0xffffffff, 0x00000100,
659 	0x9510, 0xffffffff, 0x00000100,
660 	0xaf04, 0xffffffff, 0x00000100,
661 	0xae04, 0xffffffff, 0x00000100,
662 	0x949c, 0xffffffff, 0x00000100,
663 	0x802c, 0xffffffff, 0xe0000000,
664 	0x9160, 0xffffffff, 0x00010000,
665 	0x9164, 0xffffffff, 0x00030002,
666 	0x9168, 0xffffffff, 0x00040007,
667 	0x916c, 0xffffffff, 0x00060005,
668 	0x9170, 0xffffffff, 0x00090008,
669 	0x9174, 0xffffffff, 0x00020001,
670 	0x9178, 0xffffffff, 0x00040003,
671 	0x917c, 0xffffffff, 0x00000007,
672 	0x9180, 0xffffffff, 0x00060005,
673 	0x9184, 0xffffffff, 0x00090008,
674 	0x9188, 0xffffffff, 0x00030002,
675 	0x918c, 0xffffffff, 0x00050004,
676 	0x9190, 0xffffffff, 0x00000008,
677 	0x9194, 0xffffffff, 0x00070006,
678 	0x9198, 0xffffffff, 0x000a0009,
679 	0x919c, 0xffffffff, 0x00040003,
680 	0x91a0, 0xffffffff, 0x00060005,
681 	0x91a4, 0xffffffff, 0x00000009,
682 	0x91a8, 0xffffffff, 0x00080007,
683 	0x91ac, 0xffffffff, 0x000b000a,
684 	0x91b0, 0xffffffff, 0x00050004,
685 	0x91b4, 0xffffffff, 0x00070006,
686 	0x91b8, 0xffffffff, 0x0008000b,
687 	0x91bc, 0xffffffff, 0x000a0009,
688 	0x91c0, 0xffffffff, 0x000d000c,
689 	0x91c4, 0xffffffff, 0x00060005,
690 	0x91c8, 0xffffffff, 0x00080007,
691 	0x91cc, 0xffffffff, 0x0000000b,
692 	0x91d0, 0xffffffff, 0x000a0009,
693 	0x91d4, 0xffffffff, 0x000d000c,
694 	0x9150, 0xffffffff, 0x96940200,
695 	0x8708, 0xffffffff, 0x00900100,
696 	0xc478, 0xffffffff, 0x00000080,
697 	0xc404, 0xffffffff, 0x0020003f,
698 	0x30, 0xffffffff, 0x0000001c,
699 	0x34, 0x000f0000, 0x000f0000,
700 	0x160c, 0xffffffff, 0x00000100,
701 	0x1024, 0xffffffff, 0x00000100,
702 	0x102c, 0x00000101, 0x00000000,
703 	0x20a8, 0xffffffff, 0x00000104,
704 	0x264c, 0x000c0000, 0x000c0000,
705 	0x2648, 0x000c0000, 0x000c0000,
706 	0x55e4, 0xff000fff, 0x00000100,
707 	0x55e8, 0x00000001, 0x00000001,
708 	0x2f50, 0x00000001, 0x00000001,
709 	0x30cc, 0xc0000fff, 0x00000104,
710 	0xc1e4, 0x00000001, 0x00000001,
711 	0xd0c0, 0xfffffff0, 0x00000100,
712 	0xd8c0, 0xfffffff0, 0x00000100
713 };
714 
/* Hainan clock-gating init values, {offset, mask, value} triplets;
 * presumably applied via radeon_program_register_sequence() like the
 * other mgcg_cgcg tables (the CHIP_HAINAN case is outside this view).
 * Same contents as the Oland table minus the 0x102c and 0x55e4/0x55e8
 * entries. */
715 static const u32 hainan_mgcg_cgcg_init[] =
716 {
717 	0xc400, 0xffffffff, 0xfffffffc,
718 	0x802c, 0xffffffff, 0xe0000000,
719 	0x9a60, 0xffffffff, 0x00000100,
720 	0x92a4, 0xffffffff, 0x00000100,
721 	0xc164, 0xffffffff, 0x00000100,
722 	0x9774, 0xffffffff, 0x00000100,
723 	0x8984, 0xffffffff, 0x06000100,
724 	0x8a18, 0xffffffff, 0x00000100,
725 	0x92a0, 0xffffffff, 0x00000100,
726 	0xc380, 0xffffffff, 0x00000100,
727 	0x8b28, 0xffffffff, 0x00000100,
728 	0x9144, 0xffffffff, 0x00000100,
729 	0x8d88, 0xffffffff, 0x00000100,
730 	0x8d8c, 0xffffffff, 0x00000100,
731 	0x9030, 0xffffffff, 0x00000100,
732 	0x9034, 0xffffffff, 0x00000100,
733 	0x9038, 0xffffffff, 0x00000100,
734 	0x903c, 0xffffffff, 0x00000100,
735 	0xad80, 0xffffffff, 0x00000100,
736 	0xac54, 0xffffffff, 0x00000100,
737 	0x897c, 0xffffffff, 0x06000100,
738 	0x9868, 0xffffffff, 0x00000100,
739 	0x9510, 0xffffffff, 0x00000100,
740 	0xaf04, 0xffffffff, 0x00000100,
741 	0xae04, 0xffffffff, 0x00000100,
742 	0x949c, 0xffffffff, 0x00000100,
743 	0x802c, 0xffffffff, 0xe0000000,
744 	0x9160, 0xffffffff, 0x00010000,
745 	0x9164, 0xffffffff, 0x00030002,
746 	0x9168, 0xffffffff, 0x00040007,
747 	0x916c, 0xffffffff, 0x00060005,
748 	0x9170, 0xffffffff, 0x00090008,
749 	0x9174, 0xffffffff, 0x00020001,
750 	0x9178, 0xffffffff, 0x00040003,
751 	0x917c, 0xffffffff, 0x00000007,
752 	0x9180, 0xffffffff, 0x00060005,
753 	0x9184, 0xffffffff, 0x00090008,
754 	0x9188, 0xffffffff, 0x00030002,
755 	0x918c, 0xffffffff, 0x00050004,
756 	0x9190, 0xffffffff, 0x00000008,
757 	0x9194, 0xffffffff, 0x00070006,
758 	0x9198, 0xffffffff, 0x000a0009,
759 	0x919c, 0xffffffff, 0x00040003,
760 	0x91a0, 0xffffffff, 0x00060005,
761 	0x91a4, 0xffffffff, 0x00000009,
762 	0x91a8, 0xffffffff, 0x00080007,
763 	0x91ac, 0xffffffff, 0x000b000a,
764 	0x91b0, 0xffffffff, 0x00050004,
765 	0x91b4, 0xffffffff, 0x00070006,
766 	0x91b8, 0xffffffff, 0x0008000b,
767 	0x91bc, 0xffffffff, 0x000a0009,
768 	0x91c0, 0xffffffff, 0x000d000c,
769 	0x91c4, 0xffffffff, 0x00060005,
770 	0x91c8, 0xffffffff, 0x00080007,
771 	0x91cc, 0xffffffff, 0x0000000b,
772 	0x91d0, 0xffffffff, 0x000a0009,
773 	0x91d4, 0xffffffff, 0x000d000c,
774 	0x9150, 0xffffffff, 0x96940200,
775 	0x8708, 0xffffffff, 0x00900100,
776 	0xc478, 0xffffffff, 0x00000080,
777 	0xc404, 0xffffffff, 0x0020003f,
778 	0x30, 0xffffffff, 0x0000001c,
779 	0x34, 0x000f0000, 0x000f0000,
780 	0x160c, 0xffffffff, 0x00000100,
781 	0x1024, 0xffffffff, 0x00000100,
782 	0x20a8, 0xffffffff, 0x00000104,
783 	0x264c, 0x000c0000, 0x000c0000,
784 	0x2648, 0x000c0000, 0x000c0000,
785 	0x2f50, 0x00000001, 0x00000001,
786 	0x30cc, 0xc0000fff, 0x00000104,
787 	0xc1e4, 0x00000001, 0x00000001,
788 	0xd0c0, 0xfffffff0, 0x00000100,
789 	0xd8c0, 0xfffffff0, 0x00000100
790 };
791 
/* Verde power-gating init sequence, {offset, mask, value} triplets.
 * Fix: declared "static const u32" to match every other register table
 * in this file — nothing in this file writes the table, it is only init
 * data to be programmed into the hardware (confirm no external writer
 * before backporting; upstream later constified this table as well). */
792 static const u32 verde_pg_init[] =
793 {
794 	0x353c, 0xffffffff, 0x40000,
795 	0x3538, 0xffffffff, 0x200010ff,
796 	0x353c, 0xffffffff, 0x0,
797 	0x353c, 0xffffffff, 0x0,
798 	0x353c, 0xffffffff, 0x0,
799 	0x353c, 0xffffffff, 0x0,
800 	0x353c, 0xffffffff, 0x0,
801 	0x353c, 0xffffffff, 0x7007,
802 	0x3538, 0xffffffff, 0x300010ff,
803 	0x353c, 0xffffffff, 0x0,
804 	0x353c, 0xffffffff, 0x0,
805 	0x353c, 0xffffffff, 0x0,
806 	0x353c, 0xffffffff, 0x0,
807 	0x353c, 0xffffffff, 0x0,
808 	0x353c, 0xffffffff, 0x400000,
809 	0x3538, 0xffffffff, 0x100010ff,
810 	0x353c, 0xffffffff, 0x0,
811 	0x353c, 0xffffffff, 0x0,
812 	0x353c, 0xffffffff, 0x0,
813 	0x353c, 0xffffffff, 0x0,
814 	0x353c, 0xffffffff, 0x0,
815 	0x353c, 0xffffffff, 0x120200,
816 	0x3538, 0xffffffff, 0x500010ff,
817 	0x353c, 0xffffffff, 0x0,
818 	0x353c, 0xffffffff, 0x0,
819 	0x353c, 0xffffffff, 0x0,
820 	0x353c, 0xffffffff, 0x0,
821 	0x353c, 0xffffffff, 0x0,
822 	0x353c, 0xffffffff, 0x1e1e16,
823 	0x3538, 0xffffffff, 0x600010ff,
824 	0x353c, 0xffffffff, 0x0,
825 	0x353c, 0xffffffff, 0x0,
826 	0x353c, 0xffffffff, 0x0,
827 	0x353c, 0xffffffff, 0x0,
828 	0x353c, 0xffffffff, 0x0,
829 	0x353c, 0xffffffff, 0x171f1e,
830 	0x3538, 0xffffffff, 0x700010ff,
831 	0x353c, 0xffffffff, 0x0,
832 	0x353c, 0xffffffff, 0x0,
833 	0x353c, 0xffffffff, 0x0,
834 	0x353c, 0xffffffff, 0x0,
835 	0x353c, 0xffffffff, 0x0,
836 	0x353c, 0xffffffff, 0x0,
837 	0x3538, 0xffffffff, 0x9ff,
838 	0x3500, 0xffffffff, 0x0,
839 	0x3504, 0xffffffff, 0x10000800,
840 	0x3504, 0xffffffff, 0xf,
841 	0x3504, 0xffffffff, 0xf,
842 	0x3500, 0xffffffff, 0x4,
843 	0x3504, 0xffffffff, 0x1000051e,
844 	0x3504, 0xffffffff, 0xffff,
845 	0x3504, 0xffffffff, 0xffff,
846 	0x3500, 0xffffffff, 0x8,
847 	0x3504, 0xffffffff, 0x80500,
848 	0x3500, 0xffffffff, 0x12,
849 	0x3504, 0xffffffff, 0x9050c,
850 	0x3500, 0xffffffff, 0x1d,
851 	0x3504, 0xffffffff, 0xb052c,
852 	0x3500, 0xffffffff, 0x2a,
853 	0x3504, 0xffffffff, 0x1053e,
854 	0x3500, 0xffffffff, 0x2d,
855 	0x3504, 0xffffffff, 0x10546,
856 	0x3500, 0xffffffff, 0x30,
857 	0x3504, 0xffffffff, 0xa054e,
858 	0x3500, 0xffffffff, 0x3c,
859 	0x3504, 0xffffffff, 0x1055f,
860 	0x3500, 0xffffffff, 0x3f,
861 	0x3504, 0xffffffff, 0x10567,
862 	0x3500, 0xffffffff, 0x42,
863 	0x3504, 0xffffffff, 0x1056f,
864 	0x3500, 0xffffffff, 0x45,
865 	0x3504, 0xffffffff, 0x10572,
866 	0x3500, 0xffffffff, 0x48,
867 	0x3504, 0xffffffff, 0x20575,
868 	0x3500, 0xffffffff, 0x4c,
869 	0x3504, 0xffffffff, 0x190801,
870 	0x3500, 0xffffffff, 0x67,
871 	0x3504, 0xffffffff, 0x1082a,
872 	0x3500, 0xffffffff, 0x6a,
873 	0x3504, 0xffffffff, 0x1b082d,
874 	0x3500, 0xffffffff, 0x87,
875 	0x3504, 0xffffffff, 0x310851,
876 	0x3500, 0xffffffff, 0xba,
877 	0x3504, 0xffffffff, 0x891,
878 	0x3500, 0xffffffff, 0xbc,
879 	0x3504, 0xffffffff, 0x893,
880 	0x3500, 0xffffffff, 0xbe,
881 	0x3504, 0xffffffff, 0x20895,
882 	0x3500, 0xffffffff, 0xc2,
883 	0x3504, 0xffffffff, 0x20899,
884 	0x3500, 0xffffffff, 0xc6,
885 	0x3504, 0xffffffff, 0x2089d,
886 	0x3500, 0xffffffff, 0xca,
887 	0x3504, 0xffffffff, 0x8a1,
888 	0x3500, 0xffffffff, 0xcc,
889 	0x3504, 0xffffffff, 0x8a3,
890 	0x3500, 0xffffffff, 0xce,
891 	0x3504, 0xffffffff, 0x308a5,
892 	0x3500, 0xffffffff, 0xd3,
893 	0x3504, 0xffffffff, 0x6d08cd,
894 	0x3500, 0xffffffff, 0x142,
895 	0x3504, 0xffffffff, 0x2000095a,
896 	0x3504, 0xffffffff, 0x1,
897 	0x3500, 0xffffffff, 0x144,
898 	0x3504, 0xffffffff, 0x301f095b,
899 	0x3500, 0xffffffff, 0x165,
900 	0x3504, 0xffffffff, 0xc094d,
901 	0x3500, 0xffffffff, 0x173,
902 	0x3504, 0xffffffff, 0xf096d,
903 	0x3500, 0xffffffff, 0x184,
904 	0x3504, 0xffffffff, 0x15097f,
905 	0x3500, 0xffffffff, 0x19b,
906 	0x3504, 0xffffffff, 0xc0998,
907 	0x3500, 0xffffffff, 0x1a9,
908 	0x3504, 0xffffffff, 0x409a7,
909 	0x3500, 0xffffffff, 0x1af,
910 	0x3504, 0xffffffff, 0xcdc,
911 	0x3500, 0xffffffff, 0x1b1,
912 	0x3504, 0xffffffff, 0x800,
913 	0x3508, 0xffffffff, 0x6c9b2000,
914 	0x3510, 0xfc00, 0x2000,
915 	0x3544, 0xffffffff, 0xfc0,
916 	0x28d4, 0x00000100, 0x100
917 };
918 
919 static void si_init_golden_registers(struct radeon_device *rdev)
920 {
921 	switch (rdev->family) {
922 	case CHIP_TAHITI:
923 		radeon_program_register_sequence(rdev,
924 						 tahiti_golden_registers,
925 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
926 		radeon_program_register_sequence(rdev,
927 						 tahiti_golden_rlc_registers,
928 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
929 		radeon_program_register_sequence(rdev,
930 						 tahiti_mgcg_cgcg_init,
931 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
932 		radeon_program_register_sequence(rdev,
933 						 tahiti_golden_registers2,
934 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
935 		break;
936 	case CHIP_PITCAIRN:
937 		radeon_program_register_sequence(rdev,
938 						 pitcairn_golden_registers,
939 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
940 		radeon_program_register_sequence(rdev,
941 						 pitcairn_golden_rlc_registers,
942 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
943 		radeon_program_register_sequence(rdev,
944 						 pitcairn_mgcg_cgcg_init,
945 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
946 		break;
947 	case CHIP_VERDE:
948 		radeon_program_register_sequence(rdev,
949 						 verde_golden_registers,
950 						 (const u32)ARRAY_SIZE(verde_golden_registers));
951 		radeon_program_register_sequence(rdev,
952 						 verde_golden_rlc_registers,
953 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
954 		radeon_program_register_sequence(rdev,
955 						 verde_mgcg_cgcg_init,
956 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
957 		radeon_program_register_sequence(rdev,
958 						 verde_pg_init,
959 						 (const u32)ARRAY_SIZE(verde_pg_init));
960 		break;
961 	case CHIP_OLAND:
962 		radeon_program_register_sequence(rdev,
963 						 oland_golden_registers,
964 						 (const u32)ARRAY_SIZE(oland_golden_registers));
965 		radeon_program_register_sequence(rdev,
966 						 oland_golden_rlc_registers,
967 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
968 		radeon_program_register_sequence(rdev,
969 						 oland_mgcg_cgcg_init,
970 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
971 		break;
972 	case CHIP_HAINAN:
973 		radeon_program_register_sequence(rdev,
974 						 hainan_golden_registers,
975 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
976 		radeon_program_register_sequence(rdev,
977 						 hainan_golden_registers2,
978 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
979 		radeon_program_register_sequence(rdev,
980 						 hainan_mgcg_cgcg_init,
981 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
982 		break;
983 	default:
984 		break;
985 	}
986 }
987 
988 #define PCIE_BUS_CLK                10000
989 #define TCLK                        (PCIE_BUS_CLK / 10)
990 
991 /**
992  * si_get_xclk - get the xclk
993  *
994  * @rdev: radeon_device pointer
995  *
996  * Returns the reference clock used by the gfx engine
997  * (SI).
998  */
999 u32 si_get_xclk(struct radeon_device *rdev)
1000 {
1001         u32 reference_clock = rdev->clock.spll.reference_freq;
1002 	u32 tmp;
1003 
1004 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1005 	if (tmp & MUX_TCLK_TO_XCLK)
1006 		return TCLK;
1007 
1008 	tmp = RREG32(CG_CLKPIN_CNTL);
1009 	if (tmp & XTALIN_DIVIDE)
1010 		return reference_clock / 4;
1011 
1012 	return reference_clock;
1013 }
1014 
1015 /* get temperature in millidegrees */
1016 int si_get_temp(struct radeon_device *rdev)
1017 {
1018 	u32 temp;
1019 	int actual_temp = 0;
1020 
1021 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1022 		CTF_TEMP_SHIFT;
1023 
1024 	if (temp & 0x200)
1025 		actual_temp = 255;
1026 	else
1027 		actual_temp = temp & 0x1ff;
1028 
1029 	actual_temp = (actual_temp * 1000);
1030 
1031 	return actual_temp;
1032 }
1033 
1034 #define TAHITI_IO_MC_REGS_SIZE 36
1035 
/* Tahiti MC IO pad setup: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before loading the MC ucode (see
 * si_mc_load_microcode()).  The per-family tables below differ
 * only in the final 0x9f entry. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1074 
/* Pitcairn MC IO pad setup: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before loading the MC ucode. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1113 
/* Verde MC IO pad setup: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before loading the MC ucode. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1152 
/* Oland MC IO pad setup: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before loading the MC ucode. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1191 
/* Hainan MC IO pad setup: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before loading the MC ucode. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1230 
1231 /* ucode loading */
1232 static int si_mc_load_microcode(struct radeon_device *rdev)
1233 {
1234 	const __be32 *fw_data;
1235 	u32 running, blackout = 0;
1236 	u32 *io_mc_regs;
1237 	int i, ucode_size, regs_size;
1238 
1239 	if (!rdev->mc_fw)
1240 		return -EINVAL;
1241 
1242 	switch (rdev->family) {
1243 	case CHIP_TAHITI:
1244 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1245 		ucode_size = SI_MC_UCODE_SIZE;
1246 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1247 		break;
1248 	case CHIP_PITCAIRN:
1249 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1250 		ucode_size = SI_MC_UCODE_SIZE;
1251 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1252 		break;
1253 	case CHIP_VERDE:
1254 	default:
1255 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1256 		ucode_size = SI_MC_UCODE_SIZE;
1257 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1258 		break;
1259 	case CHIP_OLAND:
1260 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1261 		ucode_size = OLAND_MC_UCODE_SIZE;
1262 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1263 		break;
1264 	case CHIP_HAINAN:
1265 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1266 		ucode_size = OLAND_MC_UCODE_SIZE;
1267 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1268 		break;
1269 	}
1270 
1271 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1272 
1273 	if (running == 0) {
1274 		if (running) {
1275 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1276 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1277 		}
1278 
1279 		/* reset the engine and set to writable */
1280 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1281 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1282 
1283 		/* load mc io regs */
1284 		for (i = 0; i < regs_size; i++) {
1285 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1286 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1287 		}
1288 		/* load the MC ucode */
1289 		fw_data = (const __be32 *)rdev->mc_fw->data;
1290 		for (i = 0; i < ucode_size; i++)
1291 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1292 
1293 		/* put the engine back into the active state */
1294 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1295 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1296 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1297 
1298 		/* wait for training to complete */
1299 		for (i = 0; i < rdev->usec_timeout; i++) {
1300 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1301 				break;
1302 			udelay(1);
1303 		}
1304 		for (i = 0; i < rdev->usec_timeout; i++) {
1305 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1306 				break;
1307 			udelay(1);
1308 		}
1309 
1310 		if (running)
1311 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1312 	}
1313 
1314 	return 0;
1315 }
1316 
1317 static int si_init_microcode(struct radeon_device *rdev)
1318 {
1319 	struct platform_device *pdev;
1320 	const char *chip_name;
1321 	const char *rlc_chip_name;
1322 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1323 	char fw_name[30];
1324 	int err;
1325 
1326 	DRM_DEBUG("\n");
1327 
1328 	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1329 	err = IS_ERR(pdev);
1330 	if (err) {
1331 		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1332 		return -EINVAL;
1333 	}
1334 
1335 	switch (rdev->family) {
1336 	case CHIP_TAHITI:
1337 		chip_name = "TAHITI";
1338 		rlc_chip_name = "TAHITI";
1339 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1340 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1341 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1342 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1343 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1344 		break;
1345 	case CHIP_PITCAIRN:
1346 		chip_name = "PITCAIRN";
1347 		rlc_chip_name = "PITCAIRN";
1348 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1349 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1350 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1351 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1352 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1353 		break;
1354 	case CHIP_VERDE:
1355 		chip_name = "VERDE";
1356 		rlc_chip_name = "VERDE";
1357 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1358 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1359 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1360 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1361 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1362 		break;
1363 	case CHIP_OLAND:
1364 		chip_name = "OLAND";
1365 		rlc_chip_name = "OLAND";
1366 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1367 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1368 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1369 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1370 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1371 		break;
1372 	case CHIP_HAINAN:
1373 		chip_name = "HAINAN";
1374 		rlc_chip_name = "HAINAN";
1375 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1376 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1377 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1378 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1379 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1380 		break;
1381 	default: BUG();
1382 	}
1383 
1384 	DRM_INFO("Loading %s Microcode\n", chip_name);
1385 
1386 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1387 	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1388 	if (err)
1389 		goto out;
1390 	if (rdev->pfp_fw->size != pfp_req_size) {
1391 		printk(KERN_ERR
1392 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1393 		       rdev->pfp_fw->size, fw_name);
1394 		err = -EINVAL;
1395 		goto out;
1396 	}
1397 
1398 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1399 	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1400 	if (err)
1401 		goto out;
1402 	if (rdev->me_fw->size != me_req_size) {
1403 		printk(KERN_ERR
1404 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1405 		       rdev->me_fw->size, fw_name);
1406 		err = -EINVAL;
1407 	}
1408 
1409 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1410 	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1411 	if (err)
1412 		goto out;
1413 	if (rdev->ce_fw->size != ce_req_size) {
1414 		printk(KERN_ERR
1415 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1416 		       rdev->ce_fw->size, fw_name);
1417 		err = -EINVAL;
1418 	}
1419 
1420 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1421 	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1422 	if (err)
1423 		goto out;
1424 	if (rdev->rlc_fw->size != rlc_req_size) {
1425 		printk(KERN_ERR
1426 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1427 		       rdev->rlc_fw->size, fw_name);
1428 		err = -EINVAL;
1429 	}
1430 
1431 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1432 	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1433 	if (err)
1434 		goto out;
1435 	if (rdev->mc_fw->size != mc_req_size) {
1436 		printk(KERN_ERR
1437 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1438 		       rdev->mc_fw->size, fw_name);
1439 		err = -EINVAL;
1440 	}
1441 
1442 out:
1443 	platform_device_unregister(pdev);
1444 
1445 	if (err) {
1446 		if (err != -EINVAL)
1447 			printk(KERN_ERR
1448 			       "si_cp: Failed to load firmware \"%s\"\n",
1449 			       fw_name);
1450 		release_firmware(rdev->pfp_fw);
1451 		rdev->pfp_fw = NULL;
1452 		release_firmware(rdev->me_fw);
1453 		rdev->me_fw = NULL;
1454 		release_firmware(rdev->ce_fw);
1455 		rdev->ce_fw = NULL;
1456 		release_firmware(rdev->rlc_fw);
1457 		rdev->rlc_fw = NULL;
1458 		release_firmware(rdev->mc_fw);
1459 		rdev->mc_fw = NULL;
1460 	}
1461 	return err;
1462 }
1463 
1464 /* watermark setup */
1465 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1466 				   struct radeon_crtc *radeon_crtc,
1467 				   struct drm_display_mode *mode,
1468 				   struct drm_display_mode *other_mode)
1469 {
1470 	u32 tmp;
1471 	/*
1472 	 * Line Buffer Setup
1473 	 * There are 3 line buffers, each one shared by 2 display controllers.
1474 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1475 	 * the display controllers.  The paritioning is done via one of four
1476 	 * preset allocations specified in bits 21:20:
1477 	 *  0 - half lb
1478 	 *  2 - whole lb, other crtc must be disabled
1479 	 */
1480 	/* this can get tricky if we have two large displays on a paired group
1481 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1482 	 * non-linked crtcs for maximum line buffer allocation.
1483 	 */
1484 	if (radeon_crtc->base.enabled && mode) {
1485 		if (other_mode)
1486 			tmp = 0; /* 1/2 */
1487 		else
1488 			tmp = 2; /* whole */
1489 	} else
1490 		tmp = 0;
1491 
1492 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1493 	       DC_LB_MEMORY_CONFIG(tmp));
1494 
1495 	if (radeon_crtc->base.enabled && mode) {
1496 		switch (tmp) {
1497 		case 0:
1498 		default:
1499 			return 4096 * 2;
1500 		case 2:
1501 			return 8192 * 2;
1502 		}
1503 	}
1504 
1505 	/* controller not enabled, so no lb used */
1506 	return 0;
1507 }
1508 
1509 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1510 {
1511 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1512 
1513 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1514 	case 0:
1515 	default:
1516 		return 1;
1517 	case 1:
1518 		return 2;
1519 	case 2:
1520 		return 4;
1521 	case 3:
1522 		return 8;
1523 	case 4:
1524 		return 3;
1525 	case 5:
1526 		return 6;
1527 	case 6:
1528 		return 10;
1529 	case 7:
1530 		return 12;
1531 	case 8:
1532 		return 16;
1533 	}
1534 }
1535 
/*
 * Inputs to the DCE6 display watermark calculations below.
 * Filled in by dce6_program_watermarks() from the current mode,
 * clocks and scaler state.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1551 
1552 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1553 {
1554 	/* Calculate raw DRAM Bandwidth */
1555 	fixed20_12 dram_efficiency; /* 0.7 */
1556 	fixed20_12 yclk, dram_channels, bandwidth;
1557 	fixed20_12 a;
1558 
1559 	a.full = dfixed_const(1000);
1560 	yclk.full = dfixed_const(wm->yclk);
1561 	yclk.full = dfixed_div(yclk, a);
1562 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1563 	a.full = dfixed_const(10);
1564 	dram_efficiency.full = dfixed_const(7);
1565 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1566 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1567 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1568 
1569 	return dfixed_trunc(bandwidth);
1570 }
1571 
1572 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1573 {
1574 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1575 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1576 	fixed20_12 yclk, dram_channels, bandwidth;
1577 	fixed20_12 a;
1578 
1579 	a.full = dfixed_const(1000);
1580 	yclk.full = dfixed_const(wm->yclk);
1581 	yclk.full = dfixed_div(yclk, a);
1582 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1583 	a.full = dfixed_const(10);
1584 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1585 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1586 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1587 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1588 
1589 	return dfixed_trunc(bandwidth);
1590 }
1591 
1592 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1593 {
1594 	/* Calculate the display Data return Bandwidth */
1595 	fixed20_12 return_efficiency; /* 0.8 */
1596 	fixed20_12 sclk, bandwidth;
1597 	fixed20_12 a;
1598 
1599 	a.full = dfixed_const(1000);
1600 	sclk.full = dfixed_const(wm->sclk);
1601 	sclk.full = dfixed_div(sclk, a);
1602 	a.full = dfixed_const(10);
1603 	return_efficiency.full = dfixed_const(8);
1604 	return_efficiency.full = dfixed_div(return_efficiency, a);
1605 	a.full = dfixed_const(32);
1606 	bandwidth.full = dfixed_mul(a, sclk);
1607 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1608 
1609 	return dfixed_trunc(bandwidth);
1610 }
1611 
/* DMIF request size in bytes; a fixed 32 for all DCE6 parts. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1616 
1617 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1618 {
1619 	/* Calculate the DMIF Request Bandwidth */
1620 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1621 	fixed20_12 disp_clk, sclk, bandwidth;
1622 	fixed20_12 a, b1, b2;
1623 	u32 min_bandwidth;
1624 
1625 	a.full = dfixed_const(1000);
1626 	disp_clk.full = dfixed_const(wm->disp_clk);
1627 	disp_clk.full = dfixed_div(disp_clk, a);
1628 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1629 	b1.full = dfixed_mul(a, disp_clk);
1630 
1631 	a.full = dfixed_const(1000);
1632 	sclk.full = dfixed_const(wm->sclk);
1633 	sclk.full = dfixed_div(sclk, a);
1634 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1635 	b2.full = dfixed_mul(a, sclk);
1636 
1637 	a.full = dfixed_const(10);
1638 	disp_clk_request_efficiency.full = dfixed_const(8);
1639 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1640 
1641 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1642 
1643 	a.full = dfixed_const(min_bandwidth);
1644 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1645 
1646 	return dfixed_trunc(bandwidth);
1647 }
1648 
1649 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1650 {
1651 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1652 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1653 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1654 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1655 
1656 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1657 }
1658 
1659 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1660 {
1661 	/* Calculate the display mode Average Bandwidth
1662 	 * DisplayMode should contain the source and destination dimensions,
1663 	 * timing, etc.
1664 	 */
1665 	fixed20_12 bpp;
1666 	fixed20_12 line_time;
1667 	fixed20_12 src_width;
1668 	fixed20_12 bandwidth;
1669 	fixed20_12 a;
1670 
1671 	a.full = dfixed_const(1000);
1672 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1673 	line_time.full = dfixed_div(line_time, a);
1674 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1675 	src_width.full = dfixed_const(wm->src_width);
1676 	bandwidth.full = dfixed_mul(src_width, bpp);
1677 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1678 	bandwidth.full = dfixed_div(bandwidth, line_time);
1679 
1680 	return dfixed_trunc(bandwidth);
1681 }
1682 
/* Worst-case latency (in ns) the display can tolerate; used as the
 * latency watermark for a head. */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* NOTE(review): these divide by available_bandwidth before the
	 * num_heads == 0 early-out below; assumes the bandwidth helpers
	 * never return 0 -- confirm for extreme clock values. */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* more source lines are needed per destination line when
	 * downscaling by more than 2x, using >= 3 vtaps past 1x, many
	 * vtaps, or interlacing at >= 2x scale */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* line buffer fill rate is also capped by disp_clk * bpp */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if the line cannot be refilled within the
	 * active portion of the scanline */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1745 
1746 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1747 {
1748 	if (dce6_average_bandwidth(wm) <=
1749 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1750 		return true;
1751 	else
1752 		return false;
1753 };
1754 
1755 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1756 {
1757 	if (dce6_average_bandwidth(wm) <=
1758 	    (dce6_available_bandwidth(wm) / wm->num_heads))
1759 		return true;
1760 	else
1761 		return false;
1762 };
1763 
1764 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1765 {
1766 	u32 lb_partitions = wm->lb_size / wm->src_width;
1767 	u32 line_time = wm->active_time + wm->blank_time;
1768 	u32 latency_tolerant_lines;
1769 	u32 latency_hiding;
1770 	fixed20_12 a;
1771 
1772 	a.full = dfixed_const(1);
1773 	if (wm->vsc.full > a.full)
1774 		latency_tolerant_lines = 1;
1775 	else {
1776 		if (lb_partitions <= (wm->vtaps + 1))
1777 			latency_tolerant_lines = 1;
1778 		else
1779 			latency_tolerant_lines = 2;
1780 	}
1781 
1782 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1783 
1784 	if (dce6_latency_watermark(wm) <= latency_hiding)
1785 		return true;
1786 	else
1787 		return false;
1788 }
1789 
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer bytes allocated to this pipe
 * @num_heads: number of active display heads
 *
 * Fills a dce6_wm_params from the current mode, clocks and scaler
 * state, derives the A/B latency watermarks and priority marks, and
 * writes them to the DPG arbitration and PRIORITY_A/B_CNT registers
 * for this crtc.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line_time capped at the 16-bit
		 * register field maximum */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* gather the watermark calculation inputs */
		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		/* NOTE(review): wm still holds the high clocks here, so
		 * watermark b currently comes out equal to watermark a */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a (ns) scaled by pixel clock
		 * and horizontal scale ratio, in units of 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1899 
1900 void dce6_bandwidth_update(struct radeon_device *rdev)
1901 {
1902 	struct drm_display_mode *mode0 = NULL;
1903 	struct drm_display_mode *mode1 = NULL;
1904 	u32 num_heads = 0, lb_size;
1905 	int i;
1906 
1907 	radeon_update_display_priority(rdev);
1908 
1909 	for (i = 0; i < rdev->num_crtc; i++) {
1910 		if (rdev->mode_info.crtcs[i]->base.enabled)
1911 			num_heads++;
1912 	}
1913 	for (i = 0; i < rdev->num_crtc; i += 2) {
1914 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1915 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1916 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1917 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1918 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1919 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1920 	}
1921 }
1922 
1923 /*
1924  * Core functions
1925  */
/**
 * si_tiling_mode_table_init - program the GB_TILE_MODE tiling table
 * @rdev: radeon device
 *
 * Programs all 32 GB_TILE_MODE registers with per-surface-type tiling
 * parameters chosen by ASIC family, and mirrors each value into
 * rdev->config.si.tile_mode_array[] (readers of the cached copy are
 * outside this function).  Unknown families only log an error.
 */
static void si_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;

	/* "tile split equal to row size" encoding depends on the DRAM row size */
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	if ((rdev->family == CHIP_TAHITI) ||
	    (rdev->family == CHIP_PITCAIRN)) {
		/* big parts: 8-pipe (P8_32x32_8x16) pipe config throughout */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				/* unused table entries (18-20, 26-31) are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if ((rdev->family == CHIP_VERDE) ||
		   (rdev->family == CHIP_OLAND) ||
		   (rdev->family == CHIP_HAINAN)) {
		/* small parts: 4-pipe (P4_8x16) config, except the PRT modes (21-25)
		 * which keep the 8-pipe config */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				/* unused table entries (18-20, 26-31) are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
}
2430 
2431 static void si_select_se_sh(struct radeon_device *rdev,
2432 			    u32 se_num, u32 sh_num)
2433 {
2434 	u32 data = INSTANCE_BROADCAST_WRITES;
2435 
2436 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2437 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2438 	else if (se_num == 0xffffffff)
2439 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2440 	else if (sh_num == 0xffffffff)
2441 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2442 	else
2443 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2444 	WREG32(GRBM_GFX_INDEX, data);
2445 }
2446 
2447 static u32 si_create_bitmask(u32 bit_width)
2448 {
2449 	u32 i, mask = 0;
2450 
2451 	for (i = 0; i < bit_width; i++) {
2452 		mask <<= 1;
2453 		mask |= 1;
2454 	}
2455 	return mask;
2456 }
2457 
2458 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2459 {
2460 	u32 data, mask;
2461 
2462 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2463 	if (data & 1)
2464 		data &= INACTIVE_CUS_MASK;
2465 	else
2466 		data = 0;
2467 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2468 
2469 	data >>= INACTIVE_CUS_SHIFT;
2470 
2471 	mask = si_create_bitmask(cu_per_sh);
2472 
2473 	return ~data & mask;
2474 }
2475 
2476 static void si_setup_spi(struct radeon_device *rdev,
2477 			 u32 se_num, u32 sh_per_se,
2478 			 u32 cu_per_sh)
2479 {
2480 	int i, j, k;
2481 	u32 data, mask, active_cu;
2482 
2483 	for (i = 0; i < se_num; i++) {
2484 		for (j = 0; j < sh_per_se; j++) {
2485 			si_select_se_sh(rdev, i, j);
2486 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2487 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2488 
2489 			mask = 1;
2490 			for (k = 0; k < 16; k++) {
2491 				mask <<= k;
2492 				if (active_cu & mask) {
2493 					data &= ~mask;
2494 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2495 					break;
2496 				}
2497 			}
2498 		}
2499 	}
2500 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2501 }
2502 
2503 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2504 			      u32 max_rb_num, u32 se_num,
2505 			      u32 sh_per_se)
2506 {
2507 	u32 data, mask;
2508 
2509 	data = RREG32(CC_RB_BACKEND_DISABLE);
2510 	if (data & 1)
2511 		data &= BACKEND_DISABLE_MASK;
2512 	else
2513 		data = 0;
2514 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2515 
2516 	data >>= BACKEND_DISABLE_SHIFT;
2517 
2518 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2519 
2520 	return data & mask;
2521 }
2522 
/**
 * si_setup_rb - configure the render backend mapping
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: shader arrays per shader engine
 * @max_rb_num: total render backends on the chip
 *
 * Reads the disabled-RB bits of every SE/SH, builds a chip-wide bitmap of
 * enabled RBs and programs PA_SC_RASTER_CONFIG per shader engine with an
 * RB_MAP value packed from that bitmap (2 bits per SH).
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect disabled-RB bits from each SE/SH into one chip-wide bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: an RB whose disable bit is clear is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		/* broadcast the raster config to all SHs of this SE */
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			/* pick the RB map from the two enabled-RB bits of this SH;
			 * NOTE(review): case 2 -> RB_MAP_3 and case 3 -> RB_MAP_2
			 * look swapped but match the shipped driver tables --
			 * confirm against the PA_SC_RASTER_CONFIG register spec
			 * before changing */
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			/* consume this SH's two bits */
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* restore broadcast selection */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2570 
/**
 * si_gpu_init - program the asic-wide graphics configuration
 * @rdev: radeon_device pointer
 *
 * Selects per-family shader/tiling limits (shader engines, tile pipes,
 * CUs per SH, backends, FIFO sizes) and the golden GB_ADDR_CONFIG value,
 * derives the memory row size and the driver tile_config word from the
 * MC RAMCFG, writes the address-config registers (including UVD when
 * present), initializes the tiling mode table, RB and SPI setup, and
 * finally applies HW defaults for the 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family gfx configuration limits; VERDE doubles as the
	 * default for any unrecognized family */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not consumed below;
	 * only mc_arb_ramcfg feeds the row-size/bank calculations. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every client that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: keeps the reset value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the hardware settle after the configuration burst */
	udelay(50);
}
2827 
2828 /*
2829  * GPU scratch registers helpers function.
2830  */
2831 static void si_scratch_init(struct radeon_device *rdev)
2832 {
2833 	int i;
2834 
2835 	rdev->scratch.num_reg = 7;
2836 	rdev->scratch.reg_base = SCRATCH_REG0;
2837 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2838 		rdev->scratch.free[i] = true;
2839 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2840 	}
2841 }
2842 
/**
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the read caches over the GART (SURFACE_SYNC with TC/TCL1/
 * SH KCACHE/SH ICACHE actions), then emits an EVENT_WRITE_EOP packet
 * that writes the fence sequence number to the fence driver's GPU
 * address and requests an interrupt when preceding work completes.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* full surface size */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
2869 
2870 /*
2871  * IB stuff
2872  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet referencing the IB's GPU address, VM id and length.  For
 * const IBs a SWITCH_BUFFER packet is emitted first; for regular IBs
 * the predicted read pointer is written back (to the rptr save reg or
 * the writeback page) and the GART read caches are flushed afterwards
 * for the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 (this packet) + 4 (IB packet) + 8 (cache flush) dwords ahead */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 (WRITE_DATA packet) + 4 + 8 dwords ahead */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* (1 << 8): WRITE_DATA destination select — memory */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2929 
2930 /*
2931  * CP.
2932  */
2933 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2934 {
2935 	if (enable)
2936 		WREG32(CP_ME_CNTL, 0);
2937 	else {
2938 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2939 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2940 		WREG32(SCRATCH_UMSK, 0);
2941 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2942 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2943 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2944 	}
2945 	udelay(50);
2946 }
2947 
2948 static int si_cp_load_microcode(struct radeon_device *rdev)
2949 {
2950 	const __be32 *fw_data;
2951 	int i;
2952 
2953 	if (!rdev->me_fw || !rdev->pfp_fw)
2954 		return -EINVAL;
2955 
2956 	si_cp_enable(rdev, false);
2957 
2958 	/* PFP */
2959 	fw_data = (const __be32 *)rdev->pfp_fw->data;
2960 	WREG32(CP_PFP_UCODE_ADDR, 0);
2961 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2962 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2963 	WREG32(CP_PFP_UCODE_ADDR, 0);
2964 
2965 	/* CE */
2966 	fw_data = (const __be32 *)rdev->ce_fw->data;
2967 	WREG32(CP_CE_UCODE_ADDR, 0);
2968 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2969 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2970 	WREG32(CP_CE_UCODE_ADDR, 0);
2971 
2972 	/* ME */
2973 	fw_data = (const __be32 *)rdev->me_fw->data;
2974 	WREG32(CP_ME_RAM_WADDR, 0);
2975 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2976 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2977 	WREG32(CP_ME_RAM_WADDR, 0);
2978 
2979 	WREG32(CP_PFP_UCODE_ADDR, 0);
2980 	WREG32(CP_CE_UCODE_ADDR, 0);
2981 	WREG32(CP_ME_RAM_WADDR, 0);
2982 	WREG32(CP_ME_RAM_RADDR, 0);
2983 	return 0;
2984 }
2985 
2986 static int si_cp_start(struct radeon_device *rdev)
2987 {
2988 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2989 	int r, i;
2990 
2991 	r = radeon_ring_lock(rdev, ring, 7 + 4);
2992 	if (r) {
2993 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2994 		return r;
2995 	}
2996 	/* init the CP */
2997 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2998 	radeon_ring_write(ring, 0x1);
2999 	radeon_ring_write(ring, 0x0);
3000 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3001 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3002 	radeon_ring_write(ring, 0);
3003 	radeon_ring_write(ring, 0);
3004 
3005 	/* init the CE partitions */
3006 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3007 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3008 	radeon_ring_write(ring, 0xc000);
3009 	radeon_ring_write(ring, 0xe000);
3010 	radeon_ring_unlock_commit(rdev, ring);
3011 
3012 	si_cp_enable(rdev, true);
3013 
3014 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3015 	if (r) {
3016 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3017 		return r;
3018 	}
3019 
3020 	/* setup clear context state */
3021 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3022 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3023 
3024 	for (i = 0; i < si_default_size; i++)
3025 		radeon_ring_write(ring, si_default_state[i]);
3026 
3027 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3028 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3029 
3030 	/* set clear context state */
3031 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3032 	radeon_ring_write(ring, 0);
3033 
3034 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3035 	radeon_ring_write(ring, 0x00000316);
3036 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3037 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3038 
3039 	radeon_ring_unlock_commit(rdev, ring);
3040 
3041 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3042 		ring = &rdev->ring[i];
3043 		r = radeon_ring_lock(rdev, ring, 2);
3044 
3045 		/* clear the compute context state */
3046 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3047 		radeon_ring_write(ring, 0);
3048 
3049 		radeon_ring_unlock_commit(rdev, ring);
3050 	}
3051 
3052 	return 0;
3053 }
3054 
3055 static void si_cp_fini(struct radeon_device *rdev)
3056 {
3057 	struct radeon_ring *ring;
3058 	si_cp_enable(rdev, false);
3059 
3060 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3061 	radeon_ring_fini(rdev, ring);
3062 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3063 
3064 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3065 	radeon_ring_fini(rdev, ring);
3066 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3067 
3068 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3069 	radeon_ring_fini(rdev, ring);
3070 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3071 }
3072 
/**
 * si_cp_resume - reset and restart the CP rings
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP block, programs the three ring buffers (size,
 * read/write pointers, writeback addresses, base), starts the rings
 * via si_cp_start() and ring-tests each one, clearing the ready flag
 * for any ring that fails.
 *
 * Returns 0 on success, or the gfx ring test error code on failure
 * (compute ring test failures only clear that ring's ready flag).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable rptr updates (only ring 0's CNTL
		 * carries this bit; tmp is recomputed for ring1/ring2) */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: all three rings go down */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3208 
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are busy/hung
 * @rdev: radeon_device pointer
 *
 * Decodes the GRBM, SRBM, DMA and VM L2 status registers into a
 * RADEON_RESET_* bitmask of engines that appear busy.  MC busy is
 * deliberately dropped from the mask since a busy MC is usually not
 * actually hung.
 *
 * Returns the bitmask of engines needing a soft reset (0 if idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3289 
/**
 * si_gpu_soft_reset - soft reset the requested GPU blocks
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask of blocks to reset
 *
 * Dumps status registers, halts the CP and any DMA engines being
 * reset, stops the MC, translates the reset mask into GRBM/SRBM
 * soft-reset bits, pulses those reset registers and resumes the MC.
 * A zero mask is a no-op.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce the memory controller before resetting anything */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the RADEON_RESET_* mask onto GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear, read back */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3414 
3415 int si_asic_reset(struct radeon_device *rdev)
3416 {
3417 	u32 reset_mask;
3418 
3419 	reset_mask = si_gpu_check_soft_reset(rdev);
3420 
3421 	if (reset_mask)
3422 		r600_set_bios_scratch_engine_hung(rdev, true);
3423 
3424 	si_gpu_soft_reset(rdev, reset_mask);
3425 
3426 	reset_mask = si_gpu_check_soft_reset(rdev);
3427 
3428 	if (!reset_mask)
3429 		r600_set_bios_scratch_engine_hung(rdev, false);
3430 
3431 	return 0;
3432 }
3433 
3434 /**
3435  * si_gfx_is_lockup - Check if the GFX engine is locked up
3436  *
3437  * @rdev: radeon_device pointer
3438  * @ring: radeon_ring structure holding ring information
3439  *
3440  * Check if the GFX engine is locked up.
3441  * Returns true if the engine appears to be locked up, false if not.
3442  */
3443 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3444 {
3445 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3446 
3447 	if (!(reset_mask & (RADEON_RESET_GFX |
3448 			    RADEON_RESET_COMPUTE |
3449 			    RADEON_RESET_CP))) {
3450 		radeon_ring_lockup_update(ring);
3451 		return false;
3452 	}
3453 	/* force CP activities */
3454 	radeon_ring_force_activity(rdev, ring);
3455 	return radeon_ring_test_lockup(rdev, ring);
3456 }
3457 
3458 /**
3459  * si_dma_is_lockup - Check if the DMA engine is locked up
3460  *
3461  * @rdev: radeon_device pointer
3462  * @ring: radeon_ring structure holding ring information
3463  *
3464  * Check if the async DMA engine is locked up.
3465  * Returns true if the engine appears to be locked up, false if not.
3466  */
3467 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3468 {
3469 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3470 	u32 mask;
3471 
3472 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3473 		mask = RADEON_RESET_DMA;
3474 	else
3475 		mask = RADEON_RESET_DMA1;
3476 
3477 	if (!(reset_mask & mask)) {
3478 		radeon_ring_lockup_update(ring);
3479 		return false;
3480 	}
3481 	/* force ring activities */
3482 	radeon_ring_force_activity(rdev, ring);
3483 	return radeon_ring_test_lockup(rdev, ring);
3484 }
3485 
3486 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 * @rdev: radeon_device pointer
 *
 * Stops the MC, locks out VGA aperture access, programs the system
 * aperture, FB location and HDP non-surface window, disables the AGP
 * aperture, then resumes the MC and turns off the VGA renderer so it
 * cannot scribble over driver-owned VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION: end address in the top half, start in the bottom */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3537 
3538 static void si_vram_gtt_location(struct radeon_device *rdev,
3539 				 struct radeon_mc *mc)
3540 {
3541 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3542 		/* leave room for at least 1024M GTT */
3543 		dev_warn(rdev->dev, "limiting VRAM\n");
3544 		mc->real_vram_size = 0xFFC0000000ULL;
3545 		mc->mc_vram_size = 0xFFC0000000ULL;
3546 	}
3547 	radeon_vram_location(rdev, &rdev->mc, 0);
3548 	rdev->mc.gtt_base_align = 0;
3549 	radeon_gtt_location(rdev, mc);
3550 }
3551 
3552 static int si_mc_init(struct radeon_device *rdev)
3553 {
3554 	u32 tmp;
3555 	int chansize, numchan;
3556 
3557 	/* Get VRAM informations */
3558 	rdev->mc.vram_is_ddr = true;
3559 	tmp = RREG32(MC_ARB_RAMCFG);
3560 	if (tmp & CHANSIZE_OVERRIDE) {
3561 		chansize = 16;
3562 	} else if (tmp & CHANSIZE_MASK) {
3563 		chansize = 64;
3564 	} else {
3565 		chansize = 32;
3566 	}
3567 	tmp = RREG32(MC_SHARED_CHMAP);
3568 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3569 	case 0:
3570 	default:
3571 		numchan = 1;
3572 		break;
3573 	case 1:
3574 		numchan = 2;
3575 		break;
3576 	case 2:
3577 		numchan = 4;
3578 		break;
3579 	case 3:
3580 		numchan = 8;
3581 		break;
3582 	case 4:
3583 		numchan = 3;
3584 		break;
3585 	case 5:
3586 		numchan = 6;
3587 		break;
3588 	case 6:
3589 		numchan = 10;
3590 		break;
3591 	case 7:
3592 		numchan = 12;
3593 		break;
3594 	case 8:
3595 		numchan = 16;
3596 		break;
3597 	}
3598 	rdev->mc.vram_width = numchan * chansize;
3599 	/* Could aper size report 0 ? */
3600 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3601 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3602 	/* size in MB on si */
3603 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3604 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3605 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3606 	si_vram_gtt_location(rdev, &rdev->mc);
3607 	radeon_update_bandwidth_info(rdev);
3608 
3609 	return 0;
3610 }
3611 
3612 /*
3613  * GART
3614  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB for VM context 0
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then writes VM_INVALIDATE_REQUEST with only
 * bit 0 set, i.e. invalidates VM context 0 (the GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3623 
/**
 * si_pcie_gart_enable - program and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, restores its entries, programs
 * the L1 TLB / L2 cache control registers, sets up VM context 0 as
 * the GART context (range 0..gtt_end, faults redirected to the dummy
 * page) and points contexts 1-15 at the same table as placeholders
 * until radeon_gart.c assigns real per-VM page tables.
 *
 * Returns 0 on success, -EINVAL if no GART object exists, or the
 * error from pinning the table.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 land on the dummy page instead of random memory */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw offsets with no symbolic name in sid.h —
	 * purpose not documented here; zeroed as part of GART bring-up */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* two-level page tables with every fault class raising an interrupt */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3708 
/**
 * si_pcie_gart_disable - disable VM page table walking
 *
 * @rdev: radeon_device pointer
 *
 * Disables VM contexts 0 and 1, reprograms the TLB and L2 control
 * registers without their enable bits (mirroring the values written
 * in si_pcie_gart_enable()) and unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3727 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the page table VRAM object
 * and the GART bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3734 
3735 /* vm parser */
3736 static bool si_vm_reg_valid(u32 reg)
3737 {
3738 	/* context regs are fine */
3739 	if (reg >= 0x28000)
3740 		return true;
3741 
3742 	/* check config regs */
3743 	switch (reg) {
3744 	case GRBM_GFX_INDEX:
3745 	case CP_STRMOUT_CNTL:
3746 	case VGT_VTX_VECT_EJECT_REG:
3747 	case VGT_CACHE_INVALIDATION:
3748 	case VGT_ESGS_RING_SIZE:
3749 	case VGT_GSVS_RING_SIZE:
3750 	case VGT_GS_VERTEX_REUSE:
3751 	case VGT_PRIMITIVE_TYPE:
3752 	case VGT_INDEX_TYPE:
3753 	case VGT_NUM_INDICES:
3754 	case VGT_NUM_INSTANCES:
3755 	case VGT_TF_RING_SIZE:
3756 	case VGT_HS_OFFCHIP_PARAM:
3757 	case VGT_TF_MEMORY_BASE:
3758 	case PA_CL_ENHANCE:
3759 	case PA_SU_LINE_STIPPLE_VALUE:
3760 	case PA_SC_LINE_STIPPLE_STATE:
3761 	case PA_SC_ENHANCE:
3762 	case SQC_CACHES:
3763 	case SPI_STATIC_THREAD_MGMT_1:
3764 	case SPI_STATIC_THREAD_MGMT_2:
3765 	case SPI_STATIC_THREAD_MGMT_3:
3766 	case SPI_PS_MAX_WAVE_ID:
3767 	case SPI_CONFIG_CNTL:
3768 	case SPI_CONFIG_CNTL_1:
3769 	case TA_CNTL_AUX:
3770 		return true;
3771 	default:
3772 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3773 		return false;
3774 	}
3775 }
3776 
/**
 * si_vm_packet3_ce_check - validate a PM4 packet3 on the CE (const engine)
 *
 * @rdev: radeon_device pointer (unused here)
 * @ib: IB dword buffer (unused here)
 * @pkt: decoded packet header
 *
 * Only the opcode is inspected: the listed CE opcodes are accepted
 * unconditionally, anything else is rejected.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3798 
/**
 * si_vm_packet3_gfx_check - validate a PM4 packet3 on the GFX ring
 *
 * @rdev: radeon_device pointer (unused here)
 * @ib: IB dword buffer; pkt->idx indexes the packet header within it
 * @pkt: decoded packet header
 *
 * Most opcodes are accepted as-is.  For packets that can target
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) the destination/source register offsets are
 * extracted from the packet body and checked with si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-sel field == 0: destination is a register (dword
		 * offset in ib[idx + 3], hence * 4 for the byte offset) */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel == 0: writes go to registers */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register (one_reg_wr) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential writes: header + 2 addr dwords,
				 * remaining count - 2 dwords are data */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-is-register bit */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole register run, then each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* no-increment: single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* low 21 bits of command = byte count */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3957 
/**
 * si_vm_packet3_compute_check - validate a PM4 packet3 on a compute ring
 *
 * @rdev: radeon_device pointer (unused here)
 * @ib: IB dword buffer; pkt->idx indexes the packet header within it
 * @pkt: decoded packet header
 *
 * Compute variant of si_vm_packet3_gfx_check(): a reduced opcode
 * allow list (no draw/CP-DMA packets), with the same register checks
 * for COPY_DATA, WRITE_DATA, COND_WRITE and COPY_DW.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-sel == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel == 0: writes go to registers */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register (one_reg_wr) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential mode: count - 2 data dwords */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-is-register bit */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4039 
/**
 * si_ib_parse - validate all packets in a VM indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet.  Type-0 packets are always rejected,
 * type-2 packets are single-dword no-ops, and type-3 packets are
 * dispatched to the CE, GFX or compute checker depending on
 * ib->is_const_ib and ib->ring.  Stops at the first invalid packet.
 *
 * NOTE(review): the do/while reads ib->ptr[0] before checking
 * length_dw — assumes callers never submit an empty IB; confirm.
 *
 * Returns 0 if every packet is valid, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* direct register writes are not allowed from VM IBs */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a one-dword filler nop */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4091 
4092 /*
4093  * vm
4094  */
/**
 * si_vm_init - set up the VM manager parameters for SI
 *
 * @rdev: radeon_device pointer
 *
 * SI supports 16 VM contexts; VRAM pages are addressed from offset 0.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4104 
/**
 * si_vm_fini - tear down the VM manager (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to release: si_vm_init() only sets fields on rdev, so this
 * is intentionally empty.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4108 
4109 /**
4110  * si_vm_set_page - update the page tables using the CP
4111  *
4112  * @rdev: radeon_device pointer
4113  * @ib: indirect buffer to fill with commands
4114  * @pe: addr of the page entry
4115  * @addr: dst addr to write into pe
4116  * @count: number of page entries to update
4117  * @incr: increase next addr by incr bytes
4118  * @flags: access flags
4119  *
4120  * Update the page tables using the CP (SI).
4121  */
4122 void si_vm_set_page(struct radeon_device *rdev,
4123 		    struct radeon_ib *ib,
4124 		    uint64_t pe,
4125 		    uint64_t addr, unsigned count,
4126 		    uint32_t incr, uint32_t flags)
4127 {
4128 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4129 	uint64_t value;
4130 	unsigned ndw;
4131 
4132 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4133 		while (count) {
4134 			ndw = 2 + count * 2;
4135 			if (ndw > 0x3FFE)
4136 				ndw = 0x3FFE;
4137 
4138 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4139 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4140 					WRITE_DATA_DST_SEL(1));
4141 			ib->ptr[ib->length_dw++] = pe;
4142 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4143 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4144 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4145 					value = radeon_vm_map_gart(rdev, addr);
4146 					value &= 0xFFFFFFFFFFFFF000ULL;
4147 				} else if (flags & RADEON_VM_PAGE_VALID) {
4148 					value = addr;
4149 				} else {
4150 					value = 0;
4151 				}
4152 				addr += incr;
4153 				value |= r600_flags;
4154 				ib->ptr[ib->length_dw++] = value;
4155 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4156 			}
4157 		}
4158 	} else {
4159 		/* DMA */
4160 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4161 			while (count) {
4162 				ndw = count * 2;
4163 				if (ndw > 0xFFFFE)
4164 					ndw = 0xFFFFE;
4165 
4166 				/* for non-physically contiguous pages (system) */
4167 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4168 				ib->ptr[ib->length_dw++] = pe;
4169 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4170 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4171 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4172 						value = radeon_vm_map_gart(rdev, addr);
4173 						value &= 0xFFFFFFFFFFFFF000ULL;
4174 					} else if (flags & RADEON_VM_PAGE_VALID) {
4175 						value = addr;
4176 					} else {
4177 						value = 0;
4178 					}
4179 					addr += incr;
4180 					value |= r600_flags;
4181 					ib->ptr[ib->length_dw++] = value;
4182 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4183 				}
4184 			}
4185 		} else {
4186 			while (count) {
4187 				ndw = count * 2;
4188 				if (ndw > 0xFFFFE)
4189 					ndw = 0xFFFFE;
4190 
4191 				if (flags & RADEON_VM_PAGE_VALID)
4192 					value = addr;
4193 				else
4194 					value = 0;
4195 				/* for physically contiguous pages (vram) */
4196 				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4197 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4198 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4199 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4200 				ib->ptr[ib->length_dw++] = 0;
4201 				ib->ptr[ib->length_dw++] = value; /* value */
4202 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4203 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4204 				ib->ptr[ib->length_dw++] = 0;
4205 				pe += ndw * 4;
4206 				addr += (ndw / 2) * incr;
4207 				count -= ndw / 2;
4208 			}
4209 		}
4210 		while (ib->length_dw & 0x7)
4211 			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4212 	}
4213 }
4214 
/**
 * si_vm_flush - flush a VM's TLB entries via the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush; NULL is a no-op
 *
 * Emits WRITE_DATA packets that update the VM's page directory base
 * register, flush the HDP cache and invalidate the VM context's TLB
 * entry, then syncs PFP to ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4257 
/**
 * si_dma_vm_flush - flush a VM's TLB entries via the async DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush; NULL is a no-op
 *
 * DMA-ring equivalent of si_vm_flush(): uses SRBM_WRITE packets to
 * update the page directory base, flush the HDP cache and invalidate
 * the VM context's TLB entry.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	/* NOTE(review): comment says contexts 0-7 but vm->id can reach 15
	 * (si_vm_init sets nvm = 16) — confirm against si_vm_flush above */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4283 
4284 /*
4285  * RLC
4286  */
/**
 * si_rlc_fini - free the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the RLC save/restore and clear-state buffer
 * objects, if allocated.  Safe to call on a partially initialized
 * state (si_rlc_init() uses it as its error-path cleanup).
 */
void si_rlc_fini(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj) {
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
		/* unpin/unreserve even if the reserve warned; then drop the ref */
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);

		radeon_bo_unref(&rdev->rlc.save_restore_obj);
		rdev->rlc.save_restore_obj = NULL;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj) {
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);

		radeon_bo_unref(&rdev->rlc.clear_state_obj);
		rdev->rlc.clear_state_obj = NULL;
	}
}
4315 
/**
 * si_rlc_init - allocate and pin the RLC buffer objects
 *
 * @rdev: radeon_device pointer
 *
 * Creates (if not already present) and pins one-page VRAM buffer
 * objects for the RLC save/restore block and the clear-state block,
 * recording their GPU addresses.  On any failure the partially
 * created state is torn down via si_rlc_fini().
 *
 * Returns 0 on success or a negative error code.
 */
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
4372 
/* si_rlc_stop - halt the RLC by clearing RLC_CNTL */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4377 
/* si_rlc_start - start the RLC by setting the enable bit in RLC_CNTL */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4382 
/**
 * si_rlc_resume - load the RLC microcode and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Stops the RLC, resets its list/counter registers, programs the
 * save/restore and clear-state buffer addresses, then uploads
 * SI_RLC_UCODE_SIZE dwords of big-endian firmware from rdev->rlc_fw
 * and restarts the RLC.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware dwords are big-endian in the blob; write them one
	 * at a time through the UCODE_ADDR/UCODE_DATA pair */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4416 
/**
 * si_enable_interrupts - enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Read-modify-writes IH_CNTL and IH_RB_CNTL to set their enable bits
 * (IH_CNTL is written first; si_disable_interrupts() clears them in
 * the opposite order) and marks the IH as enabled.
 */
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
4428 
/**
 * si_disable_interrupts - disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Clears the enable bits in IH_RB_CNTL and IH_CNTL (ring buffer
 * first, reverse of si_enable_interrupts()), zeroes the ring
 * read/write pointers and marks the IH as disabled.
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
4444 
/**
 * si_disable_interrupt_state - mask every interrupt source
 *
 * @rdev: radeon_device pointer
 *
 * Clears the interrupt enables for the CP rings, both DMA engines,
 * GRBM, every present CRTC (vblank and pageflip), and — on parts
 * with a display engine — the DAC and the six HPD pins (keeping only
 * the HPD polarity bits).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* per-CRTC interrupt masks, guarded by how many CRTCs the part has */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* display-less (NODCE) parts have no DAC/HPD registers */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* preserve only the configured HPD polarity on each pin */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4500 
/**
 * si_irq_init - initialize the interrupt handler ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, loads the RLC, programs the interrupt
 * controller (ring base/size, writeback address, rptr rearm when MSI
 * is in use), masks all interrupt sources and finally enables the IH.
 *
 * Returns 0 on success or the error from ring allocation / RLC load.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size encoded as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4571 
/**
 * si_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Build the CP ring, DMA ring, CRTC vblank/pflip and hotplug interrupt
 * enable masks from the software state in rdev->irq and program them
 * into the hardware.  Called whenever the set of requested interrupt
 * sources changes.
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current hpd control values with the enable bit cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts are requested either by drm vblank support or
	 * by a pending page flip on that crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* program the accumulated masks into the hw */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4729 
4730 static inline void si_irq_ack(struct radeon_device *rdev)
4731 {
4732 	u32 tmp;
4733 
4734 	if (ASIC_IS_NODCE(rdev))
4735 		return;
4736 
4737 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4738 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4739 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4740 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4741 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4742 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4743 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4744 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4745 	if (rdev->num_crtc >= 4) {
4746 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4747 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4748 	}
4749 	if (rdev->num_crtc >= 6) {
4750 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4751 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4752 	}
4753 
4754 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4755 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4756 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4757 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4758 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4759 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4760 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4761 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4762 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4763 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4764 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4765 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4766 
4767 	if (rdev->num_crtc >= 4) {
4768 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4769 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4770 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4771 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4772 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4773 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4774 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4775 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4776 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4777 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4778 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4779 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4780 	}
4781 
4782 	if (rdev->num_crtc >= 6) {
4783 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4784 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4785 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4786 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4787 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4788 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4789 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4790 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4791 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4792 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4793 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4794 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4795 	}
4796 
4797 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4798 		tmp = RREG32(DC_HPD1_INT_CONTROL);
4799 		tmp |= DC_HPDx_INT_ACK;
4800 		WREG32(DC_HPD1_INT_CONTROL, tmp);
4801 	}
4802 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4803 		tmp = RREG32(DC_HPD2_INT_CONTROL);
4804 		tmp |= DC_HPDx_INT_ACK;
4805 		WREG32(DC_HPD2_INT_CONTROL, tmp);
4806 	}
4807 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4808 		tmp = RREG32(DC_HPD3_INT_CONTROL);
4809 		tmp |= DC_HPDx_INT_ACK;
4810 		WREG32(DC_HPD3_INT_CONTROL, tmp);
4811 	}
4812 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4813 		tmp = RREG32(DC_HPD4_INT_CONTROL);
4814 		tmp |= DC_HPDx_INT_ACK;
4815 		WREG32(DC_HPD4_INT_CONTROL, tmp);
4816 	}
4817 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4818 		tmp = RREG32(DC_HPD5_INT_CONTROL);
4819 		tmp |= DC_HPDx_INT_ACK;
4820 		WREG32(DC_HPD5_INT_CONTROL, tmp);
4821 	}
4822 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4823 		tmp = RREG32(DC_HPD5_INT_CONTROL);
4824 		tmp |= DC_HPDx_INT_ACK;
4825 		WREG32(DC_HPD6_INT_CONTROL, tmp);
4826 	}
4827 }
4828 
/**
 * si_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupt delivery, wait briefly for in-flight interrupts,
 * acknowledge anything still pending, then force all interrupt
 * sources to their disabled state (SI).
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4837 
/**
 * si_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (SI) in preparation
 * for suspend or teardown.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4843 
/**
 * si_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hardware and free the IH ring
 * buffer (SI).  Used at driver unload.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4849 
4850 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4851 {
4852 	u32 wptr, tmp;
4853 
4854 	if (rdev->wb.enabled)
4855 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4856 	else
4857 		wptr = RREG32(IH_RB_WPTR);
4858 
4859 	if (wptr & RB_OVERFLOW) {
4860 		/* When a ring buffer overflow happen start parsing interrupt
4861 		 * from the last not overwritten vector (wptr + 16). Hopefully
4862 		 * this should allow us to catchup.
4863 		 */
4864 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4865 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4866 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4867 		tmp = RREG32(IH_RB_CNTL);
4868 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
4869 		WREG32(IH_RB_CNTL, tmp);
4870 	}
4871 	return (wptr & rdev->ih.ptr_mask);
4872 }
4873 
4874 /*        SI IV Ring
4875  * Each IV ring entry is 128 bits:
4876  * [7:0]    - interrupt source id
4877  * [31:8]   - reserved
4878  * [59:32]  - interrupt source data
4879  * [63:60]  - reserved
4880  * [71:64]  - RINGID
4881  * [79:72]  - VMID
4882  * [127:80] - reserved
4883  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring between rptr and wptr and dispatch each 128-bit
 * vector (source id, source data, ring id) to the appropriate handler:
 * CRTC vblank/vline, hotplug, VM protection faults, and CP/DMA fence
 * processing.  After updating rptr the wptr is re-read to catch
 * interrupts that arrived during processing.
 * Returns IRQ_HANDLED if interrupts were processed, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		/* VM protection faults: dump the faulting address/status */
		case 146:
		case 147:
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
5190 
5191 /**
5192  * si_copy_dma - copy pages using the DMA engine
5193  *
5194  * @rdev: radeon_device pointer
5195  * @src_offset: src GPU address
5196  * @dst_offset: dst GPU address
5197  * @num_gpu_pages: number of GPU pages to xfer
5198  * @fence: radeon fence object
5199  *
5200  * Copy GPU paging using the DMA engine (SI).
5201  * Used by the radeon ttm implementation to move pages if
5202  * registered as the asic copy callback.
5203  */
5204 int si_copy_dma(struct radeon_device *rdev,
5205 		uint64_t src_offset, uint64_t dst_offset,
5206 		unsigned num_gpu_pages,
5207 		struct radeon_fence **fence)
5208 {
5209 	struct radeon_semaphore *sem = NULL;
5210 	int ring_index = rdev->asic->copy.dma_ring_index;
5211 	struct radeon_ring *ring = &rdev->ring[ring_index];
5212 	u32 size_in_bytes, cur_size_in_bytes;
5213 	int i, num_loops;
5214 	int r = 0;
5215 
5216 	r = radeon_semaphore_create(rdev, &sem);
5217 	if (r) {
5218 		DRM_ERROR("radeon: moving bo (%d).\n", r);
5219 		return r;
5220 	}
5221 
5222 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5223 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5224 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5225 	if (r) {
5226 		DRM_ERROR("radeon: moving bo (%d).\n", r);
5227 		radeon_semaphore_free(rdev, &sem, NULL);
5228 		return r;
5229 	}
5230 
5231 	if (radeon_fence_need_sync(*fence, ring->idx)) {
5232 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5233 					    ring->idx);
5234 		radeon_fence_note_sync(*fence, ring->idx);
5235 	} else {
5236 		radeon_semaphore_free(rdev, &sem, NULL);
5237 	}
5238 
5239 	for (i = 0; i < num_loops; i++) {
5240 		cur_size_in_bytes = size_in_bytes;
5241 		if (cur_size_in_bytes > 0xFFFFF)
5242 			cur_size_in_bytes = 0xFFFFF;
5243 		size_in_bytes -= cur_size_in_bytes;
5244 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5245 		radeon_ring_write(ring, dst_offset & 0xffffffff);
5246 		radeon_ring_write(ring, src_offset & 0xffffffff);
5247 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5248 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5249 		src_offset += cur_size_in_bytes;
5250 		dst_offset += cur_size_in_bytes;
5251 	}
5252 
5253 	r = radeon_fence_emit(rdev, fence, ring->idx);
5254 	if (r) {
5255 		radeon_ring_unlock_undo(rdev, ring);
5256 		return r;
5257 	}
5258 
5259 	radeon_ring_unlock_commit(rdev, ring);
5260 	radeon_semaphore_free(rdev, &sem, *fence);
5261 
5262 	return r;
5263 }
5264 
5265 /*
5266  * startup/shutdown callbacks
5267  */
/**
 * si_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Load microcode, program the MC and GART, allocate RLC and writeback
 * buffers, bring up the fence drivers, UVD, interrupts, all CP and DMA
 * rings, the IB pool and the VM manager.  Shared by si_init() and
 * si_resume().  The order of operations is significant.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* load microcode from disk if any image is missing */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on each ring */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD bring-up is best-effort: on failure just disable its ring */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5443 
5444 int si_resume(struct radeon_device *rdev)
5445 {
5446 	int r;
5447 
5448 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5449 	 * posting will perform necessary task to bring back GPU into good
5450 	 * shape.
5451 	 */
5452 	/* post card */
5453 	atom_asic_init(rdev->mode_info.atom_context);
5454 
5455 	/* init golden registers */
5456 	si_init_golden_registers(rdev);
5457 
5458 	rdev->accel_working = true;
5459 	r = si_startup(rdev);
5460 	if (r) {
5461 		DRM_ERROR("si startup failed on resume\n");
5462 		rdev->accel_working = false;
5463 		return r;
5464 	}
5465 
5466 	return r;
5467 
5468 }
5469 
/**
 * si_suspend - quiesce the GPU ahead of a suspend cycle
 *
 * @rdev: radeon_device pointer
 *
 * Stops the acceleration engines roughly in reverse startup order:
 * VM manager, command processor, DMA engines, UVD (if present), then
 * interrupts, writeback and finally the PCIE GART.
 *
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	/* halt the command processor */
	si_cp_enable(rdev, false);
	/* stop both async DMA engines */
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD ring buffer controller, then let the UVD
		 * core save whatever state it needs across suspend */
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5484 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
/**
 * si_init - one-time driver init for an SI GPU
 *
 * @rdev: radeon_device pointer
 *
 * Reads/validates the BIOS, posts the card if needed, sets up clocks,
 * fences, the memory controller and the buffer manager, sizes all the
 * rings (GFX, two compute CPs, two DMA engines, optionally UVD) and the
 * IH ring, initializes the GART and finally attempts si_startup().
 * A startup failure disables acceleration but is not fatal here (the
 * device can still be used for modesetting); a missing MC ucode is.
 *
 * Returns 0 on success, negative error code on fatal failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	/* NOTE(review): message says "cayman" but this is the SI path -
	 * looks like copy/paste from ni.c; confirm before changing */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* size the five acceleration rings; buffers are allocated later
	 * in si_startup() via radeon_ring_init() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	if (rdev->has_uvd) {
		/* UVD init failure is non-fatal: just skip the UVD ring */
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* acceleration is disabled but the driver still loads for
		 * modesetting, hence no "return r" here */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	/* NOTE(review): error text says "NI+" in the SI code path -
	 * likely copied from ni.c; confirm before changing */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
5605 
/**
 * si_fini - tear down all driver state for an SI GPU
 *
 * @rdev: radeon_device pointer
 *
 * Reverse of si_init()/si_startup(): shuts down the acceleration blocks
 * (CP, DMA, IRQ, RLC), releases writeback, VM manager, IB pool and KMS
 * IRQ state, then frees UVD (if present), the GART, VRAM scratch, GEM,
 * fence driver, buffer manager and atombios context, and finally the
 * BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the dangling pointer after the free */
	rdev->bios = NULL;
}
5627 
5628 /**
5629  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5630  *
5631  * @rdev: radeon_device pointer
5632  *
5633  * Fetches a GPU clock counter snapshot (SI).
5634  * Returns the 64 bit clock counter snapshot.
5635  */
5636 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5637 {
5638 	uint64_t clock;
5639 
5640 	mutex_lock(&rdev->gpu_clock_mutex);
5641 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5642 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5643 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5644 	mutex_unlock(&rdev->gpu_clock_mutex);
5645 	return clock;
5646 }
5647 
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; 0 leaves the PLL bypassed and asleep
 * @dclk: requested UVD decode clock; 0 leaves the PLL bypassed and asleep
 *
 * Switches VCLK/DCLK onto the bypass clock, reprograms the UPLL dividers
 * computed by radeon_uvd_calc_upll_dividers(), then switches back to the
 * PLL outputs. The register write ordering and mdelay()s below follow the
 * required hardware programming sequence - do not reorder.
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* derive the feedback divider and the two post dividers for the
	 * requested vclk/dclk pair */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): 307200 threshold selects a spare PLL setting per the
	 * hardware init sequence - meaning not documented here */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
5738