/* xref: /openbmc/linux/drivers/gpu/drm/radeon/si.c (revision ca79522c) */
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"

#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769
#define OLAND_MC_UCODE_SIZE 7863

MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);

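/*
 * The golden-register tables below are flat arrays of
 * {register offset, mask, value} triples, applied by
 * radeon_program_register_sequence() from si_init_golden_registers():
 * a mask of 0xffffffff writes the value outright, anything else does a
 * read-modify-write of the masked bits (summary of the helper in
 * radeon_device.c).
 */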
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

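/*
 * PG (powergating) init sequence for Verde, programmed with the same
 * {register offset, mask, value} triple layout as the tables above.
 */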
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
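
/*
 * Clock values here appear to follow the driver-wide convention of
 * 10 kHz units (which would make TCLK above 10 MHz); this is inferred
 * from how rdev->clock values are used elsewhere, not stated in this
 * file.
 */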

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

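/*
 * The io_mc_regs tables are {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; si_mc_load_microcode() below walks them two words at a time
 * while the MC sequencer is halted.
 */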
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

/* ucode loading */
static int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_TAHITI:
		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_PITCAIRN:
		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_VERDE:
	default:
		io_mc_regs = (u32 *)&verde_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_OLAND:
		io_mc_regs = (u32 *)&oland_io_mc_regs;
		ucode_size = OLAND_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
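		/*
		 * Note: this inner branch can never be taken, since we only
		 * get here when running == 0; the blackout save/restore is
		 * dead code on this path.
		 */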
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		rlc_chip_name = "TAHITI";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		rlc_chip_name = "PITCAIRN";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		rlc_chip_name = "VERDE";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		rlc_chip_name = "OLAND";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/* watermark setup */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode)
			tmp = 0; /* 1/2 */
		else
			tmp = 2; /* whole */
	} else
		tmp = 0;

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;   /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};

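/*
 * The bandwidth helpers below do their arithmetic in fixed20_12 format,
 * i.e. 20.12 fixed point, via the dfixed_* helpers (dfixed_const,
 * dfixed_mul, dfixed_div, dfixed_trunc).  Clocks arrive in kHz and are
 * divided by 1000 up front, so the truncated results land in
 * MB/s-scale units (unit inferred from the arithmetic, not documented
 * here).
 */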
static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}

static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}

static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}

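/*
 * Returns the worst-case latency, in ns, that the line buffer must be
 * able to hide: MC latency plus outstanding chunk/cursor returns for
 * the other heads plus display-pipe latency, padded when a source line
 * cannot be refilled within the active display time.
 */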
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}

static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
{
	if (dce6_average_bandwidth(wm) <=
	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
{
	if (dce6_average_bandwidth(wm) <=
	    (dce6_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce6_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}

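/*
 * Programs the A/B latency watermarks and priority marks for one crtc.
 * Note that watermark B is currently computed with the same clocks as
 * watermark A; the low-clock values remain a TODO, per the comment in
 * the body below.
 */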
1610 static void dce6_program_watermarks(struct radeon_device *rdev,
1611 					 struct radeon_crtc *radeon_crtc,
1612 					 u32 lb_size, u32 num_heads)
1613 {
1614 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
1615 	struct dce6_wm_params wm;
1616 	u32 pixel_period;
1617 	u32 line_time = 0;
1618 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
1619 	u32 priority_a_mark = 0, priority_b_mark = 0;
1620 	u32 priority_a_cnt = PRIORITY_OFF;
1621 	u32 priority_b_cnt = PRIORITY_OFF;
1622 	u32 tmp, arb_control3;
1623 	fixed20_12 a, b, c;
1624 
1625 	if (radeon_crtc->base.enabled && num_heads && mode) {
1626 		pixel_period = 1000000 / (u32)mode->clock;
1627 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
1628 		priority_a_cnt = 0;
1629 		priority_b_cnt = 0;
1630 
1631 		wm.yclk = rdev->pm.current_mclk * 10;
1632 		wm.sclk = rdev->pm.current_sclk * 10;
1633 		wm.disp_clk = mode->clock;
1634 		wm.src_width = mode->crtc_hdisplay;
1635 		wm.active_time = mode->crtc_hdisplay * pixel_period;
1636 		wm.blank_time = line_time - wm.active_time;
1637 		wm.interlaced = false;
1638 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
1639 			wm.interlaced = true;
1640 		wm.vsc = radeon_crtc->vsc;
1641 		wm.vtaps = 1;
1642 		if (radeon_crtc->rmx_type != RMX_OFF)
1643 			wm.vtaps = 2;
1644 		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
1645 		wm.lb_size = lb_size;
1646 		if (rdev->family == CHIP_ARUBA)
1647 			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
1648 		else
1649 			wm.dram_channels = si_get_number_of_dram_channels(rdev);
1650 		wm.num_heads = num_heads;
1651 
1652 		/* set for high clocks */
1653 		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
1654 		/* set for low clocks */
1655 		/* wm.yclk = low clk; wm.sclk = low clk */
1656 		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
1657 
1658 		/* possibly force display priority to high */
1659 		/* should really do this at mode validation time... */
1660 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
1661 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
1662 		    !dce6_check_latency_hiding(&wm) ||
1663 		    (rdev->disp_priority == 2)) {
1664 			DRM_DEBUG_KMS("force priority to high\n");
1665 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
1666 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
1667 		}
1668 
1669 		a.full = dfixed_const(1000);
1670 		b.full = dfixed_const(mode->clock);
1671 		b.full = dfixed_div(b, a);
1672 		c.full = dfixed_const(latency_watermark_a);
1673 		c.full = dfixed_mul(c, b);
1674 		c.full = dfixed_mul(c, radeon_crtc->hsc);
1675 		c.full = dfixed_div(c, a);
1676 		a.full = dfixed_const(16);
1677 		c.full = dfixed_div(c, a);
1678 		priority_a_mark = dfixed_trunc(c);
1679 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
1680 
1681 		a.full = dfixed_const(1000);
1682 		b.full = dfixed_const(mode->clock);
1683 		b.full = dfixed_div(b, a);
1684 		c.full = dfixed_const(latency_watermark_b);
1685 		c.full = dfixed_mul(c, b);
1686 		c.full = dfixed_mul(c, radeon_crtc->hsc);
1687 		c.full = dfixed_div(c, a);
1688 		a.full = dfixed_const(16);
1689 		c.full = dfixed_div(c, a);
1690 		priority_b_mark = dfixed_trunc(c);
1691 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
1692 	}
1693 
1694 	/* select wm A */
1695 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
1696 	tmp = arb_control3;
1697 	tmp &= ~LATENCY_WATERMARK_MASK(3);
1698 	tmp |= LATENCY_WATERMARK_MASK(1);
1699 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
1700 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
1701 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
1702 		LATENCY_HIGH_WATERMARK(line_time)));
1703 	/* select wm B */
1704 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
1705 	tmp &= ~LATENCY_WATERMARK_MASK(3);
1706 	tmp |= LATENCY_WATERMARK_MASK(2);
1707 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
1708 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
1709 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
1710 		LATENCY_HIGH_WATERMARK(line_time)));
1711 	/* restore original selection */
1712 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
1713 
1714 	/* write the priority marks */
1715 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
1716 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
1718 }
1719 
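/**
 * dce6_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation for all active crtcs (SI).
 */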
1720 void dce6_bandwidth_update(struct radeon_device *rdev)
1721 {
1722 	struct drm_display_mode *mode0 = NULL;
1723 	struct drm_display_mode *mode1 = NULL;
1724 	u32 num_heads = 0, lb_size;
1725 	int i;
1726 
1727 	radeon_update_display_priority(rdev);
1728 
1729 	for (i = 0; i < rdev->num_crtc; i++) {
1730 		if (rdev->mode_info.crtcs[i]->base.enabled)
1731 			num_heads++;
1732 	}
1733 	for (i = 0; i < rdev->num_crtc; i += 2) {
1734 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1735 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1736 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1737 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1738 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1739 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1740 	}
1741 }
1742 
1743 /*
1744  * Core functions
1745  */
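/**
 * si_tiling_mode_table_init - load the GB tiling mode table
 *
 * @rdev: radeon_device pointer
 *
 * Fill in the tile mode array and program the GB_TILE_MODE0-31
 * registers with the asic-specific tiling parameters (SI).
 */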
1746 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1747 {
1748 	const u32 num_tile_mode_states = 32;
1749 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1750 
1751 	switch (rdev->config.si.mem_row_size_in_kb) {
1752 	case 1:
1753 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754 		break;
1755 	case 2:
1756 	default:
1757 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758 		break;
1759 	case 4:
1760 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761 		break;
1762 	}
1763 
1764 	if ((rdev->family == CHIP_TAHITI) ||
1765 	    (rdev->family == CHIP_PITCAIRN)) {
1766 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767 			switch (reg_offset) {
1768 			case 0:  /* non-AA compressed depth or any compressed stencil */
1769 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1771 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1772 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1773 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1774 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1777 				break;
1778 			case 1:  /* 2xAA/4xAA compressed depth only */
1779 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1781 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1782 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1783 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1784 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1785 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1787 				break;
1788 			case 2:  /* 8xAA compressed depth only */
1789 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1791 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1792 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1793 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1794 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1797 				break;
1798 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1799 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1800 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1801 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1802 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1803 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1804 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1807 				break;
1808 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1809 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1810 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1811 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1812 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1813 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1814 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1815 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1816 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1817 				break;
1818 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1819 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1820 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1821 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1822 						 TILE_SPLIT(split_equal_to_row_size) |
1823 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1824 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1827 				break;
1828 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1829 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1831 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832 						 TILE_SPLIT(split_equal_to_row_size) |
1833 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1834 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1837 				break;
1838 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1839 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1840 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1841 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1842 						 TILE_SPLIT(split_equal_to_row_size) |
1843 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1844 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1845 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1846 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1847 				break;
1848 			case 8:  /* 1D and 1D Array Surfaces */
1849 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1850 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1851 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1852 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1854 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1855 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1856 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1857 				break;
1858 			case 9:  /* Displayable maps. */
1859 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1860 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1861 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1862 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1863 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1864 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1866 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1867 				break;
1868 			case 10:  /* Display 8bpp. */
1869 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1871 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1872 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1873 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1874 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1875 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1876 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1877 				break;
1878 			case 11:  /* Display 16bpp. */
1879 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1880 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1881 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1882 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1883 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1884 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1885 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1886 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1887 				break;
1888 			case 12:  /* Display 32bpp. */
1889 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1890 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1892 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1893 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1894 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1897 				break;
1898 			case 13:  /* Thin. */
1899 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1900 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1901 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1902 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1903 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1904 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1906 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1907 				break;
1908 			case 14:  /* Thin 8 bpp. */
1909 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1910 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1911 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1912 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1913 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1914 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1916 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1917 				break;
1918 			case 15:  /* Thin 16 bpp. */
1919 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1920 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1921 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1922 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1923 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1924 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1926 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1927 				break;
1928 			case 16:  /* Thin 32 bpp. */
1929 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1930 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1931 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1932 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1933 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1934 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1936 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1937 				break;
1938 			case 17:  /* Thin 64 bpp. */
1939 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1941 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1942 						 TILE_SPLIT(split_equal_to_row_size) |
1943 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1944 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1946 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1947 				break;
1948 			case 21:  /* 8 bpp PRT. */
1949 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1950 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1951 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1952 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1953 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1954 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1955 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1956 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1957 				break;
1958 			case 22:  /* 16 bpp PRT */
1959 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1960 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1961 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1962 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1963 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1964 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1967 				break;
1968 			case 23:  /* 32 bpp PRT */
1969 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1971 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1972 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1973 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1974 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1976 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1977 				break;
1978 			case 24:  /* 64 bpp PRT */
1979 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1981 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1982 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1983 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1984 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1986 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1987 				break;
1988 			case 25:  /* 128 bpp PRT */
1989 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1991 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1992 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1993 						 NUM_BANKS(ADDR_SURF_8_BANK) |
1994 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1995 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1996 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1997 				break;
1998 			default:
1999 				gb_tile_moden = 0;
2000 				break;
2001 			}
2002 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2003 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2004 		}
2005 	} else if ((rdev->family == CHIP_VERDE) ||
2006 		   (rdev->family == CHIP_OLAND)) {
2007 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2008 			switch (reg_offset) {
2009 			case 0:  /* non-AA compressed depth or any compressed stencil */
2010 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2011 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2012 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2013 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2014 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2015 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2016 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2017 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2018 				break;
2019 			case 1:  /* 2xAA/4xAA compressed depth only */
2020 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2021 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2022 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2023 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2024 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2025 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2027 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2028 				break;
2029 			case 2:  /* 8xAA compressed depth only */
2030 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2032 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2033 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2034 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2035 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2036 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2037 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2038 				break;
2039 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2040 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2043 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2044 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2045 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2048 				break;
2049 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2050 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2052 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2053 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2054 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2055 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2056 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2057 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2058 				break;
2059 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2060 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2061 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2062 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2063 						 TILE_SPLIT(split_equal_to_row_size) |
2064 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2065 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2066 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2067 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2068 				break;
2069 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2070 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2071 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2072 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073 						 TILE_SPLIT(split_equal_to_row_size) |
2074 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2075 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2078 				break;
2079 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2080 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2081 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2082 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2083 						 TILE_SPLIT(split_equal_to_row_size) |
2084 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2085 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2086 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2087 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2088 				break;
2089 			case 8:  /* 1D and 1D Array Surfaces */
2090 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2091 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2092 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2093 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2094 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2095 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2096 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2097 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2098 				break;
2099 			case 9:  /* Displayable maps. */
2100 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2101 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2102 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2103 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2104 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2105 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2108 				break;
2109 			case 10:  /* Display 8bpp. */
2110 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2112 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2113 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2114 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2115 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2116 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2117 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2118 				break;
2119 			case 11:  /* Display 16bpp. */
2120 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2122 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2124 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2125 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2127 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2128 				break;
2129 			case 12:  /* Display 32bpp. */
2130 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2132 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2133 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2134 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2135 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2138 				break;
2139 			case 13:  /* Thin. */
2140 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2142 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2143 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2144 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2145 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2147 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2148 				break;
2149 			case 14:  /* Thin 8 bpp. */
2150 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2152 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2153 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2155 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2158 				break;
2159 			case 15:  /* Thin 16 bpp. */
2160 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2162 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2163 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2164 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2165 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2167 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2168 				break;
2169 			case 16:  /* Thin 32 bpp. */
2170 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2172 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2173 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2174 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2175 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2176 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2177 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2178 				break;
2179 			case 17:  /* Thin 64 bpp. */
2180 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2182 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183 						 TILE_SPLIT(split_equal_to_row_size) |
2184 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2185 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2188 				break;
2189 			case 21:  /* 8 bpp PRT. */
2190 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2192 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2193 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2194 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2195 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2196 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2197 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2198 				break;
2199 			case 22:  /* 16 bpp PRT */
2200 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2201 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2202 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2203 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2204 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2205 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2207 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2208 				break;
2209 			case 23:  /* 32 bpp PRT */
2210 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2212 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2214 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2215 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2216 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2217 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2218 				break;
2219 			case 24:  /* 64 bpp PRT */
2220 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2222 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2223 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2224 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2225 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2228 				break;
2229 			case 25:  /* 128 bpp PRT */
2230 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2232 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2233 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2234 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2235 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2237 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2238 				break;
2239 			default:
2240 				gb_tile_moden = 0;
2241 				break;
2242 			}
2243 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2244 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2245 		}
2246 	} else {
2247 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
	}
2248 }
2249 
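/**
 * si_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH; 0xffffffff selects
 * broadcast to all SEs or SHs (SI).
 */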
2250 static void si_select_se_sh(struct radeon_device *rdev,
2251 			    u32 se_num, u32 sh_num)
2252 {
2253 	u32 data = INSTANCE_BROADCAST_WRITES;
2254 
2255 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2256 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2257 	else if (se_num == 0xffffffff)
2258 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2259 	else if (sh_num == 0xffffffff)
2260 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2261 	else
2262 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2263 	WREG32(GRBM_GFX_INDEX, data);
2264 }
2265 
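/**
 * si_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a bitmask of @bit_width consecutive set bits (SI).
 * Returns the bitmask.
 */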
2266 static u32 si_create_bitmask(u32 bit_width)
2267 {
2268 	u32 i, mask = 0;
2269 
2270 	for (i = 0; i < bit_width; i++) {
2271 		mask <<= 1;
2272 		mask |= 1;
2273 	}
2274 	return mask;
2275 }
2276 
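/**
 * si_get_cu_enabled - get a mask of the enabled CUs
 *
 * @rdev: radeon_device pointer
 * @cu_per_sh: number of CUs (compute units) per SH
 *
 * Combine the fused and user-disabled CU info and return a
 * mask of the CUs that are active on the currently selected
 * SE/SH (SI).
 */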
2277 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2278 {
2279 	u32 data, mask;
2280 
2281 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2282 	if (data & 1)
2283 		data &= INACTIVE_CUS_MASK;
2284 	else
2285 		data = 0;
2286 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2287 
2288 	data >>= INACTIVE_CUS_SHIFT;
2289 
2290 	mask = si_create_bitmask(cu_per_sh);
2291 
2292 	return ~data & mask;
2293 }
2294 
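/**
 * si_setup_spi - configure the SPI static thread management
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE
 * @cu_per_sh: number of CUs per SH
 *
 * For each SE/SH, find the first active CU and clear its enable
 * bit in SPI_STATIC_THREAD_MGMT_3 (SI).
 */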
2295 static void si_setup_spi(struct radeon_device *rdev,
2296 			 u32 se_num, u32 sh_per_se,
2297 			 u32 cu_per_sh)
2298 {
2299 	int i, j, k;
2300 	u32 data, mask, active_cu;
2301 
2302 	for (i = 0; i < se_num; i++) {
2303 		for (j = 0; j < sh_per_se; j++) {
2304 			si_select_se_sh(rdev, i, j);
2305 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2306 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2307 
2308 			/* check each of the 16 CU bits in turn */
2309 			for (k = 0; k < 16; k++) {
2310 				mask = 1 << k;
2311 				if (active_cu & mask) {
2312 					data &= ~mask;
2313 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2314 					break;
2315 				}
2316 			}
2317 		}
2318 	}
2319 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2320 }
2321 
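/**
 * si_get_rb_disabled - get a mask of the disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE
 *
 * Combine the fused and user-disabled RB info and return a
 * mask of the RBs that are disabled on the currently selected
 * SE/SH (SI).
 * Returns the disabled RB bitmask.
 */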
2322 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2323 			      u32 max_rb_num, u32 se_num,
2324 			      u32 sh_per_se)
2325 {
2326 	u32 data, mask;
2327 
2328 	data = RREG32(CC_RB_BACKEND_DISABLE);
2329 	if (data & 1)
2330 		data &= BACKEND_DISABLE_MASK;
2331 	else
2332 		data = 0;
2333 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2334 
2335 	data >>= BACKEND_DISABLE_SHIFT;
2336 
2337 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2338 
2339 	return data & mask;
2340 }
2341 
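/**
 * si_setup_rb - set up the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Work out which RBs are enabled and program PA_SC_RASTER_CONFIG
 * on each SE accordingly (SI).
 */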
2342 static void si_setup_rb(struct radeon_device *rdev,
2343 			u32 se_num, u32 sh_per_se,
2344 			u32 max_rb_num)
2345 {
2346 	int i, j;
2347 	u32 data, mask;
2348 	u32 disabled_rbs = 0;
2349 	u32 enabled_rbs = 0;
2350 
2351 	for (i = 0; i < se_num; i++) {
2352 		for (j = 0; j < sh_per_se; j++) {
2353 			si_select_se_sh(rdev, i, j);
2354 			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2355 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2356 		}
2357 	}
2358 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2359 
2360 	mask = 1;
2361 	for (i = 0; i < max_rb_num; i++) {
2362 		if (!(disabled_rbs & mask))
2363 			enabled_rbs |= mask;
2364 		mask <<= 1;
2365 	}
2366 
2367 	for (i = 0; i < se_num; i++) {
2368 		si_select_se_sh(rdev, i, 0xffffffff);
2369 		data = 0;
2370 		for (j = 0; j < sh_per_se; j++) {
2371 			switch (enabled_rbs & 3) {
2372 			case 1:
2373 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2374 				break;
2375 			case 2:
2376 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2377 				break;
2378 			case 3:
2379 			default:
2380 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2381 				break;
2382 			}
2383 			enabled_rbs >>= 2;
2384 		}
2385 		WREG32(PA_SC_RASTER_CONFIG, data);
2386 	}
2387 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2388 }
2389 
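/**
 * si_gpu_init - set up the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Set the asic-specific limits (pipes, CUs, RBs, etc.), work out
 * the tiling configuration, and program the hardware defaults for
 * the 3D engine (SI).
 */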
2390 static void si_gpu_init(struct radeon_device *rdev)
2391 {
2392 	u32 gb_addr_config = 0;
2393 	u32 mc_shared_chmap, mc_arb_ramcfg;
2394 	u32 sx_debug_1;
2395 	u32 hdp_host_path_cntl;
2396 	u32 tmp;
2397 	int i, j;
2398 
2399 	switch (rdev->family) {
2400 	case CHIP_TAHITI:
2401 		rdev->config.si.max_shader_engines = 2;
2402 		rdev->config.si.max_tile_pipes = 12;
2403 		rdev->config.si.max_cu_per_sh = 8;
2404 		rdev->config.si.max_sh_per_se = 2;
2405 		rdev->config.si.max_backends_per_se = 4;
2406 		rdev->config.si.max_texture_channel_caches = 12;
2407 		rdev->config.si.max_gprs = 256;
2408 		rdev->config.si.max_gs_threads = 32;
2409 		rdev->config.si.max_hw_contexts = 8;
2410 
2411 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2412 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2413 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2414 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2415 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2416 		break;
2417 	case CHIP_PITCAIRN:
2418 		rdev->config.si.max_shader_engines = 2;
2419 		rdev->config.si.max_tile_pipes = 8;
2420 		rdev->config.si.max_cu_per_sh = 5;
2421 		rdev->config.si.max_sh_per_se = 2;
2422 		rdev->config.si.max_backends_per_se = 4;
2423 		rdev->config.si.max_texture_channel_caches = 8;
2424 		rdev->config.si.max_gprs = 256;
2425 		rdev->config.si.max_gs_threads = 32;
2426 		rdev->config.si.max_hw_contexts = 8;
2427 
2428 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2429 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2430 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2431 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2432 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2433 		break;
2434 	case CHIP_VERDE:
2435 	default:
2436 		rdev->config.si.max_shader_engines = 1;
2437 		rdev->config.si.max_tile_pipes = 4;
2438 		rdev->config.si.max_cu_per_sh = 2;
2439 		rdev->config.si.max_sh_per_se = 2;
2440 		rdev->config.si.max_backends_per_se = 4;
2441 		rdev->config.si.max_texture_channel_caches = 4;
2442 		rdev->config.si.max_gprs = 256;
2443 		rdev->config.si.max_gs_threads = 32;
2444 		rdev->config.si.max_hw_contexts = 8;
2445 
2446 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2447 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2448 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2449 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2450 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2451 		break;
2452 	case CHIP_OLAND:
2453 		rdev->config.si.max_shader_engines = 1;
2454 		rdev->config.si.max_tile_pipes = 4;
2455 		rdev->config.si.max_cu_per_sh = 6;
2456 		rdev->config.si.max_sh_per_se = 1;
2457 		rdev->config.si.max_backends_per_se = 2;
2458 		rdev->config.si.max_texture_channel_caches = 4;
2459 		rdev->config.si.max_gprs = 256;
2460 		rdev->config.si.max_gs_threads = 16;
2461 		rdev->config.si.max_hw_contexts = 8;
2462 
2463 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2464 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2465 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2466 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2467 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2468 		break;
2469 	}
2470 
2471 	/* Initialize HDP */
2472 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2473 		WREG32((0x2c14 + j), 0x00000000);
2474 		WREG32((0x2c18 + j), 0x00000000);
2475 		WREG32((0x2c1c + j), 0x00000000);
2476 		WREG32((0x2c20 + j), 0x00000000);
2477 		WREG32((0x2c24 + j), 0x00000000);
2478 	}
2479 
2480 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2481 
2482 	evergreen_fix_pci_max_read_req_size(rdev);
2483 
2484 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2485 
2486 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2487 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2488 
2489 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2490 	rdev->config.si.mem_max_burst_length_bytes = 256;
2491 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2492 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2493 	if (rdev->config.si.mem_row_size_in_kb > 4)
2494 		rdev->config.si.mem_row_size_in_kb = 4;
2495 	/* XXX use MC settings? */
2496 	rdev->config.si.shader_engine_tile_size = 32;
2497 	rdev->config.si.num_gpus = 1;
2498 	rdev->config.si.multi_gpu_tile_size = 64;
2499 
2500 	/* fix up row size */
2501 	gb_addr_config &= ~ROW_SIZE_MASK;
2502 	switch (rdev->config.si.mem_row_size_in_kb) {
2503 	case 1:
2504 	default:
2505 		gb_addr_config |= ROW_SIZE(0);
2506 		break;
2507 	case 2:
2508 		gb_addr_config |= ROW_SIZE(1);
2509 		break;
2510 	case 4:
2511 		gb_addr_config |= ROW_SIZE(2);
2512 		break;
2513 	}
2514 
2515 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2516 	 * not have bank info, so create a custom tiling dword.
2517 	 * bits 3:0   num_pipes
2518 	 * bits 7:4   num_banks
2519 	 * bits 11:8  group_size
2520 	 * bits 15:12 row_size
2521 	 */
2522 	rdev->config.si.tile_config = 0;
2523 	switch (rdev->config.si.num_tile_pipes) {
2524 	case 1:
2525 		rdev->config.si.tile_config |= (0 << 0);
2526 		break;
2527 	case 2:
2528 		rdev->config.si.tile_config |= (1 << 0);
2529 		break;
2530 	case 4:
2531 		rdev->config.si.tile_config |= (2 << 0);
2532 		break;
2533 	case 8:
2534 	default:
2535 		/* XXX what about 12? */
2536 		rdev->config.si.tile_config |= (3 << 0);
2537 		break;
2538 	}
2539 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
2540 	case 0: /* four banks */
2541 		rdev->config.si.tile_config |= 0 << 4;
2542 		break;
2543 	case 1: /* eight banks */
2544 		rdev->config.si.tile_config |= 1 << 4;
2545 		break;
2546 	case 2: /* sixteen banks */
2547 	default:
2548 		rdev->config.si.tile_config |= 2 << 4;
2549 		break;
2550 	}
2551 	rdev->config.si.tile_config |=
2552 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2553 	rdev->config.si.tile_config |=
2554 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2555 
2556 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2557 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2558 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2559 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2560 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2561 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2562 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2563 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2564 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2565 
2566 	si_tiling_mode_table_init(rdev);
2567 
2568 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
2569 		    rdev->config.si.max_sh_per_se,
2570 		    rdev->config.si.max_backends_per_se);
2571 
2572 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
2573 		     rdev->config.si.max_sh_per_se,
2574 		     rdev->config.si.max_cu_per_sh);
2575 
2577 	/* set HW defaults for 3D engine */
2578 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
2579 				     ROQ_IB2_START(0x2b)));
2580 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2581 
2582 	sx_debug_1 = RREG32(SX_DEBUG_1);
2583 	WREG32(SX_DEBUG_1, sx_debug_1);
2584 
2585 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2586 
2587 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
2588 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
2589 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
2590 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
2591 
2592 	WREG32(VGT_NUM_INSTANCES, 1);
2593 
2594 	WREG32(CP_PERFMON_CNTL, 0);
2595 
2596 	WREG32(SQ_CONFIG, 0);
2597 
2598 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2599 					  FORCE_EOV_MAX_REZ_CNT(255)));
2600 
2601 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2602 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2603 
2604 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2605 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2606 
2607 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
2608 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
2609 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
2610 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
2611 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
2612 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
2613 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
2614 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
2615 
2616 	tmp = RREG32(HDP_MISC_CNTL);
2617 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2618 	WREG32(HDP_MISC_CNTL, tmp);
2619 
2620 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2621 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2622 
2623 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2624 
2625 	udelay(50);
2626 }
2627 
2628 /*
2629  * GPU scratch register helper functions.
2630  */
2631 static void si_scratch_init(struct radeon_device *rdev)
2632 {
2633 	int i;
2634 
2635 	rdev->scratch.num_reg = 7;
2636 	rdev->scratch.reg_base = SCRATCH_REG0;
2637 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2638 		rdev->scratch.free[i] = true;
2639 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2640 	}
2641 }
2642 
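/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flush the GPU read caches, then emit an EVENT_WRITE_EOP packet
 * to write the fence sequence number and trigger an interrupt (SI).
 */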
2643 void si_fence_ring_emit(struct radeon_device *rdev,
2644 			struct radeon_fence *fence)
2645 {
2646 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2647 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2648 
2649 	/* flush read cache over gart */
2650 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2651 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
2652 	radeon_ring_write(ring, 0);
2653 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
2654 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
2655 			  PACKET3_TC_ACTION_ENA |
2656 			  PACKET3_SH_KCACHE_ACTION_ENA |
2657 			  PACKET3_SH_ICACHE_ACTION_ENA);
2658 	radeon_ring_write(ring, 0xFFFFFFFF);
2659 	radeon_ring_write(ring, 0);
2660 	radeon_ring_write(ring, 10); /* poll interval */
2661 	/* EVENT_WRITE_EOP - flush caches, send int */
2662 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2663 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
2664 	radeon_ring_write(ring, addr & 0xffffffff);
2665 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
2666 	radeon_ring_write(ring, fence->seq);
2667 	radeon_ring_write(ring, 0);
2668 }
2669 
2670 /*
2671  * IB stuff
2672  */
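/**
 * si_ring_ib_execute - execute an IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emit an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet to execute the IB, flushing the read caches for the IB's
 * vmid afterwards (SI).
 */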
2673 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2674 {
2675 	struct radeon_ring *ring = &rdev->ring[ib->ring];
2676 	u32 header;
2677 
2678 	if (ib->is_const_ib) {
2679 		/* set switch buffer packet before const IB */
2680 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2681 		radeon_ring_write(ring, 0);
2682 
2683 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2684 	} else {
2685 		u32 next_rptr;
2686 		if (ring->rptr_save_reg) {
2687 			next_rptr = ring->wptr + 3 + 4 + 8;
2688 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2689 			radeon_ring_write(ring, ((ring->rptr_save_reg -
2690 						  PACKET3_SET_CONFIG_REG_START) >> 2));
2691 			radeon_ring_write(ring, next_rptr);
2692 		} else if (rdev->wb.enabled) {
2693 			next_rptr = ring->wptr + 5 + 4 + 8;
2694 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2695 			radeon_ring_write(ring, (1 << 8));
2696 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2697 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2698 			radeon_ring_write(ring, next_rptr);
2699 		}
2700 
2701 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2702 	}
2703 
2704 	radeon_ring_write(ring, header);
2705 	radeon_ring_write(ring,
2706 #ifdef __BIG_ENDIAN
2707 			  (2 << 0) |
2708 #endif
2709 			  (ib->gpu_addr & 0xFFFFFFFC));
2710 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2711 	radeon_ring_write(ring, ib->length_dw |
2712 			  (ib->vm ? (ib->vm->id << 24) : 0));
2713 
2714 	if (!ib->is_const_ib) {
2715 		/* flush read cache over gart for this vmid */
2716 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2717 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
2718 		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
2719 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
2720 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
2721 				  PACKET3_TC_ACTION_ENA |
2722 				  PACKET3_SH_KCACHE_ACTION_ENA |
2723 				  PACKET3_SH_ICACHE_ACTION_ENA);
2724 		radeon_ring_write(ring, 0xFFFFFFFF);
2725 		radeon_ring_write(ring, 0);
2726 		radeon_ring_write(ring, 10); /* poll interval */
2727 	}
2728 }
2729 
2730 /*
2731  * CP.
2732  */
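/**
 * si_cp_enable - enable/disable the CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halt or unhalt the CP MEs (ME, PFP, CE) (SI).
 */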
2733 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2734 {
2735 	if (enable) {
2736 		WREG32(CP_ME_CNTL, 0);
2737 	} else {
2738 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2739 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2740 		WREG32(SCRATCH_UMSK, 0);
2741 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2742 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2743 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2744 	}
2745 	udelay(50);
2746 }
2747 
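/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the PFP, CE, and ME microcode into their instruction RAM (SI).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */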
2748 static int si_cp_load_microcode(struct radeon_device *rdev)
2749 {
2750 	const __be32 *fw_data;
2751 	int i;
2752 
2753 	if (!rdev->me_fw || !rdev->pfp_fw)
2754 		return -EINVAL;
2755 
2756 	si_cp_enable(rdev, false);
2757 
2758 	/* PFP */
2759 	fw_data = (const __be32 *)rdev->pfp_fw->data;
2760 	WREG32(CP_PFP_UCODE_ADDR, 0);
2761 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2762 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2763 	WREG32(CP_PFP_UCODE_ADDR, 0);
2764 
2765 	/* CE */
2766 	fw_data = (const __be32 *)rdev->ce_fw->data;
2767 	WREG32(CP_CE_UCODE_ADDR, 0);
2768 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2769 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2770 	WREG32(CP_CE_UCODE_ADDR, 0);
2771 
2772 	/* ME */
2773 	fw_data = (const __be32 *)rdev->me_fw->data;
2774 	WREG32(CP_ME_RAM_WADDR, 0);
2775 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2776 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2777 	WREG32(CP_ME_RAM_WADDR, 0);
2778 
2779 	WREG32(CP_PFP_UCODE_ADDR, 0);
2780 	WREG32(CP_CE_UCODE_ADDR, 0);
2781 	WREG32(CP_ME_RAM_WADDR, 0);
2782 	WREG32(CP_ME_RAM_RADDR, 0);
2783 	return 0;
2784 }
2785 
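/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the CP ME, set up the CE partitions, and emit the
 * clear state on the gfx ring and the compute rings (SI).
 * Returns 0 for success, error for failure.
 */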
2786 static int si_cp_start(struct radeon_device *rdev)
2787 {
2788 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2789 	int r, i;
2790 
2791 	r = radeon_ring_lock(rdev, ring, 7 + 4);
2792 	if (r) {
2793 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2794 		return r;
2795 	}
2796 	/* init the CP */
2797 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2798 	radeon_ring_write(ring, 0x1);
2799 	radeon_ring_write(ring, 0x0);
2800 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2801 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2802 	radeon_ring_write(ring, 0);
2803 	radeon_ring_write(ring, 0);
2804 
2805 	/* init the CE partitions */
2806 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2807 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2808 	radeon_ring_write(ring, 0xc000);
2809 	radeon_ring_write(ring, 0xe000);
2810 	radeon_ring_unlock_commit(rdev, ring);
2811 
2812 	si_cp_enable(rdev, true);
2813 
2814 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2815 	if (r) {
2816 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2817 		return r;
2818 	}
2819 
2820 	/* setup clear context state */
2821 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2822 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2823 
2824 	for (i = 0; i < si_default_size; i++)
2825 		radeon_ring_write(ring, si_default_state[i]);
2826 
2827 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2828 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2829 
2830 	/* set clear context state */
2831 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2832 	radeon_ring_write(ring, 0);
2833 
2834 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2835 	radeon_ring_write(ring, 0x00000316);
2836 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2837 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2838 
2839 	radeon_ring_unlock_commit(rdev, ring);
2840 
2841 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2842 		ring = &rdev->ring[i];
2843 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}

2845 		/* clear the compute context state */
2846 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2847 		radeon_ring_write(ring, 0);
2848 
2849 		radeon_ring_unlock_commit(rdev, ring);
2850 	}
2851 
2852 	return 0;
2853 }
2854 
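/**
 * si_cp_fini - stop the CP and tear down the rings
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP MEs, free the gfx and compute ring objects, and
 * release their rptr scratch registers (SI).
 */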
2855 static void si_cp_fini(struct radeon_device *rdev)
2856 {
2857 	struct radeon_ring *ring;
2858 	si_cp_enable(rdev, false);
2859 
2860 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2861 	radeon_ring_fini(rdev, ring);
2862 	radeon_scratch_free(rdev, ring->rptr_save_reg);
2863 
2864 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2865 	radeon_ring_fini(rdev, ring);
2866 	radeon_scratch_free(rdev, ring->rptr_save_reg);
2867 
2868 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2869 	radeon_ring_fini(rdev, ring);
2870 	radeon_scratch_free(rdev, ring->rptr_save_reg);
2871 }
2872 
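/**
 * si_cp_resume - set up and start the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Reset the CP, program the gfx and compute ring buffers, start
 * the rings, and run the ring tests (SI).
 * Returns 0 for success, error for failure.
 */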
2873 static int si_cp_resume(struct radeon_device *rdev)
2874 {
2875 	struct radeon_ring *ring;
2876 	u32 tmp;
2877 	u32 rb_bufsz;
2878 	int r;
2879 
2880 	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
2881 	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
2882 				 SOFT_RESET_PA |
2883 				 SOFT_RESET_VGT |
2884 				 SOFT_RESET_SPI |
2885 				 SOFT_RESET_SX));
2886 	RREG32(GRBM_SOFT_RESET);
2887 	mdelay(15);
2888 	WREG32(GRBM_SOFT_RESET, 0);
2889 	RREG32(GRBM_SOFT_RESET);
2890 
2891 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2892 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2893 
2894 	/* Set the write pointer delay */
2895 	WREG32(CP_RB_WPTR_DELAY, 0);
2896 
2897 	WREG32(CP_DEBUG, 0);
2898 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2899 
2900 	/* ring 0 - compute and gfx */
2901 	/* Set ring buffer size */
2902 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2903 	rb_bufsz = drm_order(ring->ring_size / 8);
2904 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2905 #ifdef __BIG_ENDIAN
2906 	tmp |= BUF_SWAP_32BIT;
2907 #endif
2908 	WREG32(CP_RB0_CNTL, tmp);
2909 
2910 	/* Initialize the ring buffer's read and write pointers */
2911 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2912 	ring->wptr = 0;
2913 	WREG32(CP_RB0_WPTR, ring->wptr);
2914 
2915 	/* set the wb address whether it's enabled or not */
2916 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2917 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2918 
2919 	if (rdev->wb.enabled) {
2920 		WREG32(SCRATCH_UMSK, 0xff);
2921 	} else {
2922 		tmp |= RB_NO_UPDATE;
2923 		WREG32(SCRATCH_UMSK, 0);
2924 	}
2925 
2926 	mdelay(1);
2927 	WREG32(CP_RB0_CNTL, tmp);
2928 
2929 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2930 
2931 	ring->rptr = RREG32(CP_RB0_RPTR);
2932 
2933 	/* ring1  - compute only */
2934 	/* Set ring buffer size */
2935 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2936 	rb_bufsz = drm_order(ring->ring_size / 8);
2937 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2938 #ifdef __BIG_ENDIAN
2939 	tmp |= BUF_SWAP_32BIT;
2940 #endif
2941 	WREG32(CP_RB1_CNTL, tmp);
2942 
2943 	/* Initialize the ring buffer's read and write pointers */
2944 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2945 	ring->wptr = 0;
2946 	WREG32(CP_RB1_WPTR, ring->wptr);
2947 
2948 	/* set the wb address whether it's enabled or not */
2949 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2950 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2951 
2952 	mdelay(1);
2953 	WREG32(CP_RB1_CNTL, tmp);
2954 
2955 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2956 
2957 	ring->rptr = RREG32(CP_RB1_RPTR);
2958 
2959 	/* ring2 - compute only */
2960 	/* Set ring buffer size */
2961 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2962 	rb_bufsz = drm_order(ring->ring_size / 8);
2963 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2964 #ifdef __BIG_ENDIAN
2965 	tmp |= BUF_SWAP_32BIT;
2966 #endif
2967 	WREG32(CP_RB2_CNTL, tmp);
2968 
2969 	/* Initialize the ring buffer's read and write pointers */
2970 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2971 	ring->wptr = 0;
2972 	WREG32(CP_RB2_WPTR, ring->wptr);
2973 
2974 	/* set the wb address whether it's enabled or not */
2975 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2976 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2977 
2978 	mdelay(1);
2979 	WREG32(CP_RB2_CNTL, tmp);
2980 
2981 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2982 
2983 	ring->rptr = RREG32(CP_RB2_RPTR);
2984 
2985 	/* start the rings */
2986 	si_cp_start(rdev);
2987 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2988 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2989 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2990 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2991 	if (r) {
2992 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2993 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2994 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2995 		return r;
2996 	}
2997 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2998 	if (r)
2999 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3001 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3002 	if (r)
3003 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3005 
3006 	return 0;
3007 }
3008 
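/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset mask
 * to be used by si_gpu_soft_reset() (SI).
 * Returns a mask of the blocks to be reset.
 */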
3009 static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3010 {
3011 	u32 reset_mask = 0;
3012 	u32 tmp;
3013 
3014 	/* GRBM_STATUS */
3015 	tmp = RREG32(GRBM_STATUS);
3016 	if (tmp & (PA_BUSY | SC_BUSY |
3017 		   BCI_BUSY | SX_BUSY |
3018 		   TA_BUSY | VGT_BUSY |
3019 		   DB_BUSY | CB_BUSY |
3020 		   GDS_BUSY | SPI_BUSY |
3021 		   IA_BUSY | IA_BUSY_NO_DMA))
3022 		reset_mask |= RADEON_RESET_GFX;
3023 
3024 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3025 		   CP_BUSY | CP_COHERENCY_BUSY))
3026 		reset_mask |= RADEON_RESET_CP;
3027 
3028 	if (tmp & GRBM_EE_BUSY)
3029 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3030 
3031 	/* GRBM_STATUS2 */
3032 	tmp = RREG32(GRBM_STATUS2);
3033 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3034 		reset_mask |= RADEON_RESET_RLC;
3035 
3036 	/* DMA_STATUS_REG 0 */
3037 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3038 	if (!(tmp & DMA_IDLE))
3039 		reset_mask |= RADEON_RESET_DMA;
3040 
3041 	/* DMA_STATUS_REG 1 */
3042 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3043 	if (!(tmp & DMA_IDLE))
3044 		reset_mask |= RADEON_RESET_DMA1;
3045 
3046 	/* SRBM_STATUS2 */
3047 	tmp = RREG32(SRBM_STATUS2);
3048 	if (tmp & DMA_BUSY)
3049 		reset_mask |= RADEON_RESET_DMA;
3050 
3051 	if (tmp & DMA1_BUSY)
3052 		reset_mask |= RADEON_RESET_DMA1;
3053 
3054 	/* SRBM_STATUS */
3055 	tmp = RREG32(SRBM_STATUS);
3056 
3057 	if (tmp & IH_BUSY)
3058 		reset_mask |= RADEON_RESET_IH;
3059 
3060 	if (tmp & SEM_BUSY)
3061 		reset_mask |= RADEON_RESET_SEM;
3062 
3063 	if (tmp & GRBM_RQ_PENDING)
3064 		reset_mask |= RADEON_RESET_GRBM;
3065 
3066 	if (tmp & VMC_BUSY)
3067 		reset_mask |= RADEON_RESET_VMC;
3068 
3069 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3070 		   MCC_BUSY | MCD_BUSY))
3071 		reset_mask |= RADEON_RESET_MC;
3072 
3073 	if (evergreen_is_display_hung(rdev))
3074 		reset_mask |= RADEON_RESET_DISPLAY;
3075 
3076 	/* VM_L2_STATUS */
3077 	tmp = RREG32(VM_L2_STATUS);
3078 	if (tmp & L2_BUSY)
3079 		reset_mask |= RADEON_RESET_VMC;
3080 
3081 	/* Skip MC reset as it's most likely not hung, just busy */
3082 	if (reset_mask & RADEON_RESET_MC) {
3083 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3084 		reset_mask &= ~RADEON_RESET_MC;
3085 	}
3086 
3087 	return reset_mask;
3088 }
3089 
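/**
 * si_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask (SI).
 */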
3090 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3091 {
3092 	struct evergreen_mc_save save;
3093 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3094 	u32 tmp;
3095 
3096 	if (reset_mask == 0)
3097 		return;
3098 
3099 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3100 
3101 	evergreen_print_gpu_status_regs(rdev);
3102 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3103 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3104 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3105 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3106 
3107 	/* Disable CP parsing/prefetching */
3108 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3109 
3110 	if (reset_mask & RADEON_RESET_DMA) {
3111 		/* dma0 */
3112 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3113 		tmp &= ~DMA_RB_ENABLE;
3114 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3115 	}
3116 	if (reset_mask & RADEON_RESET_DMA1) {
3117 		/* dma1 */
3118 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3119 		tmp &= ~DMA_RB_ENABLE;
3120 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3121 	}
3122 
3123 	udelay(50);
3124 
3125 	evergreen_mc_stop(rdev, &save);
3126 	if (evergreen_mc_wait_for_idle(rdev)) {
3127 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3128 	}
3129 
3130 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3131 		grbm_soft_reset = SOFT_RESET_CB |
3132 			SOFT_RESET_DB |
3133 			SOFT_RESET_GDS |
3134 			SOFT_RESET_PA |
3135 			SOFT_RESET_SC |
3136 			SOFT_RESET_BCI |
3137 			SOFT_RESET_SPI |
3138 			SOFT_RESET_SX |
3139 			SOFT_RESET_TC |
3140 			SOFT_RESET_TA |
3141 			SOFT_RESET_VGT |
3142 			SOFT_RESET_IA;
3143 	}
3144 
3145 	if (reset_mask & RADEON_RESET_CP) {
3146 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3147 
3148 		srbm_soft_reset |= SOFT_RESET_GRBM;
3149 	}
3150 
3151 	if (reset_mask & RADEON_RESET_DMA)
3152 		srbm_soft_reset |= SOFT_RESET_DMA;
3153 
3154 	if (reset_mask & RADEON_RESET_DMA1)
3155 		srbm_soft_reset |= SOFT_RESET_DMA1;
3156 
3157 	if (reset_mask & RADEON_RESET_DISPLAY)
3158 		srbm_soft_reset |= SOFT_RESET_DC;
3159 
3160 	if (reset_mask & RADEON_RESET_RLC)
3161 		grbm_soft_reset |= SOFT_RESET_RLC;
3162 
3163 	if (reset_mask & RADEON_RESET_SEM)
3164 		srbm_soft_reset |= SOFT_RESET_SEM;
3165 
3166 	if (reset_mask & RADEON_RESET_IH)
3167 		srbm_soft_reset |= SOFT_RESET_IH;
3168 
3169 	if (reset_mask & RADEON_RESET_GRBM)
3170 		srbm_soft_reset |= SOFT_RESET_GRBM;
3171 
3172 	if (reset_mask & RADEON_RESET_VMC)
3173 		srbm_soft_reset |= SOFT_RESET_VMC;
3174 
3175 	if (reset_mask & RADEON_RESET_MC)
3176 		srbm_soft_reset |= SOFT_RESET_MC;
3177 
3178 	if (grbm_soft_reset) {
3179 		tmp = RREG32(GRBM_SOFT_RESET);
3180 		tmp |= grbm_soft_reset;
3181 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3182 		WREG32(GRBM_SOFT_RESET, tmp);
3183 		tmp = RREG32(GRBM_SOFT_RESET);
3184 
3185 		udelay(50);
3186 
3187 		tmp &= ~grbm_soft_reset;
3188 		WREG32(GRBM_SOFT_RESET, tmp);
3189 		tmp = RREG32(GRBM_SOFT_RESET);
3190 	}
3191 
3192 	if (srbm_soft_reset) {
3193 		tmp = RREG32(SRBM_SOFT_RESET);
3194 		tmp |= srbm_soft_reset;
3195 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3196 		WREG32(SRBM_SOFT_RESET, tmp);
3197 		tmp = RREG32(SRBM_SOFT_RESET);
3198 
3199 		udelay(50);
3200 
3201 		tmp &= ~srbm_soft_reset;
3202 		WREG32(SRBM_SOFT_RESET, tmp);
3203 		tmp = RREG32(SRBM_SOFT_RESET);
3204 	}
3205 
3206 	/* Wait a little for things to settle down */
3207 	udelay(50);
3208 
3209 	evergreen_mc_resume(rdev, &save);
3210 	udelay(50);
3211 
3212 	evergreen_print_gpu_status_regs(rdev);
3213 }
3214 
3215 int si_asic_reset(struct radeon_device *rdev)
3216 {
3217 	u32 reset_mask;
3218 
3219 	reset_mask = si_gpu_check_soft_reset(rdev);
3220 
3221 	if (reset_mask)
3222 		r600_set_bios_scratch_engine_hung(rdev, true);
3223 
3224 	si_gpu_soft_reset(rdev, reset_mask);
3225 
3226 	reset_mask = si_gpu_check_soft_reset(rdev);
3227 
3228 	if (!reset_mask)
3229 		r600_set_bios_scratch_engine_hung(rdev, false);
3230 
3231 	return 0;
3232 }
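
/* Illustrative sketch (not part of the driver): the check/reset/re-check
 * pattern used by si_asic_reset() could be driven from a hypothetical
 * recovery helper along these lines:
 *
 *	static int example_try_recover(struct radeon_device *rdev)
 *	{
 *		if (si_gpu_check_soft_reset(rdev) == 0)
 *			return 0;
 *		return si_asic_reset(rdev);
 *	}
 *
 * si_asic_reset() itself always returns 0; the BIOS scratch "engine hung"
 * flag is what records whether the second check still saw busy engines.
 */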
3233 
3234 /**
3235  * si_gfx_is_lockup - Check if the GFX engine is locked up
3236  *
3237  * @rdev: radeon_device pointer
3238  * @ring: radeon_ring structure holding ring information
3239  *
3240  * Check if the GFX engine is locked up.
3241  * Returns true if the engine appears to be locked up, false if not.
3242  */
3243 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3244 {
3245 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3246 
3247 	if (!(reset_mask & (RADEON_RESET_GFX |
3248 			    RADEON_RESET_COMPUTE |
3249 			    RADEON_RESET_CP))) {
3250 		radeon_ring_lockup_update(ring);
3251 		return false;
3252 	}
3253 	/* force CP activities */
3254 	radeon_ring_force_activity(rdev, ring);
3255 	return radeon_ring_test_lockup(rdev, ring);
3256 }
3257 
3258 /**
3259  * si_dma_is_lockup - Check if the DMA engine is locked up
3260  *
3261  * @rdev: radeon_device pointer
3262  * @ring: radeon_ring structure holding ring information
3263  *
3264  * Check if the async DMA engine is locked up.
3265  * Returns true if the engine appears to be locked up, false if not.
3266  */
3267 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3268 {
3269 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3270 	u32 mask;
3271 
3272 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3273 		mask = RADEON_RESET_DMA;
3274 	else
3275 		mask = RADEON_RESET_DMA1;
3276 
3277 	if (!(reset_mask & mask)) {
3278 		radeon_ring_lockup_update(ring);
3279 		return false;
3280 	}
3281 	/* force ring activities */
3282 	radeon_ring_force_activity(rdev, ring);
3283 	return radeon_ring_test_lockup(rdev, ring);
3284 }
3285 
3286 /* MC */
3287 static void si_mc_program(struct radeon_device *rdev)
3288 {
3289 	struct evergreen_mc_save save;
3290 	u32 tmp;
3291 	int i, j;
3292 
3293 	/* Initialize HDP */
3294 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3295 		WREG32((0x2c14 + j), 0x00000000);
3296 		WREG32((0x2c18 + j), 0x00000000);
3297 		WREG32((0x2c1c + j), 0x00000000);
3298 		WREG32((0x2c20 + j), 0x00000000);
3299 		WREG32((0x2c24 + j), 0x00000000);
3300 	}
3301 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3302 
3303 	evergreen_mc_stop(rdev, &save);
3304 	if (radeon_mc_wait_for_idle(rdev)) {
3305 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3306 	}
3307 	/* Lock out access through the VGA aperture */
3308 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3309 	/* Update configuration */
3310 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3311 	       rdev->mc.vram_start >> 12);
3312 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3313 	       rdev->mc.vram_end >> 12);
3314 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3315 	       rdev->vram_scratch.gpu_addr >> 12);
3316 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3317 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3318 	WREG32(MC_VM_FB_LOCATION, tmp);
3319 	/* XXX double check these! */
3320 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3321 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3322 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3323 	WREG32(MC_VM_AGP_BASE, 0);
3324 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3325 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3326 	if (radeon_mc_wait_for_idle(rdev)) {
3327 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3328 	}
3329 	evergreen_mc_resume(rdev, &save);
3330 	/* we need to own VRAM, so turn off the VGA renderer here
3331 	 * to stop it from overwriting our objects */
3332 	rv515_vga_render_disable(rdev);
3333 }
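
/* Worked example of the MC_VM_FB_LOCATION packing above (values assumed):
 * the register holds bits 39:24 of the FB end address in its high half
 * and bits 39:24 of the FB start in its low half, i.e. 16 MB granularity.
 * For vram_start = 0x0 and vram_end = 0xFFFFFFFF (4 GB - 1):
 *
 *	tmp  = ((0xFFFFFFFFULL >> 24) & 0xFFFF) << 16;
 *	tmp |=  (0x00000000ULL >> 24) & 0xFFFF;
 *
 * yields tmp = 0x00FF0000 (FB top = 0x00FF, FB base = 0x0000).
 */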
3334 
3335 static void si_vram_gtt_location(struct radeon_device *rdev,
3336 				 struct radeon_mc *mc)
3337 {
3338 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3339 		/* leave room for at least 1024M GTT */
3340 		dev_warn(rdev->dev, "limiting VRAM\n");
3341 		mc->real_vram_size = 0xFFC0000000ULL;
3342 		mc->mc_vram_size = 0xFFC0000000ULL;
3343 	}
3344 	radeon_vram_location(rdev, &rdev->mc, 0);
3345 	rdev->mc.gtt_base_align = 0;
3346 	radeon_gtt_location(rdev, mc);
3347 }
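
/* Note on the 0xFFC0000000ULL cap above: SI uses a 40-bit MC address
 * space (0x10000000000 bytes, 1 TiB). Reserving 0x40000000 (1 GiB,
 * the "1024M" in the warning) for the GTT leaves
 *
 *	0x10000000000 - 0x40000000 = 0xFFC0000000
 *
 * as the maximum addressable VRAM size.
 */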
3348 
3349 static int si_mc_init(struct radeon_device *rdev)
3350 {
3351 	u32 tmp;
3352 	int chansize, numchan;
3353 
3354 	/* Get VRAM information */
3355 	rdev->mc.vram_is_ddr = true;
3356 	tmp = RREG32(MC_ARB_RAMCFG);
3357 	if (tmp & CHANSIZE_OVERRIDE) {
3358 		chansize = 16;
3359 	} else if (tmp & CHANSIZE_MASK) {
3360 		chansize = 64;
3361 	} else {
3362 		chansize = 32;
3363 	}
3364 	tmp = RREG32(MC_SHARED_CHMAP);
3365 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3366 	case 0:
3367 	default:
3368 		numchan = 1;
3369 		break;
3370 	case 1:
3371 		numchan = 2;
3372 		break;
3373 	case 2:
3374 		numchan = 4;
3375 		break;
3376 	case 3:
3377 		numchan = 8;
3378 		break;
3379 	case 4:
3380 		numchan = 3;
3381 		break;
3382 	case 5:
3383 		numchan = 6;
3384 		break;
3385 	case 6:
3386 		numchan = 10;
3387 		break;
3388 	case 7:
3389 		numchan = 12;
3390 		break;
3391 	case 8:
3392 		numchan = 16;
3393 		break;
3394 	}
3395 	rdev->mc.vram_width = numchan * chansize;
3396 	/* Could the aperture size report 0? */
3397 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3398 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3399 	/* size in MB on si */
3400 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3401 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3402 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3403 	si_vram_gtt_location(rdev, &rdev->mc);
3404 	radeon_update_bandwidth_info(rdev);
3405 
3406 	return 0;
3407 }
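
/* Illustrative example of the bus-width computation above (assumed
 * values, roughly a Tahiti-class board): NOOFCHAN = 7 decodes to
 * numchan = 12, and with neither CHANSIZE bit set chansize = 32, so
 *
 *	vram_width = 12 * 32;
 *
 * gives 384 bits, matching a 384-bit GDDR5 interface.
 */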
3408 
3409 /*
3410  * GART
3411  */
3412 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3413 {
3414 	/* flush hdp cache */
3415 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3416 
3417 	/* bits 0-15 are the VM contexts 0-15 */
3418 	WREG32(VM_INVALIDATE_REQUEST, 1);
3419 }
3420 
3421 static int si_pcie_gart_enable(struct radeon_device *rdev)
3422 {
3423 	int r, i;
3424 
3425 	if (rdev->gart.robj == NULL) {
3426 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3427 		return -EINVAL;
3428 	}
3429 	r = radeon_gart_table_vram_pin(rdev);
3430 	if (r)
3431 		return r;
3432 	radeon_gart_restore(rdev);
3433 	/* Setup TLB control */
3434 	WREG32(MC_VM_MX_L1_TLB_CNTL,
3435 	       (0xA << 7) |
3436 	       ENABLE_L1_TLB |
3437 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3438 	       ENABLE_ADVANCED_DRIVER_MODEL |
3439 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3440 	/* Setup L2 cache */
3441 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3442 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3443 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3444 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
3445 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
3446 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3447 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3448 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3449 	/* setup context0 */
3450 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3451 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3452 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3453 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3454 			(u32)(rdev->dummy_page.addr >> 12));
3455 	WREG32(VM_CONTEXT0_CNTL2, 0);
3456 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3457 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3458 
3459 	WREG32(0x15D4, 0);
3460 	WREG32(0x15D8, 0);
3461 	WREG32(0x15DC, 0);
3462 
3463 	/* empty context1-15 */
3464 	/* set vm size, must be a multiple of 4 */
3465 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3466 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3467 	/* Assign the pt base to something valid for now; the pts used for
3468 	 * the VMs are determined by the application and are set up and assigned
3469 	 * on the fly in the vm part of radeon_gart.c
3470 	 */
3471 	for (i = 1; i < 16; i++) {
3472 		if (i < 8)
3473 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3474 			       rdev->gart.table_addr >> 12);
3475 		else
3476 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3477 			       rdev->gart.table_addr >> 12);
3478 	}
3479 
3480 	/* enable context1-15 */
3481 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3482 	       (u32)(rdev->dummy_page.addr >> 12));
3483 	WREG32(VM_CONTEXT1_CNTL2, 4);
3484 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3485 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3486 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3487 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3488 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3489 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3490 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3491 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3492 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3493 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3494 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3495 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3496 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3497 
3498 	si_pcie_gart_tlb_flush(rdev);
3499 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3500 		 (unsigned)(rdev->mc.gtt_size >> 20),
3501 		 (unsigned long long)rdev->gart.table_addr);
3502 	rdev->gart.ready = true;
3503 	return 0;
3504 }
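
/* A quick note on the address encoding used above (illustrative value):
 * the VM page table and aperture registers are programmed in units of
 * 4 KiB pages, hence the ">> 12" shifts. For a table at GPU address
 * 0x00800000:
 *
 *	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, 0x00800000 >> 12);
 *
 * writes page frame number 0x800.
 */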
3505 
3506 static void si_pcie_gart_disable(struct radeon_device *rdev)
3507 {
3508 	/* Disable all tables */
3509 	WREG32(VM_CONTEXT0_CNTL, 0);
3510 	WREG32(VM_CONTEXT1_CNTL, 0);
3511 	/* Setup TLB control */
3512 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3513 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3514 	/* Setup L2 cache */
3515 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3516 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3517 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
3518 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
3519 	WREG32(VM_L2_CNTL2, 0);
3520 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3521 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3522 	radeon_gart_table_vram_unpin(rdev);
3523 }
3524 
3525 static void si_pcie_gart_fini(struct radeon_device *rdev)
3526 {
3527 	si_pcie_gart_disable(rdev);
3528 	radeon_gart_table_vram_free(rdev);
3529 	radeon_gart_fini(rdev);
3530 }
3531 
3532 /* vm parser */
3533 static bool si_vm_reg_valid(u32 reg)
3534 {
3535 	/* context regs are fine */
3536 	if (reg >= 0x28000)
3537 		return true;
3538 
3539 	/* check config regs */
3540 	switch (reg) {
3541 	case GRBM_GFX_INDEX:
3542 	case CP_STRMOUT_CNTL:
3543 	case VGT_VTX_VECT_EJECT_REG:
3544 	case VGT_CACHE_INVALIDATION:
3545 	case VGT_ESGS_RING_SIZE:
3546 	case VGT_GSVS_RING_SIZE:
3547 	case VGT_GS_VERTEX_REUSE:
3548 	case VGT_PRIMITIVE_TYPE:
3549 	case VGT_INDEX_TYPE:
3550 	case VGT_NUM_INDICES:
3551 	case VGT_NUM_INSTANCES:
3552 	case VGT_TF_RING_SIZE:
3553 	case VGT_HS_OFFCHIP_PARAM:
3554 	case VGT_TF_MEMORY_BASE:
3555 	case PA_CL_ENHANCE:
3556 	case PA_SU_LINE_STIPPLE_VALUE:
3557 	case PA_SC_LINE_STIPPLE_STATE:
3558 	case PA_SC_ENHANCE:
3559 	case SQC_CACHES:
3560 	case SPI_STATIC_THREAD_MGMT_1:
3561 	case SPI_STATIC_THREAD_MGMT_2:
3562 	case SPI_STATIC_THREAD_MGMT_3:
3563 	case SPI_PS_MAX_WAVE_ID:
3564 	case SPI_CONFIG_CNTL:
3565 	case SPI_CONFIG_CNTL_1:
3566 	case TA_CNTL_AUX:
3567 		return true;
3568 	default:
3569 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3570 		return false;
3571 	}
3572 }
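
/* Illustrative note: the packet checkers below carry register offsets as
 * dword indices, so they multiply by 4 before calling si_vm_reg_valid().
 * With a hypothetical payload value of 0xA000 dwords:
 *
 *	reg = 0xA000 * 4;
 *
 * gives 0x28000, which lands in the context-register range and is
 * accepted by the first test above.
 */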
3573 
3574 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3575 				  u32 *ib, struct radeon_cs_packet *pkt)
3576 {
3577 	switch (pkt->opcode) {
3578 	case PACKET3_NOP:
3579 	case PACKET3_SET_BASE:
3580 	case PACKET3_SET_CE_DE_COUNTERS:
3581 	case PACKET3_LOAD_CONST_RAM:
3582 	case PACKET3_WRITE_CONST_RAM:
3583 	case PACKET3_WRITE_CONST_RAM_OFFSET:
3584 	case PACKET3_DUMP_CONST_RAM:
3585 	case PACKET3_INCREMENT_CE_COUNTER:
3586 	case PACKET3_WAIT_ON_DE_COUNTER:
3587 	case PACKET3_CE_WRITE:
3588 		break;
3589 	default:
3590 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3591 		return -EINVAL;
3592 	}
3593 	return 0;
3594 }
3595 
3596 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
3597 				   u32 *ib, struct radeon_cs_packet *pkt)
3598 {
3599 	u32 idx = pkt->idx + 1;
3600 	u32 idx_value = ib[idx];
3601 	u32 start_reg, end_reg, reg, i;
3602 	u32 command, info;
3603 
3604 	switch (pkt->opcode) {
3605 	case PACKET3_NOP:
3606 	case PACKET3_SET_BASE:
3607 	case PACKET3_CLEAR_STATE:
3608 	case PACKET3_INDEX_BUFFER_SIZE:
3609 	case PACKET3_DISPATCH_DIRECT:
3610 	case PACKET3_DISPATCH_INDIRECT:
3611 	case PACKET3_ALLOC_GDS:
3612 	case PACKET3_WRITE_GDS_RAM:
3613 	case PACKET3_ATOMIC_GDS:
3614 	case PACKET3_ATOMIC:
3615 	case PACKET3_OCCLUSION_QUERY:
3616 	case PACKET3_SET_PREDICATION:
3617 	case PACKET3_COND_EXEC:
3618 	case PACKET3_PRED_EXEC:
3619 	case PACKET3_DRAW_INDIRECT:
3620 	case PACKET3_DRAW_INDEX_INDIRECT:
3621 	case PACKET3_INDEX_BASE:
3622 	case PACKET3_DRAW_INDEX_2:
3623 	case PACKET3_CONTEXT_CONTROL:
3624 	case PACKET3_INDEX_TYPE:
3625 	case PACKET3_DRAW_INDIRECT_MULTI:
3626 	case PACKET3_DRAW_INDEX_AUTO:
3627 	case PACKET3_DRAW_INDEX_IMMD:
3628 	case PACKET3_NUM_INSTANCES:
3629 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
3630 	case PACKET3_STRMOUT_BUFFER_UPDATE:
3631 	case PACKET3_DRAW_INDEX_OFFSET_2:
3632 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3633 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
3634 	case PACKET3_MPEG_INDEX:
3635 	case PACKET3_WAIT_REG_MEM:
3636 	case PACKET3_MEM_WRITE:
3637 	case PACKET3_PFP_SYNC_ME:
3638 	case PACKET3_SURFACE_SYNC:
3639 	case PACKET3_EVENT_WRITE:
3640 	case PACKET3_EVENT_WRITE_EOP:
3641 	case PACKET3_EVENT_WRITE_EOS:
3642 	case PACKET3_SET_CONTEXT_REG:
3643 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
3644 	case PACKET3_SET_SH_REG:
3645 	case PACKET3_SET_SH_REG_OFFSET:
3646 	case PACKET3_INCREMENT_DE_COUNTER:
3647 	case PACKET3_WAIT_ON_CE_COUNTER:
3648 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
3649 	case PACKET3_ME_WRITE:
3650 		break;
3651 	case PACKET3_COPY_DATA:
3652 		if ((idx_value & 0xf00) == 0) {
3653 			reg = ib[idx + 3] * 4;
3654 			if (!si_vm_reg_valid(reg))
3655 				return -EINVAL;
3656 		}
3657 		break;
3658 	case PACKET3_WRITE_DATA:
3659 		if ((idx_value & 0xf00) == 0) {
3660 			start_reg = ib[idx + 1] * 4;
3661 			if (idx_value & 0x10000) {
3662 				if (!si_vm_reg_valid(start_reg))
3663 					return -EINVAL;
3664 			} else {
3665 				for (i = 0; i < (pkt->count - 2); i++) {
3666 					reg = start_reg + (4 * i);
3667 					if (!si_vm_reg_valid(reg))
3668 						return -EINVAL;
3669 				}
3670 			}
3671 		}
3672 		break;
3673 	case PACKET3_COND_WRITE:
3674 		if (idx_value & 0x100) {
3675 			reg = ib[idx + 5] * 4;
3676 			if (!si_vm_reg_valid(reg))
3677 				return -EINVAL;
3678 		}
3679 		break;
3680 	case PACKET3_COPY_DW:
3681 		if (idx_value & 0x2) {
3682 			reg = ib[idx + 3] * 4;
3683 			if (!si_vm_reg_valid(reg))
3684 				return -EINVAL;
3685 		}
3686 		break;
3687 	case PACKET3_SET_CONFIG_REG:
3688 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3689 		end_reg = 4 * pkt->count + start_reg - 4;
3690 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3691 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3692 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3693 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3694 			return -EINVAL;
3695 		}
3696 		for (i = 0; i < pkt->count; i++) {
3697 			reg = start_reg + (4 * i);
3698 			if (!si_vm_reg_valid(reg))
3699 				return -EINVAL;
3700 		}
3701 		break;
3702 	case PACKET3_CP_DMA:
3703 		command = ib[idx + 4];
3704 		info = ib[idx + 1];
3705 		if (command & PACKET3_CP_DMA_CMD_SAS) {
3706 			/* src address space is register */
3707 			if (((info & 0x60000000) >> 29) == 0) {
3708 				start_reg = idx_value << 2;
3709 				if (command & PACKET3_CP_DMA_CMD_SAIC) {
3710 					reg = start_reg;
3711 					if (!si_vm_reg_valid(reg)) {
3712 						DRM_ERROR("CP DMA Bad SRC register\n");
3713 						return -EINVAL;
3714 					}
3715 				} else {
3716 					for (i = 0; i < (command & 0x1fffff); i++) {
3717 						reg = start_reg + (4 * i);
3718 						if (!si_vm_reg_valid(reg)) {
3719 							DRM_ERROR("CP DMA Bad SRC register\n");
3720 							return -EINVAL;
3721 						}
3722 					}
3723 				}
3724 			}
3725 		}
3726 		if (command & PACKET3_CP_DMA_CMD_DAS) {
3727 			/* dst address space is register */
3728 			if (((info & 0x00300000) >> 20) == 0) {
3729 				start_reg = ib[idx + 2];
3730 				if (command & PACKET3_CP_DMA_CMD_DAIC) {
3731 					reg = start_reg;
3732 					if (!si_vm_reg_valid(reg)) {
3733 						DRM_ERROR("CP DMA Bad DST register\n");
3734 						return -EINVAL;
3735 					}
3736 				} else {
3737 					for (i = 0; i < (command & 0x1fffff); i++) {
3738 						reg = start_reg + (4 * i);
3739 						if (!si_vm_reg_valid(reg)) {
3740 							DRM_ERROR("CP DMA Bad DST register\n");
3741 							return -EINVAL;
3742 						}
3743 					}
3744 				}
3745 			}
3746 		}
3747 		break;
3748 	default:
3749 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
3750 		return -EINVAL;
3751 	}
3752 	return 0;
3753 }
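
/* Worked example of the PACKET3_SET_CONFIG_REG bounds check above
 * (assumed packet values): with idx_value = 0x100 and pkt->count = 4,
 *
 *	start_reg = (0x100 << 2) + PACKET3_SET_CONFIG_REG_START;
 *	end_reg   = 4 * 4 + start_reg - 4;
 *
 * i.e. four consecutive dword registers starting 0x400 bytes into the
 * config-register window, each then validated via si_vm_reg_valid().
 */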
3754 
3755 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
3756 				       u32 *ib, struct radeon_cs_packet *pkt)
3757 {
3758 	u32 idx = pkt->idx + 1;
3759 	u32 idx_value = ib[idx];
3760 	u32 start_reg, reg, i;
3761 
3762 	switch (pkt->opcode) {
3763 	case PACKET3_NOP:
3764 	case PACKET3_SET_BASE:
3765 	case PACKET3_CLEAR_STATE:
3766 	case PACKET3_DISPATCH_DIRECT:
3767 	case PACKET3_DISPATCH_INDIRECT:
3768 	case PACKET3_ALLOC_GDS:
3769 	case PACKET3_WRITE_GDS_RAM:
3770 	case PACKET3_ATOMIC_GDS:
3771 	case PACKET3_ATOMIC:
3772 	case PACKET3_OCCLUSION_QUERY:
3773 	case PACKET3_SET_PREDICATION:
3774 	case PACKET3_COND_EXEC:
3775 	case PACKET3_PRED_EXEC:
3776 	case PACKET3_CONTEXT_CONTROL:
3777 	case PACKET3_STRMOUT_BUFFER_UPDATE:
3778 	case PACKET3_WAIT_REG_MEM:
3779 	case PACKET3_MEM_WRITE:
3780 	case PACKET3_PFP_SYNC_ME:
3781 	case PACKET3_SURFACE_SYNC:
3782 	case PACKET3_EVENT_WRITE:
3783 	case PACKET3_EVENT_WRITE_EOP:
3784 	case PACKET3_EVENT_WRITE_EOS:
3785 	case PACKET3_SET_CONTEXT_REG:
3786 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
3787 	case PACKET3_SET_SH_REG:
3788 	case PACKET3_SET_SH_REG_OFFSET:
3789 	case PACKET3_INCREMENT_DE_COUNTER:
3790 	case PACKET3_WAIT_ON_CE_COUNTER:
3791 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
3792 	case PACKET3_ME_WRITE:
3793 		break;
3794 	case PACKET3_COPY_DATA:
3795 		if ((idx_value & 0xf00) == 0) {
3796 			reg = ib[idx + 3] * 4;
3797 			if (!si_vm_reg_valid(reg))
3798 				return -EINVAL;
3799 		}
3800 		break;
3801 	case PACKET3_WRITE_DATA:
3802 		if ((idx_value & 0xf00) == 0) {
3803 			start_reg = ib[idx + 1] * 4;
3804 			if (idx_value & 0x10000) {
3805 				if (!si_vm_reg_valid(start_reg))
3806 					return -EINVAL;
3807 			} else {
3808 				for (i = 0; i < (pkt->count - 2); i++) {
3809 					reg = start_reg + (4 * i);
3810 					if (!si_vm_reg_valid(reg))
3811 						return -EINVAL;
3812 				}
3813 			}
3814 		}
3815 		break;
3816 	case PACKET3_COND_WRITE:
3817 		if (idx_value & 0x100) {
3818 			reg = ib[idx + 5] * 4;
3819 			if (!si_vm_reg_valid(reg))
3820 				return -EINVAL;
3821 		}
3822 		break;
3823 	case PACKET3_COPY_DW:
3824 		if (idx_value & 0x2) {
3825 			reg = ib[idx + 3] * 4;
3826 			if (!si_vm_reg_valid(reg))
3827 				return -EINVAL;
3828 		}
3829 		break;
3830 	default:
3831 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
3832 		return -EINVAL;
3833 	}
3834 	return 0;
3835 }
3836 
3837 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3838 {
3839 	int ret = 0;
3840 	u32 idx = 0;
3841 	struct radeon_cs_packet pkt;
3842 
3843 	do {
3844 		pkt.idx = idx;
3845 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3846 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3847 		pkt.one_reg_wr = 0;
3848 		switch (pkt.type) {
3849 		case RADEON_PACKET_TYPE0:
3850 			dev_err(rdev->dev, "Packet0 not allowed!\n");
3851 			ret = -EINVAL;
3852 			break;
3853 		case RADEON_PACKET_TYPE2:
3854 			idx += 1;
3855 			break;
3856 		case RADEON_PACKET_TYPE3:
3857 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3858 			if (ib->is_const_ib)
3859 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
3860 			else {
3861 				switch (ib->ring) {
3862 				case RADEON_RING_TYPE_GFX_INDEX:
3863 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
3864 					break;
3865 				case CAYMAN_RING_TYPE_CP1_INDEX:
3866 				case CAYMAN_RING_TYPE_CP2_INDEX:
3867 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
3868 					break;
3869 				default:
3870 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
3871 					ret = -EINVAL;
3872 					break;
3873 				}
3874 			}
3875 			idx += pkt.count + 2;
3876 			break;
3877 		default:
3878 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
3879 			ret = -EINVAL;
3880 			break;
3881 		}
3882 		if (ret)
3883 			break;
3884 	} while (idx < ib->length_dw);
3885 
3886 	return ret;
3887 }
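
/* Illustrative sketch of the PM4 header fields the parser above decodes
 * through the RADEON_CP_PACKET_* helpers (the header value is made up):
 *
 *	u32 header = 0xC0032200;
 *	u32 type   = (header >> 30) & 0x3;
 *	u32 count  = (header >> 16) & 0x3FFF;
 *	u32 opcode = (header >> 8) & 0xFF;
 *
 * Here type = 3 (packet3), count = 3 (payload dwords minus one) and
 * opcode = 0x22, so idx advances by count + 2: one header dword plus
 * count + 1 payload dwords.
 */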
3888 
3889 /*
3890  * vm
3891  */
3892 int si_vm_init(struct radeon_device *rdev)
3893 {
3894 	/* number of VMs */
3895 	rdev->vm_manager.nvm = 16;
3896 	/* base offset of vram pages */
3897 	rdev->vm_manager.vram_base_offset = 0;
3898 
3899 	return 0;
3900 }
3901 
3902 void si_vm_fini(struct radeon_device *rdev)
3903 {
3904 }
3905 
3906 /**
3907  * si_vm_set_page - update the page tables using the CP
3908  *
3909  * @rdev: radeon_device pointer
3910  * @ib: indirect buffer to fill with commands
3911  * @pe: addr of the page entry
3912  * @addr: dst addr to write into pe
3913  * @count: number of page entries to update
3914  * @incr: increase next addr by incr bytes
3915  * @flags: access flags
3916  *
3917  * Update the page tables using the CP (SI).
3918  */
3919 void si_vm_set_page(struct radeon_device *rdev,
3920 		    struct radeon_ib *ib,
3921 		    uint64_t pe,
3922 		    uint64_t addr, unsigned count,
3923 		    uint32_t incr, uint32_t flags)
3924 {
3925 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3926 	uint64_t value;
3927 	unsigned ndw;
3928 
3929 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3930 		while (count) {
3931 			ndw = 2 + count * 2;
3932 			if (ndw > 0x3FFE)
3933 				ndw = 0x3FFE;
3934 
3935 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3936 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3937 					WRITE_DATA_DST_SEL(1));
3938 			ib->ptr[ib->length_dw++] = pe;
3939 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3940 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3941 				if (flags & RADEON_VM_PAGE_SYSTEM) {
3942 					value = radeon_vm_map_gart(rdev, addr);
3943 					value &= 0xFFFFFFFFFFFFF000ULL;
3944 				} else if (flags & RADEON_VM_PAGE_VALID) {
3945 					value = addr;
3946 				} else {
3947 					value = 0;
3948 				}
3949 				addr += incr;
3950 				value |= r600_flags;
3951 				ib->ptr[ib->length_dw++] = value;
3952 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
3953 			}
3954 		}
3955 	} else {
3956 		/* DMA */
3957 		if (flags & RADEON_VM_PAGE_SYSTEM) {
3958 			while (count) {
3959 				ndw = count * 2;
3960 				if (ndw > 0xFFFFE)
3961 					ndw = 0xFFFFE;
3962 
3963 				/* for non-physically contiguous pages (system) */
3964 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
3965 				ib->ptr[ib->length_dw++] = pe;
3966 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3967 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3968 					if (flags & RADEON_VM_PAGE_SYSTEM) {
3969 						value = radeon_vm_map_gart(rdev, addr);
3970 						value &= 0xFFFFFFFFFFFFF000ULL;
3971 					} else if (flags & RADEON_VM_PAGE_VALID) {
3972 						value = addr;
3973 					} else {
3974 						value = 0;
3975 					}
3976 					addr += incr;
3977 					value |= r600_flags;
3978 					ib->ptr[ib->length_dw++] = value;
3979 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
3980 				}
3981 			}
3982 		} else {
3983 			while (count) {
3984 				ndw = count * 2;
3985 				if (ndw > 0xFFFFE)
3986 					ndw = 0xFFFFE;
3987 
3988 				if (flags & RADEON_VM_PAGE_VALID)
3989 					value = addr;
3990 				else
3991 					value = 0;
3992 				/* for physically contiguous pages (vram) */
3993 				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
3994 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
3995 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3996 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3997 				ib->ptr[ib->length_dw++] = 0;
3998 				ib->ptr[ib->length_dw++] = value; /* value */
3999 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4000 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4001 				ib->ptr[ib->length_dw++] = 0;
4002 				pe += ndw * 4;
4003 				addr += (ndw / 2) * incr;
4004 				count -= ndw / 2;
4005 			}
4006 		}
4007 		while (ib->length_dw & 0x7)
4008 			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4009 	}
4010 }
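
/* Worked example of the CP-path sizing above (count assumed): for
 * count = 512 page entries,
 *
 *	ndw = 2 + 512 * 2;
 *
 * gives 1026 dwords, well under the 0x3FFE clamp, so all 512 entries fit
 * in a single PACKET3_WRITE_DATA; larger updates are simply split across
 * several packets by the outer while (count) loop.
 */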
4011 
4012 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4013 {
4014 	struct radeon_ring *ring = &rdev->ring[ridx];
4015 
4016 	if (vm == NULL)
4017 		return;
4018 
4019 	/* write new base address */
4020 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4021 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4022 				 WRITE_DATA_DST_SEL(0)));
4023 
4024 	if (vm->id < 8) {
4025 		radeon_ring_write(ring,
4026 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4027 	} else {
4028 		radeon_ring_write(ring,
4029 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4030 	}
4031 	radeon_ring_write(ring, 0);
4032 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4033 
4034 	/* flush hdp cache */
4035 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4036 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4037 				 WRITE_DATA_DST_SEL(0)));
4038 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4039 	radeon_ring_write(ring, 0);
4040 	radeon_ring_write(ring, 0x1);
4041 
4042 	/* bits 0-15 are the VM contexts 0-15 */
4043 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4044 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4045 				 WRITE_DATA_DST_SEL(0)));
4046 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4047 	radeon_ring_write(ring, 0);
4048 	radeon_ring_write(ring, 1 << vm->id);
4049 
4050 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
4051 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4052 	radeon_ring_write(ring, 0x0);
4053 }
4054 
4055 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4056 {
4057 	struct radeon_ring *ring = &rdev->ring[ridx];
4058 
4059 	if (vm == NULL)
4060 		return;
4061 
4062 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4063 	if (vm->id < 8) {
4064 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4065 	} else {
4066 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4067 	}
4068 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4069 
4070 	/* flush hdp cache */
4071 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4072 	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4073 	radeon_ring_write(ring, 1);
4074 
4075 	/* bits 0-15 are the VM contexts 0-15 */
4076 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4077 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4078 	radeon_ring_write(ring, 1 << vm->id);
4079 }
4080 
4081 /*
4082  * RLC
4083  */
4084 void si_rlc_fini(struct radeon_device *rdev)
4085 {
4086 	int r;
4087 
4088 	/* save restore block */
4089 	if (rdev->rlc.save_restore_obj) {
4090 		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4091 		if (unlikely(r != 0))
4092 			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4093 		radeon_bo_unpin(rdev->rlc.save_restore_obj);
4094 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4095 
4096 		radeon_bo_unref(&rdev->rlc.save_restore_obj);
4097 		rdev->rlc.save_restore_obj = NULL;
4098 	}
4099 
4100 	/* clear state block */
4101 	if (rdev->rlc.clear_state_obj) {
4102 		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4103 		if (unlikely(r != 0))
4104 			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4105 		radeon_bo_unpin(rdev->rlc.clear_state_obj);
4106 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4107 
4108 		radeon_bo_unref(&rdev->rlc.clear_state_obj);
4109 		rdev->rlc.clear_state_obj = NULL;
4110 	}
4111 }
4112 
4113 int si_rlc_init(struct radeon_device *rdev)
4114 {
4115 	int r;
4116 
4117 	/* save restore block */
4118 	if (rdev->rlc.save_restore_obj == NULL) {
4119 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4120 				     RADEON_GEM_DOMAIN_VRAM, NULL,
4121 				     &rdev->rlc.save_restore_obj);
4122 		if (r) {
4123 			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
4124 			return r;
4125 		}
4126 	}
4127 
4128 	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4129 	if (unlikely(r != 0)) {
4130 		si_rlc_fini(rdev);
4131 		return r;
4132 	}
4133 	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
4134 			  &rdev->rlc.save_restore_gpu_addr);
4135 	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4136 	if (r) {
4137 		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
4138 		si_rlc_fini(rdev);
4139 		return r;
4140 	}
4141 
4142 	/* clear state block */
4143 	if (rdev->rlc.clear_state_obj == NULL) {
4144 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4145 				     RADEON_GEM_DOMAIN_VRAM, NULL,
4146 				     &rdev->rlc.clear_state_obj);
4147 		if (r) {
4148 			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
4149 			si_rlc_fini(rdev);
4150 			return r;
4151 		}
4152 	}
4153 	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4154 	if (unlikely(r != 0)) {
4155 		si_rlc_fini(rdev);
4156 		return r;
4157 	}
4158 	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
4159 			  &rdev->rlc.clear_state_gpu_addr);
4160 	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4161 	if (r) {
4162 		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
4163 		si_rlc_fini(rdev);
4164 		return r;
4165 	}
4166 
4167 	return 0;
4168 }
4169 
4170 static void si_rlc_stop(struct radeon_device *rdev)
4171 {
4172 	WREG32(RLC_CNTL, 0);
4173 }
4174 
4175 static void si_rlc_start(struct radeon_device *rdev)
4176 {
4177 	WREG32(RLC_CNTL, RLC_ENABLE);
4178 }
4179 
4180 static int si_rlc_resume(struct radeon_device *rdev)
4181 {
4182 	u32 i;
4183 	const __be32 *fw_data;
4184 
4185 	if (!rdev->rlc_fw)
4186 		return -EINVAL;
4187 
4188 	si_rlc_stop(rdev);
4189 
4190 	WREG32(RLC_RL_BASE, 0);
4191 	WREG32(RLC_RL_SIZE, 0);
4192 	WREG32(RLC_LB_CNTL, 0);
4193 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
4194 	WREG32(RLC_LB_CNTR_INIT, 0);
4195 
4196 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4197 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4198 
4199 	WREG32(RLC_MC_CNTL, 0);
4200 	WREG32(RLC_UCODE_CNTL, 0);
4201 
4202 	fw_data = (const __be32 *)rdev->rlc_fw->data;
4203 	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
4204 		WREG32(RLC_UCODE_ADDR, i);
4205 		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
4206 	}
4207 	WREG32(RLC_UCODE_ADDR, 0);
4208 
4209 	si_rlc_start(rdev);
4210 
4211 	return 0;
4212 }
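
/* Note on the ucode upload above: the RLC firmware image is a stream of
 * big-endian 32-bit words (hence the const __be32 * pointer), so each
 * word is byte-swapped on little-endian hosts, e.g.:
 *
 *	u32 word = be32_to_cpup(fw_data);
 *
 * On a big-endian host be32_to_cpup() is a no-op.
 */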
4213 
4214 static void si_enable_interrupts(struct radeon_device *rdev)
4215 {
4216 	u32 ih_cntl = RREG32(IH_CNTL);
4217 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4218 
4219 	ih_cntl |= ENABLE_INTR;
4220 	ih_rb_cntl |= IH_RB_ENABLE;
4221 	WREG32(IH_CNTL, ih_cntl);
4222 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4223 	rdev->ih.enabled = true;
4224 }
4225 
4226 static void si_disable_interrupts(struct radeon_device *rdev)
4227 {
4228 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4229 	u32 ih_cntl = RREG32(IH_CNTL);
4230 
4231 	ih_rb_cntl &= ~IH_RB_ENABLE;
4232 	ih_cntl &= ~ENABLE_INTR;
4233 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4234 	WREG32(IH_CNTL, ih_cntl);
4235 	/* set rptr, wptr to 0 */
4236 	WREG32(IH_RB_RPTR, 0);
4237 	WREG32(IH_RB_WPTR, 0);
4238 	rdev->ih.enabled = false;
4239 	rdev->ih.rptr = 0;
4240 }
4241 
4242 static void si_disable_interrupt_state(struct radeon_device *rdev)
4243 {
4244 	u32 tmp;
4245 
4246 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4247 	WREG32(CP_INT_CNTL_RING1, 0);
4248 	WREG32(CP_INT_CNTL_RING2, 0);
4249 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4250 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
4251 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4252 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
4253 	WREG32(GRBM_INT_CNTL, 0);
4254 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4255 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4256 	if (rdev->num_crtc >= 4) {
4257 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4258 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4259 	}
4260 	if (rdev->num_crtc >= 6) {
4261 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4262 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4263 	}
4264 
4265 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4266 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4267 	if (rdev->num_crtc >= 4) {
4268 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4269 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4270 	}
4271 	if (rdev->num_crtc >= 6) {
4272 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4273 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4274 	}
4275 
4276 	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
4277 
4278 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4279 	WREG32(DC_HPD1_INT_CONTROL, tmp);
4280 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4281 	WREG32(DC_HPD2_INT_CONTROL, tmp);
4282 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4283 	WREG32(DC_HPD3_INT_CONTROL, tmp);
4284 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4285 	WREG32(DC_HPD4_INT_CONTROL, tmp);
4286 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4287 	WREG32(DC_HPD5_INT_CONTROL, tmp);
4288 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4289 	WREG32(DC_HPD6_INT_CONTROL, tmp);
4290 
4291 }
4292 
4293 static int si_irq_init(struct radeon_device *rdev)
4294 {
4295 	int ret = 0;
4296 	int rb_bufsz;
4297 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4298 
4299 	/* allocate ring */
4300 	ret = r600_ih_ring_alloc(rdev);
4301 	if (ret)
4302 		return ret;
4303 
4304 	/* disable irqs */
4305 	si_disable_interrupts(rdev);
4306 
4307 	/* init rlc */
4308 	ret = si_rlc_resume(rdev);
4309 	if (ret) {
4310 		r600_ih_ring_fini(rdev);
4311 		return ret;
4312 	}
4313 
4314 	/* setup interrupt control */
4315 	/* set dummy read address to ring address */
4316 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4317 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
4318 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4319 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4320 	 */
4321 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4322 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4323 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4324 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
4325 
4326 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4327 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4328 
4329 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4330 		      IH_WPTR_OVERFLOW_CLEAR |
4331 		      (rb_bufsz << 1));
4332 
4333 	if (rdev->wb.enabled)
4334 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4335 
4336 	/* set the writeback address whether it's enabled or not */
4337 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4338 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4339 
4340 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4341 
4342 	/* set rptr, wptr to 0 */
4343 	WREG32(IH_RB_RPTR, 0);
4344 	WREG32(IH_RB_WPTR, 0);
4345 
4346 	/* Default settings for IH_CNTL (disabled at first) */
4347 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4348 	/* RPTR_REARM only works if msi's are enabled */
4349 	if (rdev->msi_enabled)
4350 		ih_cntl |= RPTR_REARM;
4351 	WREG32(IH_CNTL, ih_cntl);
4352 
4353 	/* force the active interrupt state to all disabled */
4354 	si_disable_interrupt_state(rdev);
4355 
4356 	pci_set_master(rdev->pdev);
4357 
4358 	/* enable irqs */
4359 	si_enable_interrupts(rdev);
4360 
4361 	return ret;
4362 }
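
/* Illustrative sizing example (ring size assumed): the RB_SIZE field of
 * IH_RB_CNTL takes log2 of the ring size in dwords, so for a 64 KiB IH
 * ring
 *
 *	rb_bufsz = drm_order(65536 / 4);
 *
 * gives 14 (16384 dwords), and (rb_bufsz << 1) shifts it into the field.
 */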
4363 
4364 int si_irq_set(struct radeon_device *rdev)
4365 {
4366 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
4367 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
4368 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4369 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4370 	u32 grbm_int_cntl = 0;
4371 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
4372 	u32 dma_cntl, dma_cntl1;
4373 
4374 	if (!rdev->irq.installed) {
4375 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4376 		return -EINVAL;
4377 	}
4378 	/* don't enable anything if the ih is disabled */
4379 	if (!rdev->ih.enabled) {
4380 		si_disable_interrupts(rdev);
4381 		/* force the active interrupt state to all disabled */
4382 		si_disable_interrupt_state(rdev);
4383 		return 0;
4384 	}
4385 
4386 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4387 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4388 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4389 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4390 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4391 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4392 
4393 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4394 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4395 
4396 	/* enable CP interrupts on all rings */
4397 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4398 		DRM_DEBUG("si_irq_set: sw int gfx\n");
4399 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4400 	}
4401 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
4402 		DRM_DEBUG("si_irq_set: sw int cp1\n");
4403 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
4404 	}
4405 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
4406 		DRM_DEBUG("si_irq_set: sw int cp2\n");
4407 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
4408 	}
4409 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4410 		DRM_DEBUG("si_irq_set: sw int dma\n");
4411 		dma_cntl |= TRAP_ENABLE;
4412 	}
4413 
4414 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4415 		DRM_DEBUG("si_irq_set: sw int dma1\n");
4416 		dma_cntl1 |= TRAP_ENABLE;
4417 	}
4418 	if (rdev->irq.crtc_vblank_int[0] ||
4419 	    atomic_read(&rdev->irq.pflip[0])) {
4420 		DRM_DEBUG("si_irq_set: vblank 0\n");
4421 		crtc1 |= VBLANK_INT_MASK;
4422 	}
4423 	if (rdev->irq.crtc_vblank_int[1] ||
4424 	    atomic_read(&rdev->irq.pflip[1])) {
4425 		DRM_DEBUG("si_irq_set: vblank 1\n");
4426 		crtc2 |= VBLANK_INT_MASK;
4427 	}
4428 	if (rdev->irq.crtc_vblank_int[2] ||
4429 	    atomic_read(&rdev->irq.pflip[2])) {
4430 		DRM_DEBUG("si_irq_set: vblank 2\n");
4431 		crtc3 |= VBLANK_INT_MASK;
4432 	}
4433 	if (rdev->irq.crtc_vblank_int[3] ||
4434 	    atomic_read(&rdev->irq.pflip[3])) {
4435 		DRM_DEBUG("si_irq_set: vblank 3\n");
4436 		crtc4 |= VBLANK_INT_MASK;
4437 	}
4438 	if (rdev->irq.crtc_vblank_int[4] ||
4439 	    atomic_read(&rdev->irq.pflip[4])) {
4440 		DRM_DEBUG("si_irq_set: vblank 4\n");
4441 		crtc5 |= VBLANK_INT_MASK;
4442 	}
4443 	if (rdev->irq.crtc_vblank_int[5] ||
4444 	    atomic_read(&rdev->irq.pflip[5])) {
4445 		DRM_DEBUG("si_irq_set: vblank 5\n");
4446 		crtc6 |= VBLANK_INT_MASK;
4447 	}
4448 	if (rdev->irq.hpd[0]) {
4449 		DRM_DEBUG("si_irq_set: hpd 1\n");
4450 		hpd1 |= DC_HPDx_INT_EN;
4451 	}
4452 	if (rdev->irq.hpd[1]) {
4453 		DRM_DEBUG("si_irq_set: hpd 2\n");
4454 		hpd2 |= DC_HPDx_INT_EN;
4455 	}
4456 	if (rdev->irq.hpd[2]) {
4457 		DRM_DEBUG("si_irq_set: hpd 3\n");
4458 		hpd3 |= DC_HPDx_INT_EN;
4459 	}
4460 	if (rdev->irq.hpd[3]) {
4461 		DRM_DEBUG("si_irq_set: hpd 4\n");
4462 		hpd4 |= DC_HPDx_INT_EN;
4463 	}
4464 	if (rdev->irq.hpd[4]) {
4465 		DRM_DEBUG("si_irq_set: hpd 5\n");
4466 		hpd5 |= DC_HPDx_INT_EN;
4467 	}
4468 	if (rdev->irq.hpd[5]) {
4469 		DRM_DEBUG("si_irq_set: hpd 6\n");
4470 		hpd6 |= DC_HPDx_INT_EN;
4471 	}
4472 
4473 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4474 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
4475 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
4476 
4477 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
4478 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
4479 
4480 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4481 
4482 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4483 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4484 	if (rdev->num_crtc >= 4) {
4485 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4486 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4487 	}
4488 	if (rdev->num_crtc >= 6) {
4489 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4490 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4491 	}
4492 
4493 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
4494 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
4495 	if (rdev->num_crtc >= 4) {
4496 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
4497 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
4498 	}
4499 	if (rdev->num_crtc >= 6) {
4500 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
4501 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
4502 	}
4503 
4504 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
4505 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
4506 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
4507 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
4508 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
4509 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
4510 
4511 	return 0;
4512 }
4513 
4514 static inline void si_irq_ack(struct radeon_device *rdev)
4515 {
4516 	u32 tmp;
4517 
4518 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4519 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4520 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4521 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4522 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4523 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4524 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4525 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4526 	if (rdev->num_crtc >= 4) {
4527 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4528 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4529 	}
4530 	if (rdev->num_crtc >= 6) {
4531 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4532 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4533 	}
4534 
4535 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4536 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4537 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4538 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4539 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4540 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4541 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4542 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4543 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4544 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4545 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4546 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4547 
4548 	if (rdev->num_crtc >= 4) {
4549 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4550 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4551 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4552 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4553 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4554 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4555 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4556 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4557 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4558 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4559 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4560 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4561 	}
4562 
4563 	if (rdev->num_crtc >= 6) {
4564 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4565 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4566 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4567 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4568 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4569 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4570 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4571 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4572 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4573 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4574 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4575 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4576 	}
4577 
4578 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4579 		tmp = RREG32(DC_HPD1_INT_CONTROL);
4580 		tmp |= DC_HPDx_INT_ACK;
4581 		WREG32(DC_HPD1_INT_CONTROL, tmp);
4582 	}
4583 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4584 		tmp = RREG32(DC_HPD2_INT_CONTROL);
4585 		tmp |= DC_HPDx_INT_ACK;
4586 		WREG32(DC_HPD2_INT_CONTROL, tmp);
4587 	}
4588 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4589 		tmp = RREG32(DC_HPD3_INT_CONTROL);
4590 		tmp |= DC_HPDx_INT_ACK;
4591 		WREG32(DC_HPD3_INT_CONTROL, tmp);
4592 	}
4593 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4594 		tmp = RREG32(DC_HPD4_INT_CONTROL);
4595 		tmp |= DC_HPDx_INT_ACK;
4596 		WREG32(DC_HPD4_INT_CONTROL, tmp);
4597 	}
4598 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4599 		tmp = RREG32(DC_HPD5_INT_CONTROL);
4600 		tmp |= DC_HPDx_INT_ACK;
4601 		WREG32(DC_HPD5_INT_CONTROL, tmp);
4602 	}
4603 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4604 		tmp = RREG32(DC_HPD6_INT_CONTROL);
4605 		tmp |= DC_HPDx_INT_ACK;
4606 		WREG32(DC_HPD6_INT_CONTROL, tmp);
4607 	}
4608 }
4609 
4610 static void si_irq_disable(struct radeon_device *rdev)
4611 {
4612 	si_disable_interrupts(rdev);
4613 	/* Wait and acknowledge irq */
4614 	mdelay(1);
4615 	si_irq_ack(rdev);
4616 	si_disable_interrupt_state(rdev);
4617 }
4618 
4619 static void si_irq_suspend(struct radeon_device *rdev)
4620 {
4621 	si_irq_disable(rdev);
4622 	si_rlc_stop(rdev);
4623 }
4624 
4625 static void si_irq_fini(struct radeon_device *rdev)
4626 {
4627 	si_irq_suspend(rdev);
4628 	r600_ih_ring_fini(rdev);
4629 }
4630 
4631 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4632 {
4633 	u32 wptr, tmp;
4634 
4635 	if (rdev->wb.enabled)
4636 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4637 	else
4638 		wptr = RREG32(IH_RB_WPTR);
4639 
4640 	if (wptr & RB_OVERFLOW) {
4641 		/* When a ring buffer overflow happens, start parsing interrupts
4642 		 * from the last non-overwritten vector (wptr + 16). Hopefully
4643 		 * this should allow us to catch up.
4644 		 */
4645 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4646 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4647 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4648 		tmp = RREG32(IH_RB_CNTL);
4649 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
4650 		WREG32(IH_RB_CNTL, tmp);
4651 	}
4652 	return (wptr & rdev->ih.ptr_mask);
4653 }
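
/* Worked example of the overflow recovery above (values assumed): with
 * ptr_mask = 0xFFFF and RB_OVERFLOW set at wptr = 0x0030,
 *
 *	rdev->ih.rptr = (0x0030 + 16) & 0xFFFF;
 *
 * resumes parsing at 0x0040, i.e. 16 bytes (exactly one 128-bit IV ring
 * entry) past the oldest overwritten slot, and IH_WPTR_OVERFLOW_CLEAR
 * re-arms the overflow flag.
 */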
4654 
4655 /*        SI IV Ring
4656  * Each IV ring entry is 128 bits:
4657  * [7:0]    - interrupt source id
4658  * [31:8]   - reserved
4659  * [59:32]  - interrupt source data
4660  * [63:60]  - reserved
4661  * [71:64]  - RINGID
4662  * [79:72]  - VMID
4663  * [127:80] - reserved
4664  */
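/* In dword terms, as consumed by si_irq_process() below, one 16-byte IV
 * entry spans four ring dwords:
 *
 *	src_id   = ring[i + 0] & 0xff;
 *	src_data = ring[i + 1] & 0xfffffff;
 *	ring_id  = ring[i + 2] & 0xff;
 *
 * dword 2 also carries the VMID in bits 15:8, and dword 3 is entirely
 * reserved.
 */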
4665 int si_irq_process(struct radeon_device *rdev)
4666 {
4667 	u32 wptr;
4668 	u32 rptr;
4669 	u32 src_id, src_data, ring_id;
4670 	u32 ring_index;
4671 	bool queue_hotplug = false;
4672 
4673 	if (!rdev->ih.enabled || rdev->shutdown)
4674 		return IRQ_NONE;
4675 
4676 	wptr = si_get_ih_wptr(rdev);
4677 
4678 restart_ih:
4679 	/* is somebody else already processing irqs? */
4680 	if (atomic_xchg(&rdev->ih.lock, 1))
4681 		return IRQ_NONE;
4682 
4683 	rptr = rdev->ih.rptr;
4684 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4685 
4686 	/* Order reading of wptr vs. reading of IH ring data */
4687 	rmb();
4688 
4689 	/* display interrupts */
4690 	si_irq_ack(rdev);
4691 
4692 	while (rptr != wptr) {
4693 		/* wptr/rptr are in bytes! */
4694 		ring_index = rptr / 4;
4695 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4696 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4697 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4698 
4699 		switch (src_id) {
4700 		case 1: /* D1 vblank/vline */
4701 			switch (src_data) {
4702 			case 0: /* D1 vblank */
4703 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
4704 					if (rdev->irq.crtc_vblank_int[0]) {
4705 						drm_handle_vblank(rdev->ddev, 0);
4706 						rdev->pm.vblank_sync = true;
4707 						wake_up(&rdev->irq.vblank_queue);
4708 					}
4709 					if (atomic_read(&rdev->irq.pflip[0]))
4710 						radeon_crtc_handle_flip(rdev, 0);
4711 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4712 					DRM_DEBUG("IH: D1 vblank\n");
4713 				}
4714 				break;
4715 			case 1: /* D1 vline */
4716 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
4717 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4718 					DRM_DEBUG("IH: D1 vline\n");
4719 				}
4720 				break;
4721 			default:
4722 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4723 				break;
4724 			}
4725 			break;
4726 		case 2: /* D2 vblank/vline */
4727 			switch (src_data) {
4728 			case 0: /* D2 vblank */
4729 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4730 					if (rdev->irq.crtc_vblank_int[1]) {
4731 						drm_handle_vblank(rdev->ddev, 1);
4732 						rdev->pm.vblank_sync = true;
4733 						wake_up(&rdev->irq.vblank_queue);
4734 					}
4735 					if (atomic_read(&rdev->irq.pflip[1]))
4736 						radeon_crtc_handle_flip(rdev, 1);
4737 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4738 					DRM_DEBUG("IH: D2 vblank\n");
4739 				}
4740 				break;
4741 			case 1: /* D2 vline */
4742 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4743 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4744 					DRM_DEBUG("IH: D2 vline\n");
4745 				}
4746 				break;
4747 			default:
4748 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4749 				break;
4750 			}
4751 			break;
4752 		case 3: /* D3 vblank/vline */
4753 			switch (src_data) {
4754 			case 0: /* D3 vblank */
4755 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4756 					if (rdev->irq.crtc_vblank_int[2]) {
4757 						drm_handle_vblank(rdev->ddev, 2);
4758 						rdev->pm.vblank_sync = true;
4759 						wake_up(&rdev->irq.vblank_queue);
4760 					}
4761 					if (atomic_read(&rdev->irq.pflip[2]))
4762 						radeon_crtc_handle_flip(rdev, 2);
4763 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4764 					DRM_DEBUG("IH: D3 vblank\n");
4765 				}
4766 				break;
4767 			case 1: /* D3 vline */
4768 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4769 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4770 					DRM_DEBUG("IH: D3 vline\n");
4771 				}
4772 				break;
4773 			default:
4774 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4775 				break;
4776 			}
4777 			break;
4778 		case 4: /* D4 vblank/vline */
4779 			switch (src_data) {
4780 			case 0: /* D4 vblank */
4781 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4782 					if (rdev->irq.crtc_vblank_int[3]) {
4783 						drm_handle_vblank(rdev->ddev, 3);
4784 						rdev->pm.vblank_sync = true;
4785 						wake_up(&rdev->irq.vblank_queue);
4786 					}
4787 					if (atomic_read(&rdev->irq.pflip[3]))
4788 						radeon_crtc_handle_flip(rdev, 3);
4789 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4790 					DRM_DEBUG("IH: D4 vblank\n");
4791 				}
4792 				break;
4793 			case 1: /* D4 vline */
4794 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4795 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4796 					DRM_DEBUG("IH: D4 vline\n");
4797 				}
4798 				break;
4799 			default:
4800 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4801 				break;
4802 			}
4803 			break;
4804 		case 5: /* D5 vblank/vline */
4805 			switch (src_data) {
4806 			case 0: /* D5 vblank */
4807 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4808 					if (rdev->irq.crtc_vblank_int[4]) {
4809 						drm_handle_vblank(rdev->ddev, 4);
4810 						rdev->pm.vblank_sync = true;
4811 						wake_up(&rdev->irq.vblank_queue);
4812 					}
4813 					if (atomic_read(&rdev->irq.pflip[4]))
4814 						radeon_crtc_handle_flip(rdev, 4);
4815 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4816 					DRM_DEBUG("IH: D5 vblank\n");
4817 				}
4818 				break;
4819 			case 1: /* D5 vline */
4820 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4821 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4822 					DRM_DEBUG("IH: D5 vline\n");
4823 				}
4824 				break;
4825 			default:
4826 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4827 				break;
4828 			}
4829 			break;
4830 		case 6: /* D6 vblank/vline */
4831 			switch (src_data) {
4832 			case 0: /* D6 vblank */
4833 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4834 					if (rdev->irq.crtc_vblank_int[5]) {
4835 						drm_handle_vblank(rdev->ddev, 5);
4836 						rdev->pm.vblank_sync = true;
4837 						wake_up(&rdev->irq.vblank_queue);
4838 					}
4839 					if (atomic_read(&rdev->irq.pflip[5]))
4840 						radeon_crtc_handle_flip(rdev, 5);
4841 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4842 					DRM_DEBUG("IH: D6 vblank\n");
4843 				}
4844 				break;
4845 			case 1: /* D6 vline */
4846 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4847 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4848 					DRM_DEBUG("IH: D6 vline\n");
4849 				}
4850 				break;
4851 			default:
4852 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4853 				break;
4854 			}
4855 			break;
4856 		case 42: /* HPD hotplug */
4857 			switch (src_data) {
4858 			case 0:
4859 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4860 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
4861 					queue_hotplug = true;
4862 					DRM_DEBUG("IH: HPD1\n");
4863 				}
4864 				break;
4865 			case 1:
4866 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4867 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4868 					queue_hotplug = true;
4869 					DRM_DEBUG("IH: HPD2\n");
4870 				}
4871 				break;
4872 			case 2:
4873 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4874 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4875 					queue_hotplug = true;
4876 					DRM_DEBUG("IH: HPD3\n");
4877 				}
4878 				break;
4879 			case 3:
4880 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4881 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4882 					queue_hotplug = true;
4883 					DRM_DEBUG("IH: HPD4\n");
4884 				}
4885 				break;
4886 			case 4:
4887 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4888 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4889 					queue_hotplug = true;
4890 					DRM_DEBUG("IH: HPD5\n");
4891 				}
4892 				break;
4893 			case 5:
4894 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4895 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4896 					queue_hotplug = true;
4897 					DRM_DEBUG("IH: HPD6\n");
4898 				}
4899 				break;
4900 			default:
4901 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4902 				break;
4903 			}
4904 			break;
4905 		case 146:
4906 		case 147:
4907 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4908 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4909 				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4910 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4911 				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4912 			/* reset addr and status */
4913 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4914 			break;
4915 		case 176: /* RINGID0 CP_INT */
4916 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4917 			break;
4918 		case 177: /* RINGID1 CP_INT */
4919 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4920 			break;
4921 		case 178: /* RINGID2 CP_INT */
4922 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4923 			break;
4924 		case 181: /* CP EOP event */
4925 			DRM_DEBUG("IH: CP EOP\n");
4926 			switch (ring_id) {
4927 			case 0:
4928 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4929 				break;
4930 			case 1:
4931 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4932 				break;
4933 			case 2:
4934 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4935 				break;
4936 			}
4937 			break;
4938 		case 224: /* DMA trap event */
4939 			DRM_DEBUG("IH: DMA trap\n");
4940 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4941 			break;
4942 		case 233: /* GUI IDLE */
4943 			DRM_DEBUG("IH: GUI idle\n");
4944 			break;
4945 		case 244: /* DMA1 trap event */
4946 			DRM_DEBUG("IH: DMA1 trap\n");
4947 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4948 			break;
4949 		default:
4950 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4951 			break;
4952 		}
4953 
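		/* each IH ring entry is 16 bytes: the src_id, src_data and
		 * ring_id dwords decoded above plus one dword that is not
		 * consumed here, hence the 16-byte stride below
		 */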
4954 		/* wptr/rptr are in bytes! */
4955 		rptr += 16;
4956 		rptr &= rdev->ih.ptr_mask;
4957 	}
4958 	if (queue_hotplug)
4959 		schedule_work(&rdev->hotplug_work);
4960 	rdev->ih.rptr = rptr;
4961 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
4962 	atomic_set(&rdev->ih.lock, 0);
4963 
4964 	/* make sure wptr hasn't changed while processing */
4965 	wptr = si_get_ih_wptr(rdev);
4966 	if (wptr != rptr)
4967 		goto restart_ih;
4968 
4969 	return IRQ_HANDLED;
4970 }
4971 
4972 /**
4973  * si_copy_dma - copy pages using the DMA engine
4974  *
4975  * @rdev: radeon_device pointer
4976  * @src_offset: src GPU address
4977  * @dst_offset: dst GPU address
4978  * @num_gpu_pages: number of GPU pages to xfer
4979  * @fence: radeon fence object
4980  *
4981  * Copy pages between GPU addresses using the DMA engine (SI).
4982  * Used by the radeon ttm implementation to move pages if
4983  * registered as the asic copy callback.
4984  */
4985 int si_copy_dma(struct radeon_device *rdev,
4986 		uint64_t src_offset, uint64_t dst_offset,
4987 		unsigned num_gpu_pages,
4988 		struct radeon_fence **fence)
4989 {
4990 	struct radeon_semaphore *sem = NULL;
4991 	int ring_index = rdev->asic->copy.dma_ring_index;
4992 	struct radeon_ring *ring = &rdev->ring[ring_index];
4993 	u32 size_in_bytes, cur_size_in_bytes;
4994 	int i, num_loops;
4995 	int r = 0;
4996 
4997 	r = radeon_semaphore_create(rdev, &sem);
4998 	if (r) {
4999 		DRM_ERROR("radeon: moving bo (%d).\n", r);
5000 		return r;
5001 	}
5002 
5003 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5004 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5005 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5006 	if (r) {
5007 		DRM_ERROR("radeon: moving bo (%d).\n", r);
5008 		radeon_semaphore_free(rdev, &sem, NULL);
5009 		return r;
5010 	}
5011 
5012 	if (radeon_fence_need_sync(*fence, ring->idx)) {
5013 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5014 					    ring->idx);
5015 		radeon_fence_note_sync(*fence, ring->idx);
5016 	} else {
5017 		radeon_semaphore_free(rdev, &sem, NULL);
5018 	}
5019 
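	/* each COPY packet moves at most 0xFFFFF bytes (1 MiB - 1); an
	 * 8 MiB transfer, for example, is split across nine packets
	 */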
5020 	for (i = 0; i < num_loops; i++) {
5021 		cur_size_in_bytes = size_in_bytes;
5022 		if (cur_size_in_bytes > 0xFFFFF)
5023 			cur_size_in_bytes = 0xFFFFF;
5024 		size_in_bytes -= cur_size_in_bytes;
5025 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5026 		radeon_ring_write(ring, dst_offset & 0xffffffff);
5027 		radeon_ring_write(ring, src_offset & 0xffffffff);
5028 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5029 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5030 		src_offset += cur_size_in_bytes;
5031 		dst_offset += cur_size_in_bytes;
5032 	}
5033 
5034 	r = radeon_fence_emit(rdev, fence, ring->idx);
5035 	if (r) {
5036 		radeon_ring_unlock_undo(rdev, ring);
5037 		return r;
5038 	}
5039 
5040 	radeon_ring_unlock_commit(rdev, ring);
5041 	radeon_semaphore_free(rdev, &sem, *fence);
5042 
5043 	return r;
5044 }
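
/*
 * Usage sketch (illustrative, not part of the driver): a caller such as
 * the ttm move path copies num_pages GPU pages and then waits on the
 * returned fence.  src_gpu_addr/dst_gpu_addr/num_pages are placeholder
 * names:
 *
 *	struct radeon_fence *fence = NULL;
 *	int r = si_copy_dma(rdev, src_gpu_addr, dst_gpu_addr,
 *			    num_pages, &fence);
 *	if (!r) {
 *		r = radeon_fence_wait(fence, false);
 *		radeon_fence_unref(&fence);
 *	}
 */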
5045 
5046 /*
5047  * startup/shutdown callbacks
5048  */
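/* si_startup() brings the ASIC up in dependency order: microcode,
 * MC/GART, RLC buffers, writeback, per-ring fences, IRQs, ring
 * buffers, the CP and DMA engines, and finally the IB pool and the
 * VM manager; failures unwind by returning early.
 */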
5049 static int si_startup(struct radeon_device *rdev)
5050 {
5051 	struct radeon_ring *ring;
5052 	int r;
5053 
5054 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5055 	    !rdev->rlc_fw || !rdev->mc_fw) {
5056 		r = si_init_microcode(rdev);
5057 		if (r) {
5058 			DRM_ERROR("Failed to load firmware!\n");
5059 			return r;
5060 		}
5061 	}
5062 
5063 	r = si_mc_load_microcode(rdev);
5064 	if (r) {
5065 		DRM_ERROR("Failed to load MC firmware!\n");
5066 		return r;
5067 	}
5068 
5069 	r = r600_vram_scratch_init(rdev);
5070 	if (r)
5071 		return r;
5072 
5073 	si_mc_program(rdev);
5074 	r = si_pcie_gart_enable(rdev);
5075 	if (r)
5076 		return r;
5077 	si_gpu_init(rdev);
5078 
5079 	/* allocate rlc buffers */
5080 	r = si_rlc_init(rdev);
5081 	if (r) {
5082 		DRM_ERROR("Failed to init rlc BOs!\n");
5083 		return r;
5084 	}
5085 
5086 	/* allocate wb buffer */
5087 	r = radeon_wb_init(rdev);
5088 	if (r)
5089 		return r;
5090 
5091 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5092 	if (r) {
5093 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5094 		return r;
5095 	}
5096 
5097 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5098 	if (r) {
5099 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5100 		return r;
5101 	}
5102 
5103 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5104 	if (r) {
5105 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5106 		return r;
5107 	}
5108 
5109 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5110 	if (r) {
5111 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5112 		return r;
5113 	}
5114 
5115 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5116 	if (r) {
5117 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5118 		return r;
5119 	}
5120 
5121 	r = rv770_uvd_resume(rdev);
5122 	if (!r) {
5123 		r = radeon_fence_driver_start_ring(rdev,
5124 						   R600_RING_TYPE_UVD_INDEX);
5125 		if (r)
5126 			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5127 	}
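	/* if UVD failed to come up, zero the ring size so the UVD ring
	 * init later in this function is skipped
	 */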
5128 	if (r)
5129 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5130 
5131 	/* Enable IRQ */
5132 	r = si_irq_init(rdev);
5133 	if (r) {
5134 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
5135 		radeon_irq_kms_fini(rdev);
5136 		return r;
5137 	}
5138 	si_irq_set(rdev);
5139 
5140 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5141 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5142 			     CP_RB0_RPTR, CP_RB0_WPTR,
5143 			     0, 0xfffff, RADEON_CP_PACKET2);
5144 	if (r)
5145 		return r;
5146 
5147 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5148 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5149 			     CP_RB1_RPTR, CP_RB1_WPTR,
5150 			     0, 0xfffff, RADEON_CP_PACKET2);
5151 	if (r)
5152 		return r;
5153 
5154 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5155 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5156 			     CP_RB2_RPTR, CP_RB2_WPTR,
5157 			     0, 0xfffff, RADEON_CP_PACKET2);
5158 	if (r)
5159 		return r;
5160 
5161 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5162 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5163 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
5164 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
5165 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5166 	if (r)
5167 		return r;
5168 
5169 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5170 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5171 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
5172 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
5173 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5174 	if (r)
5175 		return r;
5176 
5177 	r = si_cp_load_microcode(rdev);
5178 	if (r)
5179 		return r;
5180 	r = si_cp_resume(rdev);
5181 	if (r)
5182 		return r;
5183 
5184 	r = cayman_dma_resume(rdev);
5185 	if (r)
5186 		return r;
5187 
5188 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5189 	if (ring->ring_size) {
5190 		r = radeon_ring_init(rdev, ring, ring->ring_size,
5191 				     R600_WB_UVD_RPTR_OFFSET,
5192 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5193 				     0, 0xfffff, RADEON_CP_PACKET2);
5194 		if (!r)
5195 			r = r600_uvd_init(rdev);
5196 		if (r)
5197 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5198 	}
5199 
5200 	r = radeon_ib_pool_init(rdev);
5201 	if (r) {
5202 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5203 		return r;
5204 	}
5205 
5206 	r = radeon_vm_manager_init(rdev);
5207 	if (r) {
5208 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5209 		return r;
5210 	}
5211 
5212 	return 0;
5213 }
5214 
5215 int si_resume(struct radeon_device *rdev)
5216 {
5217 	int r;
5218 
5219 	/* Do not reset the GPU before posting; on rv770-class hardware,
5220 	 * unlike r500, posting performs the tasks needed to bring the
5221 	 * GPU back into good shape.
5222 	 */
5223 	/* post card */
5224 	atom_asic_init(rdev->mode_info.atom_context);
5225 
5226 	/* init golden registers */
5227 	si_init_golden_registers(rdev);
5228 
5229 	rdev->accel_working = true;
5230 	r = si_startup(rdev);
5231 	if (r) {
5232 		DRM_ERROR("si startup failed on resume\n");
5233 		rdev->accel_working = false;
5234 		return r;
5235 	}
5236 
5237 	return r;
5238 }
5240 
5241 int si_suspend(struct radeon_device *rdev)
5242 {
5243 	radeon_vm_manager_fini(rdev);
5244 	si_cp_enable(rdev, false);
5245 	cayman_dma_stop(rdev);
5246 	r600_uvd_rbc_stop(rdev);
5247 	radeon_uvd_suspend(rdev);
5248 	si_irq_suspend(rdev);
5249 	radeon_wb_disable(rdev);
5250 	si_pcie_gart_disable(rdev);
5251 	return 0;
5252 }
5253 
5254 /* The plan is to move the initialization into this function and
5255  * to use helper functions so that radeon_device_init does little
5256  * more than call ASIC-specific functions. This should also allow
5257  * us to remove a number of callback functions, vram_info among
5258  * them.
5259  */
5260 int si_init(struct radeon_device *rdev)
5261 {
5262 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5263 	int r;
5264 
5265 	/* Read BIOS */
5266 	if (!radeon_get_bios(rdev)) {
5267 		if (ASIC_IS_AVIVO(rdev))
5268 			return -EINVAL;
5269 	}
5270 	/* Must be an ATOMBIOS */
5271 	if (!rdev->is_atom_bios) {
5272 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5273 		return -EINVAL;
5274 	}
5275 	r = radeon_atombios_init(rdev);
5276 	if (r)
5277 		return r;
5278 
5279 	/* Post card if necessary */
5280 	if (!radeon_card_posted(rdev)) {
5281 		if (!rdev->bios) {
5282 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5283 			return -EINVAL;
5284 		}
5285 		DRM_INFO("GPU not posted. posting now...\n");
5286 		atom_asic_init(rdev->mode_info.atom_context);
5287 	}
5288 	/* init golden registers */
5289 	si_init_golden_registers(rdev);
5290 	/* Initialize scratch registers */
5291 	si_scratch_init(rdev);
5292 	/* Initialize surface registers */
5293 	radeon_surface_init(rdev);
5294 	/* Initialize clocks */
5295 	radeon_get_clock_info(rdev->ddev);
5296 
5297 	/* Fence driver */
5298 	r = radeon_fence_driver_init(rdev);
5299 	if (r)
5300 		return r;
5301 
5302 	/* initialize memory controller */
5303 	r = si_mc_init(rdev);
5304 	if (r)
5305 		return r;
5306 	/* Memory manager */
5307 	r = radeon_bo_init(rdev);
5308 	if (r)
5309 		return r;
5310 
5311 	r = radeon_irq_kms_init(rdev);
5312 	if (r)
5313 		return r;
5314 
5315 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5316 	ring->ring_obj = NULL;
5317 	r600_ring_init(rdev, ring, 1024 * 1024);
5318 
5319 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5320 	ring->ring_obj = NULL;
5321 	r600_ring_init(rdev, ring, 1024 * 1024);
5322 
5323 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5324 	ring->ring_obj = NULL;
5325 	r600_ring_init(rdev, ring, 1024 * 1024);
5326 
5327 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5328 	ring->ring_obj = NULL;
5329 	r600_ring_init(rdev, ring, 64 * 1024);
5330 
5331 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5332 	ring->ring_obj = NULL;
5333 	r600_ring_init(rdev, ring, 64 * 1024);
5334 
5335 	r = radeon_uvd_init(rdev);
5336 	if (!r) {
5337 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5338 		ring->ring_obj = NULL;
5339 		r600_ring_init(rdev, ring, 4096);
5340 	}
5341 
5342 	rdev->ih.ring_obj = NULL;
5343 	r600_ih_ring_init(rdev, 64 * 1024);
5344 
5345 	r = r600_pcie_gart_init(rdev);
5346 	if (r)
5347 		return r;
5348 
5349 	rdev->accel_working = true;
5350 	r = si_startup(rdev);
5351 	if (r) {
5352 		dev_err(rdev->dev, "disabling GPU acceleration\n");
5353 		si_cp_fini(rdev);
5354 		cayman_dma_fini(rdev);
5355 		si_irq_fini(rdev);
5356 		si_rlc_fini(rdev);
5357 		radeon_wb_fini(rdev);
5358 		radeon_ib_pool_fini(rdev);
5359 		radeon_vm_manager_fini(rdev);
5360 		radeon_irq_kms_fini(rdev);
5361 		si_pcie_gart_fini(rdev);
5362 		rdev->accel_working = false;
5363 	}
5364 
5365 	/* Don't start up if the MC ucode is missing.
5366 	 * The default clocks and voltages before the MC ucode
5367 	 * is loaded are not sufficient for advanced operations.
5368 	 */
5369 	if (!rdev->mc_fw) {
5370 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
5371 		return -EINVAL;
5372 	}
5373 
5374 	return 0;
5375 }
5376 
5377 void si_fini(struct radeon_device *rdev)
5378 {
5379 	si_cp_fini(rdev);
5380 	cayman_dma_fini(rdev);
5381 	si_irq_fini(rdev);
5382 	si_rlc_fini(rdev);
5383 	radeon_wb_fini(rdev);
5384 	radeon_vm_manager_fini(rdev);
5385 	radeon_ib_pool_fini(rdev);
5386 	radeon_irq_kms_fini(rdev);
5387 	radeon_uvd_fini(rdev);
5388 	si_pcie_gart_fini(rdev);
5389 	r600_vram_scratch_fini(rdev);
5390 	radeon_gem_fini(rdev);
5391 	radeon_fence_driver_fini(rdev);
5392 	radeon_bo_fini(rdev);
5393 	radeon_atombios_fini(rdev);
5394 	kfree(rdev->bios);
5395 	rdev->bios = NULL;
5396 }
5397 
5398 /**
5399  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5400  *
5401  * @rdev: radeon_device pointer
5402  *
5403  * Fetches a GPU clock counter snapshot (SI).
5404  * Returns the 64 bit clock counter snapshot.
5405  */
5406 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5407 {
5408 	uint64_t clock;
5409 
5410 	mutex_lock(&rdev->gpu_clock_mutex);
5411 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5412 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5413 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5414 	mutex_unlock(&rdev->gpu_clock_mutex);
5415 	return clock;
5416 }
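
/*
 * Usage sketch (illustrative): sampling the counter twice brackets a
 * workload in GPU clock ticks; userspace reaches this through e.g. the
 * RADEON_INFO_TIMESTAMP query:
 *
 *	uint64_t t0 = si_get_gpu_clock_counter(rdev);
 *	... workload ...
 *	uint64_t t1 = si_get_gpu_clock_counter(rdev);
 *	elapsed GPU clocks: t1 - t0
 */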
5417 
5418 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
5419 {
5420 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
5421 	int r;
5422 
5423 	/* bypass vclk and dclk with bclk */
5424 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
5425 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
5426 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
5427 
5428 	/* put PLL in bypass mode */
5429 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
5430 
5431 	if (!vclk || !dclk) {
5432 		/* keep bypass mode and put the PLL to sleep */
5433 		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
5434 		return 0;
5435 	}
5436 
5437 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
5438 					  16384, 0x03FFFFFF, 0, 128, 5,
5439 					  &fb_div, &vclk_div, &dclk_div);
5440 	if (r)
5441 		return r;
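
	/* the computed dividers roughly satisfy vco = vclk * vclk_div =
	 * dclk * dclk_div, with fb_div scaled by the 16384 factor and the
	 * VCO held inside the 1.25-2.5 GHz window requested above
	 * (assumption based on radeon_uvd_calc_upll_dividers)
	 */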
5442 
5443 	/* set RESET_ANTI_MUX to 0 */
5444 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
5445 
5446 	/* set VCO_MODE to 1 */
5447 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
5448 
5449 	/* toggle UPLL_SLEEP to 1 then back to 0 */
5450 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
5451 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
5452 
5453 	/* deassert UPLL_RESET */
5454 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
5455 
5456 	mdelay(1);
5457 
5458 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
5459 	if (r)
5460 		return r;
5461 
5462 	/* assert UPLL_RESET again */
5463 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
5464 
5465 	/* disable spread spectrum */
5466 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
5467 
5468 	/* set feedback divider */
5469 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
5470 
5471 	/* set ref divider to 0 */
5472 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
5473 
5474 	if (fb_div < 307200)
5475 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
5476 	else
5477 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
5478 
5479 	/* set PDIV_A and PDIV_B */
5480 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
5481 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
5482 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
5483 
5484 	/* give the PLL some time to settle */
5485 	mdelay(15);
5486 
5487 	/* deassert PLL_RESET */
5488 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
5489 
5490 	mdelay(15);
5491 
5492 	/* switch from bypass mode to normal mode */
5493 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
5494 
5495 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
5496 	if (r)
5497 		return r;
5498 
5499 	/* switch VCLK and DCLK selection */
5500 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
5501 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
5502 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
5503 
5504 	mdelay(100);
5505 
5506 	return 0;
5507 }
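
/*
 * Usage sketch (illustrative): callers raise the UVD clocks before
 * decode work and drop them back to bypass when idle, e.g.
 *
 *	si_set_uvd_clocks(rdev, 53300, 40000);	(~533/400 MHz, 10 kHz units)
 *	... decode ...
 *	si_set_uvd_clocks(rdev, 0, 0);		(bypass, PLL to sleep)
 */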
5508