xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 8851b9f1)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 
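/* ucode sizes below are in dwords; cik_init_microcode() multiplies by 4
 * to get the expected firmware image sizes in bytes.
 */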
34 /* GFX */
35 #define CIK_PFP_UCODE_SIZE 2144
36 #define CIK_ME_UCODE_SIZE 2144
37 #define CIK_CE_UCODE_SIZE 2144
38 /* compute */
39 #define CIK_MEC_UCODE_SIZE 4192
40 /* interrupts */
41 #define BONAIRE_RLC_UCODE_SIZE 2048
42 #define KB_RLC_UCODE_SIZE 2560
43 #define KV_RLC_UCODE_SIZE 2560
44 /* gddr controller */
45 #define CIK_MC_UCODE_SIZE 7866
46 /* sdma */
47 #define CIK_SDMA_UCODE_SIZE 1050
48 #define CIK_SDMA_UCODE_VERSION 64
49 
50 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
63 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
64 MODULE_FIRMWARE("radeon/KABINI_me.bin");
65 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
66 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
67 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
68 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
69 
70 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
71 extern void r600_ih_ring_fini(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
75 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
76 extern void si_rlc_fini(struct radeon_device *rdev);
77 extern int si_rlc_init(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 
80 /*
81  * Indirect registers accessor
82  */
83 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
84 {
85 	u32 r;
86 
87 	WREG32(PCIE_INDEX, reg);
88 	(void)RREG32(PCIE_INDEX);
89 	r = RREG32(PCIE_DATA);
90 	return r;
91 }
92 
93 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
94 {
95 	WREG32(PCIE_INDEX, reg);
96 	(void)RREG32(PCIE_INDEX);
97 	WREG32(PCIE_DATA, v);
98 	(void)RREG32(PCIE_DATA);
99 }
100 
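/* Golden register tables are {register offset, and-mask, value} triplets
 * applied by radeon_program_register_sequence(); the masked bits of each
 * register are replaced with the given value.
 */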
101 static const u32 bonaire_golden_spm_registers[] =
102 {
103 	0x30800, 0xe0ffffff, 0xe0000000
104 };
105 
106 static const u32 bonaire_golden_common_registers[] =
107 {
108 	0xc770, 0xffffffff, 0x00000800,
109 	0xc774, 0xffffffff, 0x00000800,
110 	0xc798, 0xffffffff, 0x00007fbf,
111 	0xc79c, 0xffffffff, 0x00007faf
112 };
113 
114 static const u32 bonaire_golden_registers[] =
115 {
116 	0x3354, 0x00000333, 0x00000333,
117 	0x3350, 0x000c0fc0, 0x00040200,
118 	0x9a10, 0x00010000, 0x00058208,
119 	0x3c000, 0xffff1fff, 0x00140000,
120 	0x3c200, 0xfdfc0fff, 0x00000100,
121 	0x3c234, 0x40000000, 0x40000200,
122 	0x9830, 0xffffffff, 0x00000000,
123 	0x9834, 0xf00fffff, 0x00000400,
124 	0x9838, 0x0002021c, 0x00020200,
125 	0xc78, 0x00000080, 0x00000000,
126 	0x5bb0, 0x000000f0, 0x00000070,
127 	0x5bc0, 0xf0311fff, 0x80300000,
128 	0x98f8, 0x73773777, 0x12010001,
129 	0x350c, 0x00810000, 0x408af000,
130 	0x7030, 0x31000111, 0x00000011,
131 	0x2f48, 0x73773777, 0x12010001,
132 	0x220c, 0x00007fb6, 0x0021a1b1,
133 	0x2210, 0x00007fb6, 0x002021b1,
134 	0x2180, 0x00007fb6, 0x00002191,
135 	0x2218, 0x00007fb6, 0x002121b1,
136 	0x221c, 0x00007fb6, 0x002021b1,
137 	0x21dc, 0x00007fb6, 0x00002191,
138 	0x21e0, 0x00007fb6, 0x00002191,
139 	0x3628, 0x0000003f, 0x0000000a,
140 	0x362c, 0x0000003f, 0x0000000a,
141 	0x2ae4, 0x00073ffe, 0x000022a2,
142 	0x240c, 0x000007ff, 0x00000000,
143 	0x8a14, 0xf000003f, 0x00000007,
144 	0x8bf0, 0x00002001, 0x00000001,
145 	0x8b24, 0xffffffff, 0x00ffffff,
146 	0x30a04, 0x0000ff0f, 0x00000000,
147 	0x28a4c, 0x07ffffff, 0x06000000,
148 	0x4d8, 0x00000fff, 0x00000100,
149 	0x3e78, 0x00000001, 0x00000002,
150 	0x9100, 0x03000000, 0x0362c688,
151 	0x8c00, 0x000000ff, 0x00000001,
152 	0xe40, 0x00001fff, 0x00001fff,
153 	0x9060, 0x0000007f, 0x00000020,
154 	0x9508, 0x00010000, 0x00010000,
155 	0xac14, 0x000003ff, 0x000000f3,
156 	0xac0c, 0xffffffff, 0x00001032
157 };
158 
159 static const u32 bonaire_mgcg_cgcg_init[] =
160 {
161 	0xc420, 0xffffffff, 0xfffffffc,
162 	0x30800, 0xffffffff, 0xe0000000,
163 	0x3c2a0, 0xffffffff, 0x00000100,
164 	0x3c208, 0xffffffff, 0x00000100,
165 	0x3c2c0, 0xffffffff, 0xc0000100,
166 	0x3c2c8, 0xffffffff, 0xc0000100,
167 	0x3c2c4, 0xffffffff, 0xc0000100,
168 	0x55e4, 0xffffffff, 0x00600100,
169 	0x3c280, 0xffffffff, 0x00000100,
170 	0x3c214, 0xffffffff, 0x06000100,
171 	0x3c220, 0xffffffff, 0x00000100,
172 	0x3c218, 0xffffffff, 0x06000100,
173 	0x3c204, 0xffffffff, 0x00000100,
174 	0x3c2e0, 0xffffffff, 0x00000100,
175 	0x3c224, 0xffffffff, 0x00000100,
176 	0x3c200, 0xffffffff, 0x00000100,
177 	0x3c230, 0xffffffff, 0x00000100,
178 	0x3c234, 0xffffffff, 0x00000100,
179 	0x3c250, 0xffffffff, 0x00000100,
180 	0x3c254, 0xffffffff, 0x00000100,
181 	0x3c258, 0xffffffff, 0x00000100,
182 	0x3c25c, 0xffffffff, 0x00000100,
183 	0x3c260, 0xffffffff, 0x00000100,
184 	0x3c27c, 0xffffffff, 0x00000100,
185 	0x3c278, 0xffffffff, 0x00000100,
186 	0x3c210, 0xffffffff, 0x06000100,
187 	0x3c290, 0xffffffff, 0x00000100,
188 	0x3c274, 0xffffffff, 0x00000100,
189 	0x3c2b4, 0xffffffff, 0x00000100,
190 	0x3c2b0, 0xffffffff, 0x00000100,
191 	0x3c270, 0xffffffff, 0x00000100,
192 	0x30800, 0xffffffff, 0xe0000000,
193 	0x3c020, 0xffffffff, 0x00010000,
194 	0x3c024, 0xffffffff, 0x00030002,
195 	0x3c028, 0xffffffff, 0x00040007,
196 	0x3c02c, 0xffffffff, 0x00060005,
197 	0x3c030, 0xffffffff, 0x00090008,
198 	0x3c034, 0xffffffff, 0x00010000,
199 	0x3c038, 0xffffffff, 0x00030002,
200 	0x3c03c, 0xffffffff, 0x00040007,
201 	0x3c040, 0xffffffff, 0x00060005,
202 	0x3c044, 0xffffffff, 0x00090008,
203 	0x3c048, 0xffffffff, 0x00010000,
204 	0x3c04c, 0xffffffff, 0x00030002,
205 	0x3c050, 0xffffffff, 0x00040007,
206 	0x3c054, 0xffffffff, 0x00060005,
207 	0x3c058, 0xffffffff, 0x00090008,
208 	0x3c05c, 0xffffffff, 0x00010000,
209 	0x3c060, 0xffffffff, 0x00030002,
210 	0x3c064, 0xffffffff, 0x00040007,
211 	0x3c068, 0xffffffff, 0x00060005,
212 	0x3c06c, 0xffffffff, 0x00090008,
213 	0x3c070, 0xffffffff, 0x00010000,
214 	0x3c074, 0xffffffff, 0x00030002,
215 	0x3c078, 0xffffffff, 0x00040007,
216 	0x3c07c, 0xffffffff, 0x00060005,
217 	0x3c080, 0xffffffff, 0x00090008,
218 	0x3c084, 0xffffffff, 0x00010000,
219 	0x3c088, 0xffffffff, 0x00030002,
220 	0x3c08c, 0xffffffff, 0x00040007,
221 	0x3c090, 0xffffffff, 0x00060005,
222 	0x3c094, 0xffffffff, 0x00090008,
223 	0x3c098, 0xffffffff, 0x00010000,
224 	0x3c09c, 0xffffffff, 0x00030002,
225 	0x3c0a0, 0xffffffff, 0x00040007,
226 	0x3c0a4, 0xffffffff, 0x00060005,
227 	0x3c0a8, 0xffffffff, 0x00090008,
228 	0x3c000, 0xffffffff, 0x96e00200,
229 	0x8708, 0xffffffff, 0x00900100,
230 	0xc424, 0xffffffff, 0x0020003f,
231 	0x38, 0xffffffff, 0x0140001c,
232 	0x3c, 0x000f0000, 0x000f0000,
233 	0x220, 0xffffffff, 0xC060000C,
234 	0x224, 0xc0000fff, 0x00000100,
235 	0xf90, 0xffffffff, 0x00000100,
236 	0xf98, 0x00000101, 0x00000000,
237 	0x20a8, 0xffffffff, 0x00000104,
238 	0x55e4, 0xff000fff, 0x00000100,
239 	0x30cc, 0xc0000fff, 0x00000104,
240 	0xc1e4, 0x00000001, 0x00000001,
241 	0xd00c, 0xff000ff0, 0x00000100,
242 	0xd80c, 0xff000ff0, 0x00000100
243 };
244 
245 static const u32 spectre_golden_spm_registers[] =
246 {
247 	0x30800, 0xe0ffffff, 0xe0000000
248 };
249 
250 static const u32 spectre_golden_common_registers[] =
251 {
252 	0xc770, 0xffffffff, 0x00000800,
253 	0xc774, 0xffffffff, 0x00000800,
254 	0xc798, 0xffffffff, 0x00007fbf,
255 	0xc79c, 0xffffffff, 0x00007faf
256 };
257 
258 static const u32 spectre_golden_registers[] =
259 {
260 	0x3c000, 0xffff1fff, 0x96940200,
261 	0x3c00c, 0xffff0001, 0xff000000,
262 	0x3c200, 0xfffc0fff, 0x00000100,
263 	0x6ed8, 0x00010101, 0x00010000,
264 	0x9834, 0xf00fffff, 0x00000400,
265 	0x9838, 0xfffffffc, 0x00020200,
266 	0x5bb0, 0x000000f0, 0x00000070,
267 	0x5bc0, 0xf0311fff, 0x80300000,
268 	0x98f8, 0x73773777, 0x12010001,
269 	0x9b7c, 0x00ff0000, 0x00fc0000,
270 	0x2f48, 0x73773777, 0x12010001,
271 	0x8a14, 0xf000003f, 0x00000007,
272 	0x8b24, 0xffffffff, 0x00ffffff,
273 	0x28350, 0x3f3f3fff, 0x00000082,
274 	0x28355, 0x0000003f, 0x00000000,
275 	0x3e78, 0x00000001, 0x00000002,
276 	0x913c, 0xffff03df, 0x00000004,
277 	0xc768, 0x00000008, 0x00000008,
278 	0x8c00, 0x000008ff, 0x00000800,
279 	0x9508, 0x00010000, 0x00010000,
280 	0xac0c, 0xffffffff, 0x54763210,
281 	0x214f8, 0x01ff01ff, 0x00000002,
282 	0x21498, 0x007ff800, 0x00200000,
283 	0x2015c, 0xffffffff, 0x00000f40,
284 	0x30934, 0xffffffff, 0x00000001
285 };
286 
287 static const u32 spectre_mgcg_cgcg_init[] =
288 {
289 	0xc420, 0xffffffff, 0xfffffffc,
290 	0x30800, 0xffffffff, 0xe0000000,
291 	0x3c2a0, 0xffffffff, 0x00000100,
292 	0x3c208, 0xffffffff, 0x00000100,
293 	0x3c2c0, 0xffffffff, 0x00000100,
294 	0x3c2c8, 0xffffffff, 0x00000100,
295 	0x3c2c4, 0xffffffff, 0x00000100,
296 	0x55e4, 0xffffffff, 0x00600100,
297 	0x3c280, 0xffffffff, 0x00000100,
298 	0x3c214, 0xffffffff, 0x06000100,
299 	0x3c220, 0xffffffff, 0x00000100,
300 	0x3c218, 0xffffffff, 0x06000100,
301 	0x3c204, 0xffffffff, 0x00000100,
302 	0x3c2e0, 0xffffffff, 0x00000100,
303 	0x3c224, 0xffffffff, 0x00000100,
304 	0x3c200, 0xffffffff, 0x00000100,
305 	0x3c230, 0xffffffff, 0x00000100,
306 	0x3c234, 0xffffffff, 0x00000100,
307 	0x3c250, 0xffffffff, 0x00000100,
308 	0x3c254, 0xffffffff, 0x00000100,
309 	0x3c258, 0xffffffff, 0x00000100,
310 	0x3c25c, 0xffffffff, 0x00000100,
311 	0x3c260, 0xffffffff, 0x00000100,
312 	0x3c27c, 0xffffffff, 0x00000100,
313 	0x3c278, 0xffffffff, 0x00000100,
314 	0x3c210, 0xffffffff, 0x06000100,
315 	0x3c290, 0xffffffff, 0x00000100,
316 	0x3c274, 0xffffffff, 0x00000100,
317 	0x3c2b4, 0xffffffff, 0x00000100,
318 	0x3c2b0, 0xffffffff, 0x00000100,
319 	0x3c270, 0xffffffff, 0x00000100,
320 	0x30800, 0xffffffff, 0xe0000000,
321 	0x3c020, 0xffffffff, 0x00010000,
322 	0x3c024, 0xffffffff, 0x00030002,
323 	0x3c028, 0xffffffff, 0x00040007,
324 	0x3c02c, 0xffffffff, 0x00060005,
325 	0x3c030, 0xffffffff, 0x00090008,
326 	0x3c034, 0xffffffff, 0x00010000,
327 	0x3c038, 0xffffffff, 0x00030002,
328 	0x3c03c, 0xffffffff, 0x00040007,
329 	0x3c040, 0xffffffff, 0x00060005,
330 	0x3c044, 0xffffffff, 0x00090008,
331 	0x3c048, 0xffffffff, 0x00010000,
332 	0x3c04c, 0xffffffff, 0x00030002,
333 	0x3c050, 0xffffffff, 0x00040007,
334 	0x3c054, 0xffffffff, 0x00060005,
335 	0x3c058, 0xffffffff, 0x00090008,
336 	0x3c05c, 0xffffffff, 0x00010000,
337 	0x3c060, 0xffffffff, 0x00030002,
338 	0x3c064, 0xffffffff, 0x00040007,
339 	0x3c068, 0xffffffff, 0x00060005,
340 	0x3c06c, 0xffffffff, 0x00090008,
341 	0x3c070, 0xffffffff, 0x00010000,
342 	0x3c074, 0xffffffff, 0x00030002,
343 	0x3c078, 0xffffffff, 0x00040007,
344 	0x3c07c, 0xffffffff, 0x00060005,
345 	0x3c080, 0xffffffff, 0x00090008,
346 	0x3c084, 0xffffffff, 0x00010000,
347 	0x3c088, 0xffffffff, 0x00030002,
348 	0x3c08c, 0xffffffff, 0x00040007,
349 	0x3c090, 0xffffffff, 0x00060005,
350 	0x3c094, 0xffffffff, 0x00090008,
351 	0x3c098, 0xffffffff, 0x00010000,
352 	0x3c09c, 0xffffffff, 0x00030002,
353 	0x3c0a0, 0xffffffff, 0x00040007,
354 	0x3c0a4, 0xffffffff, 0x00060005,
355 	0x3c0a8, 0xffffffff, 0x00090008,
356 	0x3c0ac, 0xffffffff, 0x00010000,
357 	0x3c0b0, 0xffffffff, 0x00030002,
358 	0x3c0b4, 0xffffffff, 0x00040007,
359 	0x3c0b8, 0xffffffff, 0x00060005,
360 	0x3c0bc, 0xffffffff, 0x00090008,
361 	0x3c000, 0xffffffff, 0x96e00200,
362 	0x8708, 0xffffffff, 0x00900100,
363 	0xc424, 0xffffffff, 0x0020003f,
364 	0x38, 0xffffffff, 0x0140001c,
365 	0x3c, 0x000f0000, 0x000f0000,
366 	0x220, 0xffffffff, 0xC060000C,
367 	0x224, 0xc0000fff, 0x00000100,
368 	0xf90, 0xffffffff, 0x00000100,
369 	0xf98, 0x00000101, 0x00000000,
370 	0x20a8, 0xffffffff, 0x00000104,
371 	0x55e4, 0xff000fff, 0x00000100,
372 	0x30cc, 0xc0000fff, 0x00000104,
373 	0xc1e4, 0x00000001, 0x00000001,
374 	0xd00c, 0xff000ff0, 0x00000100,
375 	0xd80c, 0xff000ff0, 0x00000100
376 };
377 
378 static const u32 kalindi_golden_spm_registers[] =
379 {
380 	0x30800, 0xe0ffffff, 0xe0000000
381 };
382 
383 static const u32 kalindi_golden_common_registers[] =
384 {
385 	0xc770, 0xffffffff, 0x00000800,
386 	0xc774, 0xffffffff, 0x00000800,
387 	0xc798, 0xffffffff, 0x00007fbf,
388 	0xc79c, 0xffffffff, 0x00007faf
389 };
390 
391 static const u32 kalindi_golden_registers[] =
392 {
393 	0x3c000, 0xffffdfff, 0x6e944040,
394 	0x55e4, 0xff607fff, 0xfc000100,
395 	0x3c220, 0xff000fff, 0x00000100,
396 	0x3c224, 0xff000fff, 0x00000100,
397 	0x3c200, 0xfffc0fff, 0x00000100,
398 	0x6ed8, 0x00010101, 0x00010000,
399 	0x9830, 0xffffffff, 0x00000000,
400 	0x9834, 0xf00fffff, 0x00000400,
401 	0x5bb0, 0x000000f0, 0x00000070,
402 	0x5bc0, 0xf0311fff, 0x80300000,
403 	0x98f8, 0x73773777, 0x12010001,
404 	0x98fc, 0xffffffff, 0x00000010,
405 	0x9b7c, 0x00ff0000, 0x00fc0000,
406 	0x8030, 0x00001f0f, 0x0000100a,
407 	0x2f48, 0x73773777, 0x12010001,
408 	0x2408, 0x000fffff, 0x000c007f,
409 	0x8a14, 0xf000003f, 0x00000007,
410 	0x8b24, 0x3fff3fff, 0x00ffcfff,
411 	0x30a04, 0x0000ff0f, 0x00000000,
412 	0x28a4c, 0x07ffffff, 0x06000000,
413 	0x4d8, 0x00000fff, 0x00000100,
414 	0x3e78, 0x00000001, 0x00000002,
415 	0xc768, 0x00000008, 0x00000008,
416 	0x8c00, 0x000000ff, 0x00000003,
417 	0x214f8, 0x01ff01ff, 0x00000002,
418 	0x21498, 0x007ff800, 0x00200000,
419 	0x2015c, 0xffffffff, 0x00000f40,
420 	0x88c4, 0x001f3ae3, 0x00000082,
421 	0x88d4, 0x0000001f, 0x00000010,
422 	0x30934, 0xffffffff, 0x00000000
423 };
424 
425 static const u32 kalindi_mgcg_cgcg_init[] =
426 {
427 	0xc420, 0xffffffff, 0xfffffffc,
428 	0x30800, 0xffffffff, 0xe0000000,
429 	0x3c2a0, 0xffffffff, 0x00000100,
430 	0x3c208, 0xffffffff, 0x00000100,
431 	0x3c2c0, 0xffffffff, 0x00000100,
432 	0x3c2c8, 0xffffffff, 0x00000100,
433 	0x3c2c4, 0xffffffff, 0x00000100,
434 	0x55e4, 0xffffffff, 0x00600100,
435 	0x3c280, 0xffffffff, 0x00000100,
436 	0x3c214, 0xffffffff, 0x06000100,
437 	0x3c220, 0xffffffff, 0x00000100,
438 	0x3c218, 0xffffffff, 0x06000100,
439 	0x3c204, 0xffffffff, 0x00000100,
440 	0x3c2e0, 0xffffffff, 0x00000100,
441 	0x3c224, 0xffffffff, 0x00000100,
442 	0x3c200, 0xffffffff, 0x00000100,
443 	0x3c230, 0xffffffff, 0x00000100,
444 	0x3c234, 0xffffffff, 0x00000100,
445 	0x3c250, 0xffffffff, 0x00000100,
446 	0x3c254, 0xffffffff, 0x00000100,
447 	0x3c258, 0xffffffff, 0x00000100,
448 	0x3c25c, 0xffffffff, 0x00000100,
449 	0x3c260, 0xffffffff, 0x00000100,
450 	0x3c27c, 0xffffffff, 0x00000100,
451 	0x3c278, 0xffffffff, 0x00000100,
452 	0x3c210, 0xffffffff, 0x06000100,
453 	0x3c290, 0xffffffff, 0x00000100,
454 	0x3c274, 0xffffffff, 0x00000100,
455 	0x3c2b4, 0xffffffff, 0x00000100,
456 	0x3c2b0, 0xffffffff, 0x00000100,
457 	0x3c270, 0xffffffff, 0x00000100,
458 	0x30800, 0xffffffff, 0xe0000000,
459 	0x3c020, 0xffffffff, 0x00010000,
460 	0x3c024, 0xffffffff, 0x00030002,
461 	0x3c028, 0xffffffff, 0x00040007,
462 	0x3c02c, 0xffffffff, 0x00060005,
463 	0x3c030, 0xffffffff, 0x00090008,
464 	0x3c034, 0xffffffff, 0x00010000,
465 	0x3c038, 0xffffffff, 0x00030002,
466 	0x3c03c, 0xffffffff, 0x00040007,
467 	0x3c040, 0xffffffff, 0x00060005,
468 	0x3c044, 0xffffffff, 0x00090008,
469 	0x3c000, 0xffffffff, 0x96e00200,
470 	0x8708, 0xffffffff, 0x00900100,
471 	0xc424, 0xffffffff, 0x0020003f,
472 	0x38, 0xffffffff, 0x0140001c,
473 	0x3c, 0x000f0000, 0x000f0000,
474 	0x220, 0xffffffff, 0xC060000C,
475 	0x224, 0xc0000fff, 0x00000100,
476 	0x20a8, 0xffffffff, 0x00000104,
477 	0x55e4, 0xff000fff, 0x00000100,
478 	0x30cc, 0xc0000fff, 0x00000104,
479 	0xc1e4, 0x00000001, 0x00000001,
480 	0xd00c, 0xff000ff0, 0x00000100,
481 	0xd80c, 0xff000ff0, 0x00000100
482 };
483 
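/**
 * cik_init_golden_registers - program golden register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the family-specific "golden" register values
 * recommended for this asic (CIK).
 */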
484 static void cik_init_golden_registers(struct radeon_device *rdev)
485 {
486 	switch (rdev->family) {
487 	case CHIP_BONAIRE:
488 		radeon_program_register_sequence(rdev,
489 						 bonaire_mgcg_cgcg_init,
490 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
491 		radeon_program_register_sequence(rdev,
492 						 bonaire_golden_registers,
493 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
494 		radeon_program_register_sequence(rdev,
495 						 bonaire_golden_common_registers,
496 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
497 		radeon_program_register_sequence(rdev,
498 						 bonaire_golden_spm_registers,
499 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
500 		break;
501 	case CHIP_KABINI:
502 		radeon_program_register_sequence(rdev,
503 						 kalindi_mgcg_cgcg_init,
504 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
505 		radeon_program_register_sequence(rdev,
506 						 kalindi_golden_registers,
507 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
508 		radeon_program_register_sequence(rdev,
509 						 kalindi_golden_common_registers,
510 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
511 		radeon_program_register_sequence(rdev,
512 						 kalindi_golden_spm_registers,
513 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
514 		break;
515 	case CHIP_KAVERI:
516 		radeon_program_register_sequence(rdev,
517 						 spectre_mgcg_cgcg_init,
518 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
519 		radeon_program_register_sequence(rdev,
520 						 spectre_golden_registers,
521 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
522 		radeon_program_register_sequence(rdev,
523 						 spectre_golden_common_registers,
524 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
525 		radeon_program_register_sequence(rdev,
526 						 spectre_golden_spm_registers,
527 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
528 		break;
529 	default:
530 		break;
531 	}
532 }
533 
534 /**
535  * cik_get_xclk - get the xclk
536  *
537  * @rdev: radeon_device pointer
538  *
539  * Returns the reference clock used by the gfx engine
540  * (CIK).
541  */
542 u32 cik_get_xclk(struct radeon_device *rdev)
543 {
544 	u32 reference_clock = rdev->clock.spll.reference_freq;
545 
546 	if (rdev->flags & RADEON_IS_IGP) {
547 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
548 			return reference_clock / 2;
549 	} else {
550 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
551 			return reference_clock / 4;
552 	}
553 	return reference_clock;
554 }
555 
556 /**
557  * cik_mm_rdoorbell - read a doorbell dword
558  *
559  * @rdev: radeon_device pointer
560  * @offset: byte offset into the aperture
561  *
562  * Returns the value in the doorbell aperture at the
563  * requested offset (CIK).
564  */
565 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
566 {
567 	if (offset < rdev->doorbell.size) {
568 		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
569 	} else {
570 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
571 		return 0;
572 	}
573 }
574 
575 /**
576  * cik_mm_wdoorbell - write a doorbell dword
577  *
578  * @rdev: radeon_device pointer
579  * @offset: byte offset into the aperture
580  * @v: value to write
581  *
582  * Writes @v to the doorbell aperture at the
583  * requested offset (CIK).
584  */
585 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
586 {
587 	if (offset < rdev->doorbell.size) {
588 		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
589 	} else {
590 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
591 	}
592 }
593 
594 #define BONAIRE_IO_MC_REGS_SIZE 36
595 
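/* {MC_SEQ_IO_DEBUG index, data} pairs programmed by ci_mc_load_microcode()
 * before the MC ucode itself is loaded.
 */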
596 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
597 {
598 	{0x00000070, 0x04400000},
599 	{0x00000071, 0x80c01803},
600 	{0x00000072, 0x00004004},
601 	{0x00000073, 0x00000100},
602 	{0x00000074, 0x00ff0000},
603 	{0x00000075, 0x34000000},
604 	{0x00000076, 0x08000014},
605 	{0x00000077, 0x00cc08ec},
606 	{0x00000078, 0x00000400},
607 	{0x00000079, 0x00000000},
608 	{0x0000007a, 0x04090000},
609 	{0x0000007c, 0x00000000},
610 	{0x0000007e, 0x4408a8e8},
611 	{0x0000007f, 0x00000304},
612 	{0x00000080, 0x00000000},
613 	{0x00000082, 0x00000001},
614 	{0x00000083, 0x00000002},
615 	{0x00000084, 0xf3e4f400},
616 	{0x00000085, 0x052024e3},
617 	{0x00000087, 0x00000000},
618 	{0x00000088, 0x01000000},
619 	{0x0000008a, 0x1c0a0000},
620 	{0x0000008b, 0xff010000},
621 	{0x0000008d, 0xffffefff},
622 	{0x0000008e, 0xfff3efff},
623 	{0x0000008f, 0xfff3efbf},
624 	{0x00000092, 0xf7ffffff},
625 	{0x00000093, 0xffffff7f},
626 	{0x00000095, 0x00101101},
627 	{0x00000096, 0x00000fff},
628 	{0x00000097, 0x00116fff},
629 	{0x00000098, 0x60010000},
630 	{0x00000099, 0x10010000},
631 	{0x0000009a, 0x00006000},
632 	{0x0000009b, 0x00001000},
633 	{0x0000009f, 0x00b48000}
634 };
635 
636 /**
637  * cik_srbm_select - select specific register instances
638  *
639  * @rdev: radeon_device pointer
640  * @me: selected ME (micro engine)
641  * @pipe: pipe
642  * @queue: queue
643  * @vmid: VMID
644  *
645  * Switches the currently active register instances.  Some
646  * registers are instanced per VMID, others are instanced per
647  * me/pipe/queue combination.
648  */
649 static void cik_srbm_select(struct radeon_device *rdev,
650 			    u32 me, u32 pipe, u32 queue, u32 vmid)
651 {
652 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
653 			     MEID(me & 0x3) |
654 			     VMID(vmid & 0xf) |
655 			     QUEUEID(queue & 0x7));
656 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
657 }
658 
659 /* ucode loading */
660 /**
661  * ci_mc_load_microcode - load MC ucode into the hw
662  *
663  * @rdev: radeon_device pointer
664  *
665  * Load the GDDR MC ucode into the hw (CIK).
666  * Returns 0 on success, error on failure.
667  */
668 static int ci_mc_load_microcode(struct radeon_device *rdev)
669 {
670 	const __be32 *fw_data;
671 	u32 running;
672 	u32 *io_mc_regs;
673 	int i, ucode_size, regs_size;
674 
675 	if (!rdev->mc_fw)
676 		return -EINVAL;
677 
678 	switch (rdev->family) {
679 	case CHIP_BONAIRE:
680 	default:
681 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
682 		ucode_size = CIK_MC_UCODE_SIZE;
683 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
684 		break;
685 	}
686 
687 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
688 
689 	if (running == 0) {
695 		/* reset the engine and set to writable */
696 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
697 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
698 
699 		/* load mc io regs */
700 		for (i = 0; i < regs_size; i++) {
701 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
702 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
703 		}
704 		/* load the MC ucode */
705 		fw_data = (const __be32 *)rdev->mc_fw->data;
706 		for (i = 0; i < ucode_size; i++)
707 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
708 
709 		/* put the engine back into the active state */
710 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
711 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
712 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
713 
714 		/* wait for training to complete */
715 		for (i = 0; i < rdev->usec_timeout; i++) {
716 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
717 				break;
718 			udelay(1);
719 		}
720 		for (i = 0; i < rdev->usec_timeout; i++) {
721 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
722 				break;
723 			udelay(1);
724 		}
728 	}
729 
730 	return 0;
731 }
732 
733 /**
734  * cik_init_microcode - load ucode images from disk
735  *
736  * @rdev: radeon_device pointer
737  *
738  * Use the firmware interface to load the ucode images into
739  * the driver (not loaded into hw).
740  * Returns 0 on success, error on failure.
741  */
742 static int cik_init_microcode(struct radeon_device *rdev)
743 {
744 	const char *chip_name;
745 	size_t pfp_req_size, me_req_size, ce_req_size,
746 		mec_req_size, rlc_req_size, mc_req_size,
747 		sdma_req_size;
748 	char fw_name[30];
749 	int err;
750 
751 	DRM_DEBUG("\n");
752 
753 	switch (rdev->family) {
754 	case CHIP_BONAIRE:
755 		chip_name = "BONAIRE";
756 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
757 		me_req_size = CIK_ME_UCODE_SIZE * 4;
758 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
759 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
760 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
761 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
762 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
763 		break;
764 	case CHIP_KAVERI:
765 		chip_name = "KAVERI";
766 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767 		me_req_size = CIK_ME_UCODE_SIZE * 4;
768 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
769 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
771 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772 		break;
773 	case CHIP_KABINI:
774 		chip_name = "KABINI";
775 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
776 		me_req_size = CIK_ME_UCODE_SIZE * 4;
777 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
778 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
779 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
780 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
781 		break;
782 	default: BUG();
783 	}
784 
785 	DRM_INFO("Loading %s Microcode\n", chip_name);
786 
787 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
788 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
789 	if (err)
790 		goto out;
791 	if (rdev->pfp_fw->size != pfp_req_size) {
792 		printk(KERN_ERR
793 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
794 		       rdev->pfp_fw->size, fw_name);
795 		err = -EINVAL;
796 		goto out;
797 	}
798 
799 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
800 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
801 	if (err)
802 		goto out;
803 	if (rdev->me_fw->size != me_req_size) {
804 		printk(KERN_ERR
805 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
806 		       rdev->me_fw->size, fw_name);
807 		err = -EINVAL;
808 	}
809 
810 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
811 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
812 	if (err)
813 		goto out;
814 	if (rdev->ce_fw->size != ce_req_size) {
815 		printk(KERN_ERR
816 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
817 		       rdev->ce_fw->size, fw_name);
818 		err = -EINVAL;
819 	}
820 
821 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
822 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
823 	if (err)
824 		goto out;
825 	if (rdev->mec_fw->size != mec_req_size) {
826 		printk(KERN_ERR
827 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
828 		       rdev->mec_fw->size, fw_name);
829 		err = -EINVAL;
830 	}
831 
832 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
833 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
834 	if (err)
835 		goto out;
836 	if (rdev->rlc_fw->size != rlc_req_size) {
837 		printk(KERN_ERR
838 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
839 		       rdev->rlc_fw->size, fw_name);
840 		err = -EINVAL;
841 	}
842 
843 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
844 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
845 	if (err)
846 		goto out;
847 	if (rdev->sdma_fw->size != sdma_req_size) {
848 		printk(KERN_ERR
849 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
850 		       rdev->sdma_fw->size, fw_name);
851 		err = -EINVAL;
852 	}
853 
854 	/* No MC ucode on APUs */
855 	if (!(rdev->flags & RADEON_IS_IGP)) {
856 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
857 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
858 		if (err)
859 			goto out;
860 		if (rdev->mc_fw->size != mc_req_size) {
861 			printk(KERN_ERR
862 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
863 			       rdev->mc_fw->size, fw_name);
864 			err = -EINVAL;
865 		}
866 	}
867 
868 out:
869 	if (err) {
870 		if (err != -EINVAL)
871 			printk(KERN_ERR
872 			       "cik_cp: Failed to load firmware \"%s\"\n",
873 			       fw_name);
874 		release_firmware(rdev->pfp_fw);
875 		rdev->pfp_fw = NULL;
876 		release_firmware(rdev->me_fw);
877 		rdev->me_fw = NULL;
878 		release_firmware(rdev->ce_fw);
879 		rdev->ce_fw = NULL;
880 		release_firmware(rdev->rlc_fw);
881 		rdev->rlc_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
882 		release_firmware(rdev->mc_fw);
883 		rdev->mc_fw = NULL;
884 	}
885 	return err;
886 }
887 
888 /*
889  * Core functions
890  */
891 /**
892  * cik_tiling_mode_table_init - init the hw tiling table
893  *
894  * @rdev: radeon_device pointer
895  *
896  * Starting with SI, the tiling setup is done globally in a
897  * set of 32 tiling modes.  Rather than selecting each set of
898  * parameters per surface as on older asics, we just select
899  * which index in the tiling table we want to use, and the
900  * surface uses those parameters (CIK).
901  */
902 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
903 {
904 	const u32 num_tile_mode_states = 32;
905 	const u32 num_secondary_tile_mode_states = 16;
906 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
907 	u32 num_pipe_configs;
908 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
909 		rdev->config.cik.max_shader_engines;
910 
911 	switch (rdev->config.cik.mem_row_size_in_kb) {
912 	case 1:
913 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
914 		break;
915 	case 2:
916 	default:
917 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
918 		break;
919 	case 4:
920 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
921 		break;
922 	}
923 
924 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
925 	if (num_pipe_configs > 8)
926 		num_pipe_configs = 8; /* ??? */
927 
928 	if (num_pipe_configs == 8) {
929 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
930 			switch (reg_offset) {
931 			case 0:
932 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
933 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
934 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
935 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
936 				break;
937 			case 1:
938 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
940 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
941 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
942 				break;
943 			case 2:
944 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
946 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
947 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
948 				break;
949 			case 3:
950 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
952 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
953 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
954 				break;
955 			case 4:
956 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
958 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
959 						 TILE_SPLIT(split_equal_to_row_size));
960 				break;
961 			case 5:
962 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
963 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
964 				break;
965 			case 6:
966 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
967 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
969 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
970 				break;
971 			case 7:
972 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
973 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
974 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
975 						 TILE_SPLIT(split_equal_to_row_size));
976 				break;
977 			case 8:
978 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
979 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
980 				break;
981 			case 9:
982 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
983 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
984 				break;
985 			case 10:
986 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
987 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
988 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
989 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
990 				break;
991 			case 11:
992 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
993 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
994 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
995 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
996 				break;
997 			case 12:
998 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
999 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1001 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1002 				break;
1003 			case 13:
1004 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1005 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1006 				break;
1007 			case 14:
1008 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1009 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1010 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1011 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1012 				break;
1013 			case 16:
1014 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1015 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1016 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1017 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1018 				break;
1019 			case 17:
1020 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1021 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1022 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1023 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1024 				break;
1025 			case 27:
1026 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1027 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1028 				break;
1029 			case 28:
1030 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1031 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1032 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1033 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1034 				break;
1035 			case 29:
1036 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1037 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1038 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1039 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1040 				break;
1041 			case 30:
1042 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1043 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1044 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1045 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 				break;
1047 			default:
1048 				gb_tile_moden = 0;
1049 				break;
1050 			}
1051 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1052 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1053 		}
1054 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1055 			switch (reg_offset) {
1056 			case 0:
1057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1060 						 NUM_BANKS(ADDR_SURF_16_BANK));
1061 				break;
1062 			case 1:
1063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1066 						 NUM_BANKS(ADDR_SURF_16_BANK));
1067 				break;
1068 			case 2:
1069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1072 						 NUM_BANKS(ADDR_SURF_16_BANK));
1073 				break;
1074 			case 3:
1075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1078 						 NUM_BANKS(ADDR_SURF_16_BANK));
1079 				break;
1080 			case 4:
1081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 						 NUM_BANKS(ADDR_SURF_8_BANK));
1085 				break;
1086 			case 5:
1087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1090 						 NUM_BANKS(ADDR_SURF_4_BANK));
1091 				break;
1092 			case 6:
1093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1096 						 NUM_BANKS(ADDR_SURF_2_BANK));
1097 				break;
1098 			case 8:
1099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1102 						 NUM_BANKS(ADDR_SURF_16_BANK));
1103 				break;
1104 			case 9:
1105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1108 						 NUM_BANKS(ADDR_SURF_16_BANK));
1109 				break;
1110 			case 10:
1111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1114 						 NUM_BANKS(ADDR_SURF_16_BANK));
1115 				break;
1116 			case 11:
1117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1120 						 NUM_BANKS(ADDR_SURF_16_BANK));
1121 				break;
1122 			case 12:
1123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1126 						 NUM_BANKS(ADDR_SURF_8_BANK));
1127 				break;
1128 			case 13:
1129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1132 						 NUM_BANKS(ADDR_SURF_4_BANK));
1133 				break;
1134 			case 14:
1135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1138 						 NUM_BANKS(ADDR_SURF_2_BANK));
1139 				break;
1140 			default:
1141 				gb_tile_moden = 0;
1142 				break;
1143 			}
1144 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1145 		}
1146 	} else if (num_pipe_configs == 4) {
1147 		if (num_rbs == 4) {
1148 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1149 				switch (reg_offset) {
1150 				case 0:
1151 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1152 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1153 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1155 					break;
1156 				case 1:
1157 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1161 					break;
1162 				case 2:
1163 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1164 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1165 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1167 					break;
1168 				case 3:
1169 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1171 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1173 					break;
1174 				case 4:
1175 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1176 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1177 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178 							 TILE_SPLIT(split_equal_to_row_size));
1179 					break;
1180 				case 5:
1181 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1182 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1183 					break;
1184 				case 6:
1185 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1186 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1187 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1188 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1189 					break;
1190 				case 7:
1191 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1192 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1193 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1194 							 TILE_SPLIT(split_equal_to_row_size));
1195 					break;
1196 				case 8:
1197 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1198 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1199 					break;
1200 				case 9:
1201 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1202 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1203 					break;
1204 				case 10:
1205 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1206 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1207 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1208 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1209 					break;
1210 				case 11:
1211 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1212 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1213 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1214 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1215 					break;
1216 				case 12:
1217 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1218 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1219 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1220 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1221 					break;
1222 				case 13:
1223 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1225 					break;
1226 				case 14:
1227 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231 					break;
1232 				case 16:
1233 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1237 					break;
1238 				case 17:
1239 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1243 					break;
1244 				case 27:
1245 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1247 					break;
1248 				case 28:
1249 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253 					break;
1254 				case 29:
1255 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1259 					break;
1260 				case 30:
1261 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265 					break;
1266 				default:
1267 					gb_tile_moden = 0;
1268 					break;
1269 				}
1270 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1271 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1272 			}
1273 		} else if (num_rbs < 4) {
1274 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275 				switch (reg_offset) {
1276 				case 0:
1277 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1281 					break;
1282 				case 1:
1283 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1287 					break;
1288 				case 2:
1289 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1293 					break;
1294 				case 3:
1295 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1299 					break;
1300 				case 4:
1301 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304 							 TILE_SPLIT(split_equal_to_row_size));
1305 					break;
1306 				case 5:
1307 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309 					break;
1310 				case 6:
1311 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1315 					break;
1316 				case 7:
1317 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 							 TILE_SPLIT(split_equal_to_row_size));
1321 					break;
1322 				case 8:
1323 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1325 					break;
1326 				case 9:
1327 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1329 					break;
1330 				case 10:
1331 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335 					break;
1336 				case 11:
1337 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1341 					break;
1342 				case 12:
1343 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1347 					break;
1348 				case 13:
1349 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1351 					break;
1352 				case 14:
1353 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357 					break;
1358 				case 16:
1359 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1363 					break;
1364 				case 17:
1365 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1369 					break;
1370 				case 27:
1371 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1373 					break;
1374 				case 28:
1375 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379 					break;
1380 				case 29:
1381 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385 					break;
1386 				case 30:
1387 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391 					break;
1392 				default:
1393 					gb_tile_moden = 0;
1394 					break;
1395 				}
1396 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1397 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1398 			}
1399 		}
1400 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401 			switch (reg_offset) {
1402 			case 0:
1403 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406 						 NUM_BANKS(ADDR_SURF_16_BANK));
1407 				break;
1408 			case 1:
1409 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412 						 NUM_BANKS(ADDR_SURF_16_BANK));
1413 				break;
1414 			case 2:
1415 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418 						 NUM_BANKS(ADDR_SURF_16_BANK));
1419 				break;
1420 			case 3:
1421 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424 						 NUM_BANKS(ADDR_SURF_16_BANK));
1425 				break;
1426 			case 4:
1427 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430 						 NUM_BANKS(ADDR_SURF_16_BANK));
1431 				break;
1432 			case 5:
1433 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436 						 NUM_BANKS(ADDR_SURF_8_BANK));
1437 				break;
1438 			case 6:
1439 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442 						 NUM_BANKS(ADDR_SURF_4_BANK));
1443 				break;
1444 			case 8:
1445 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 						 NUM_BANKS(ADDR_SURF_16_BANK));
1449 				break;
1450 			case 9:
1451 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 						 NUM_BANKS(ADDR_SURF_16_BANK));
1455 				break;
1456 			case 10:
1457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460 						 NUM_BANKS(ADDR_SURF_16_BANK));
1461 				break;
1462 			case 11:
1463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466 						 NUM_BANKS(ADDR_SURF_16_BANK));
1467 				break;
1468 			case 12:
1469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472 						 NUM_BANKS(ADDR_SURF_16_BANK));
1473 				break;
1474 			case 13:
1475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478 						 NUM_BANKS(ADDR_SURF_8_BANK));
1479 				break;
1480 			case 14:
1481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484 						 NUM_BANKS(ADDR_SURF_4_BANK));
1485 				break;
1486 			default:
1487 				gb_tile_moden = 0;
1488 				break;
1489 			}
1490 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1491 		}
1492 	} else if (num_pipe_configs == 2) {
1493 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494 			switch (reg_offset) {
1495 			case 0:
1496 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498 						 PIPE_CONFIG(ADDR_SURF_P2) |
1499 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1500 				break;
1501 			case 1:
1502 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504 						 PIPE_CONFIG(ADDR_SURF_P2) |
1505 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1506 				break;
1507 			case 2:
1508 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510 						 PIPE_CONFIG(ADDR_SURF_P2) |
1511 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1512 				break;
1513 			case 3:
1514 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516 						 PIPE_CONFIG(ADDR_SURF_P2) |
1517 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1518 				break;
1519 			case 4:
1520 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522 						 PIPE_CONFIG(ADDR_SURF_P2) |
1523 						 TILE_SPLIT(split_equal_to_row_size));
1524 				break;
1525 			case 5:
1526 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1528 				break;
1529 			case 6:
1530 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532 						 PIPE_CONFIG(ADDR_SURF_P2) |
1533 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1534 				break;
1535 			case 7:
1536 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538 						 PIPE_CONFIG(ADDR_SURF_P2) |
1539 						 TILE_SPLIT(split_equal_to_row_size));
1540 				break;
1541 			case 8:
1542 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1543 				break;
1544 			case 9:
1545 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1547 				break;
1548 			case 10:
1549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 						 PIPE_CONFIG(ADDR_SURF_P2) |
1552 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553 				break;
1554 			case 11:
1555 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557 						 PIPE_CONFIG(ADDR_SURF_P2) |
1558 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1559 				break;
1560 			case 12:
1561 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563 						 PIPE_CONFIG(ADDR_SURF_P2) |
1564 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1565 				break;
1566 			case 13:
1567 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1569 				break;
1570 			case 14:
1571 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573 						 PIPE_CONFIG(ADDR_SURF_P2) |
1574 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575 				break;
1576 			case 16:
1577 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579 						 PIPE_CONFIG(ADDR_SURF_P2) |
1580 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1581 				break;
1582 			case 17:
1583 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585 						 PIPE_CONFIG(ADDR_SURF_P2) |
1586 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1587 				break;
1588 			case 27:
1589 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1591 				break;
1592 			case 28:
1593 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595 						 PIPE_CONFIG(ADDR_SURF_P2) |
1596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597 				break;
1598 			case 29:
1599 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601 						 PIPE_CONFIG(ADDR_SURF_P2) |
1602 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1603 				break;
1604 			case 30:
1605 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607 						 PIPE_CONFIG(ADDR_SURF_P2) |
1608 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609 				break;
1610 			default:
1611 				gb_tile_moden = 0;
1612 				break;
1613 			}
1614 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1615 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1616 		}
1617 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618 			switch (reg_offset) {
1619 			case 0:
1620 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623 						 NUM_BANKS(ADDR_SURF_16_BANK));
1624 				break;
1625 			case 1:
1626 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629 						 NUM_BANKS(ADDR_SURF_16_BANK));
1630 				break;
1631 			case 2:
1632 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635 						 NUM_BANKS(ADDR_SURF_16_BANK));
1636 				break;
1637 			case 3:
1638 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641 						 NUM_BANKS(ADDR_SURF_16_BANK));
1642 				break;
1643 			case 4:
1644 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647 						 NUM_BANKS(ADDR_SURF_16_BANK));
1648 				break;
1649 			case 5:
1650 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653 						 NUM_BANKS(ADDR_SURF_16_BANK));
1654 				break;
1655 			case 6:
1656 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659 						 NUM_BANKS(ADDR_SURF_8_BANK));
1660 				break;
1661 			case 8:
1662 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665 						 NUM_BANKS(ADDR_SURF_16_BANK));
1666 				break;
1667 			case 9:
1668 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671 						 NUM_BANKS(ADDR_SURF_16_BANK));
1672 				break;
1673 			case 10:
1674 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677 						 NUM_BANKS(ADDR_SURF_16_BANK));
1678 				break;
1679 			case 11:
1680 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683 						 NUM_BANKS(ADDR_SURF_16_BANK));
1684 				break;
1685 			case 12:
1686 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689 						 NUM_BANKS(ADDR_SURF_16_BANK));
1690 				break;
1691 			case 13:
1692 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695 						 NUM_BANKS(ADDR_SURF_16_BANK));
1696 				break;
1697 			case 14:
1698 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701 						 NUM_BANKS(ADDR_SURF_8_BANK));
1702 				break;
1703 			default:
1704 				gb_tile_moden = 0;
1705 				break;
1706 			}
1707 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1708 		}
1709 	} else
1710 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1711 }
1712 
1713 /**
1714  * cik_select_se_sh - select which SE, SH to address
1715  *
1716  * @rdev: radeon_device pointer
1717  * @se_num: shader engine to address
1718  * @sh_num: sh block to address
1719  *
1720  * Select which SE, SH combinations to address. Certain
1721  * registers are instanced per SE or SH.  0xffffffff means
1722  * broadcast to all SEs or SHs (CIK).
1723  */
1724 static void cik_select_se_sh(struct radeon_device *rdev,
1725 			     u32 se_num, u32 sh_num)
1726 {
1727 	u32 data = INSTANCE_BROADCAST_WRITES;
1728 
1729 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1730 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1731 	else if (se_num == 0xffffffff)
1732 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733 	else if (sh_num == 0xffffffff)
1734 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1735 	else
1736 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737 	WREG32(GRBM_GFX_INDEX, data);
1738 }
1739 
1740 /**
1741  * cik_create_bitmask - create a bitmask
1742  *
1743  * @bit_width: length of the mask
1744  *
1745  * create a variable length bit mask (CIK).
1746  * Returns the bitmask.
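 * For example, bit_width = 4 yields 0xf and bit_width = 0 yields 0.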
1747  */
1748 static u32 cik_create_bitmask(u32 bit_width)
1749 {
1750 	u32 i, mask = 0;
1751 
1752 	for (i = 0; i < bit_width; i++) {
1753 		mask <<= 1;
1754 		mask |= 1;
1755 	}
1756 	return mask;
1757 }
1758 
1759 /**
1760  * cik_get_rb_disabled - get the mask of disabled RBs (render backends)
1761  *
1762  * @rdev: radeon_device pointer
1763  * @max_rb_num: max RBs (render backends) for the asic
1764  * @se_num: number of SEs (shader engines) for the asic
1765  * @sh_per_se: number of SH blocks per SE for the asic
1766  *
1767  * Calculates the bitmask of disabled RBs (CIK).
1768  * Returns the disabled RB bitmask.
1769  */
1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771 			      u32 max_rb_num, u32 se_num,
1772 			      u32 sh_per_se)
1773 {
1774 	u32 data, mask;
1775 
1776 	data = RREG32(CC_RB_BACKEND_DISABLE);
1777 	if (data & 1)
1778 		data &= BACKEND_DISABLE_MASK;
1779 	else
1780 		data = 0;
1781 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1782 
1783 	data >>= BACKEND_DISABLE_SHIFT;
1784 
1785 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786 
1787 	return data & mask;
1788 }
1789 
1790 /**
1791  * cik_setup_rb - setup the RBs on the asic
1792  *
1793  * @rdev: radeon_device pointer
1794  * @se_num: number of SEs (shader engines) for the asic
1795  * @sh_per_se: number of SH blocks per SE for the asic
1796  * @max_rb_num: max RBs (render backends) for the asic
1797  *
1798  * Configures per-SE/SH RB registers (CIK).
1799  */
1800 static void cik_setup_rb(struct radeon_device *rdev,
1801 			 u32 se_num, u32 sh_per_se,
1802 			 u32 max_rb_num)
1803 {
1804 	int i, j;
1805 	u32 data, mask;
1806 	u32 disabled_rbs = 0;
1807 	u32 enabled_rbs = 0;
1808 
1809 	for (i = 0; i < se_num; i++) {
1810 		for (j = 0; j < sh_per_se; j++) {
1811 			cik_select_se_sh(rdev, i, j);
1812 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1814 		}
1815 	}
1816 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1817 
1818 	mask = 1;
1819 	for (i = 0; i < max_rb_num; i++) {
1820 		if (!(disabled_rbs & mask))
1821 			enabled_rbs |= mask;
1822 		mask <<= 1;
1823 	}
1824 
1825 	for (i = 0; i < se_num; i++) {
1826 		cik_select_se_sh(rdev, i, 0xffffffff);
1827 		data = 0;
1828 		for (j = 0; j < sh_per_se; j++) {
1829 			switch (enabled_rbs & 3) {
1830 			case 1:
1831 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1832 				break;
1833 			case 2:
1834 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1835 				break;
1836 			case 3:
1837 			default:
1838 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1839 				break;
1840 			}
1841 			enabled_rbs >>= 2;
1842 		}
1843 		WREG32(PA_SC_RASTER_CONFIG, data);
1844 	}
1845 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1846 }
1847 
1848 /**
1849  * cik_gpu_init - setup the 3D engine
1850  *
1851  * @rdev: radeon_device pointer
1852  *
1853  * Configures the 3D engine and tiling configuration
1854  * registers so that the 3D engine is usable.
1855  */
1856 static void cik_gpu_init(struct radeon_device *rdev)
1857 {
1858 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859 	u32 mc_shared_chmap, mc_arb_ramcfg;
1860 	u32 hdp_host_path_cntl;
1861 	u32 tmp;
1862 	int i, j;
1863 
1864 	switch (rdev->family) {
1865 	case CHIP_BONAIRE:
1866 		rdev->config.cik.max_shader_engines = 2;
1867 		rdev->config.cik.max_tile_pipes = 4;
1868 		rdev->config.cik.max_cu_per_sh = 7;
1869 		rdev->config.cik.max_sh_per_se = 1;
1870 		rdev->config.cik.max_backends_per_se = 2;
1871 		rdev->config.cik.max_texture_channel_caches = 4;
1872 		rdev->config.cik.max_gprs = 256;
1873 		rdev->config.cik.max_gs_threads = 32;
1874 		rdev->config.cik.max_hw_contexts = 8;
1875 
1876 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1881 		break;
1882 	case CHIP_KAVERI:
1883 		/* TODO */
1884 		break;
1885 	case CHIP_KABINI:
1886 	default:
1887 		rdev->config.cik.max_shader_engines = 1;
1888 		rdev->config.cik.max_tile_pipes = 2;
1889 		rdev->config.cik.max_cu_per_sh = 2;
1890 		rdev->config.cik.max_sh_per_se = 1;
1891 		rdev->config.cik.max_backends_per_se = 1;
1892 		rdev->config.cik.max_texture_channel_caches = 2;
1893 		rdev->config.cik.max_gprs = 256;
1894 		rdev->config.cik.max_gs_threads = 16;
1895 		rdev->config.cik.max_hw_contexts = 8;
1896 
1897 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1902 		break;
1903 	}
1904 
1905 	/* Initialize HDP */
1906 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907 		WREG32((0x2c14 + j), 0x00000000);
1908 		WREG32((0x2c18 + j), 0x00000000);
1909 		WREG32((0x2c1c + j), 0x00000000);
1910 		WREG32((0x2c20 + j), 0x00000000);
1911 		WREG32((0x2c24 + j), 0x00000000);
1912 	}
1913 
1914 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1915 
1916 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1917 
1918 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1920 
1921 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922 	rdev->config.cik.mem_max_burst_length_bytes = 256;
1923 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925 	if (rdev->config.cik.mem_row_size_in_kb > 4)
1926 		rdev->config.cik.mem_row_size_in_kb = 4;
1927 	/* XXX use MC settings? */
1928 	rdev->config.cik.shader_engine_tile_size = 32;
1929 	rdev->config.cik.num_gpus = 1;
1930 	rdev->config.cik.multi_gpu_tile_size = 64;
1931 
1932 	/* fix up row size */
1933 	gb_addr_config &= ~ROW_SIZE_MASK;
1934 	switch (rdev->config.cik.mem_row_size_in_kb) {
1935 	case 1:
1936 	default:
1937 		gb_addr_config |= ROW_SIZE(0);
1938 		break;
1939 	case 2:
1940 		gb_addr_config |= ROW_SIZE(1);
1941 		break;
1942 	case 4:
1943 		gb_addr_config |= ROW_SIZE(2);
1944 		break;
1945 	}
1946 
1947 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1948 	 * not have bank info, so create a custom tiling dword.
1949 	 * bits 3:0   num_pipes
1950 	 * bits 7:4   num_banks
1951 	 * bits 11:8  group_size
1952 	 * bits 15:12 row_size
1953 	 */
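	/*
	 * Worked example (field values hypothetical): a 4-pipe part with a
	 * non-zero NOOFBANK field packs as
	 * (2 << 0) | (1 << 4) | (pipe_interleave << 8) | (row_size << 12).
	 */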
1954 	rdev->config.cik.tile_config = 0;
1955 	switch (rdev->config.cik.num_tile_pipes) {
1956 	case 1:
1957 		rdev->config.cik.tile_config |= (0 << 0);
1958 		break;
1959 	case 2:
1960 		rdev->config.cik.tile_config |= (1 << 0);
1961 		break;
1962 	case 4:
1963 		rdev->config.cik.tile_config |= (2 << 0);
1964 		break;
1965 	case 8:
1966 	default:
1967 		/* XXX what about 12? */
1968 		rdev->config.cik.tile_config |= (3 << 0);
1969 		break;
1970 	}
1971 	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972 		rdev->config.cik.tile_config |= 1 << 4;
1973 	else
1974 		rdev->config.cik.tile_config |= 0 << 4;
1975 	rdev->config.cik.tile_config |=
1976 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977 	rdev->config.cik.tile_config |=
1978 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1979 
1980 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
1983 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1985 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1988 
1989 	cik_tiling_mode_table_init(rdev);
1990 
1991 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992 		     rdev->config.cik.max_sh_per_se,
1993 		     rdev->config.cik.max_backends_per_se);
1994 
1995 	/* set HW defaults for 3D engine */
1996 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1997 
1998 	WREG32(SX_DEBUG_1, 0x20);
1999 
2000 	WREG32(TA_CNTL_AUX, 0x00010000);
2001 
2002 	tmp = RREG32(SPI_CONFIG_CNTL);
2003 	tmp |= 0x03000000;
2004 	WREG32(SPI_CONFIG_CNTL, tmp);
2005 
2006 	WREG32(SQ_CONFIG, 1);
2007 
2008 	WREG32(DB_DEBUG, 0);
2009 
2010 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2011 	tmp |= 0x00000400;
2012 	WREG32(DB_DEBUG2, tmp);
2013 
2014 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2015 	tmp |= 0x00020200;
2016 	WREG32(DB_DEBUG3, tmp);
2017 
2018 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2019 	tmp |= 0x00018208;
2020 	WREG32(CB_HW_CONTROL, tmp);
2021 
2022 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2023 
2024 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2028 
2029 	WREG32(VGT_NUM_INSTANCES, 1);
2030 
2031 	WREG32(CP_PERFMON_CNTL, 0);
2032 
2033 	WREG32(SQ_CONFIG, 0);
2034 
2035 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036 					  FORCE_EOV_MAX_REZ_CNT(255)));
2037 
2038 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2040 
2041 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2042 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2043 
2044 	tmp = RREG32(HDP_MISC_CNTL);
2045 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046 	WREG32(HDP_MISC_CNTL, tmp);
2047 
2048 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2050 
2051 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053 
2054 	udelay(50);
2055 }
2056 
2057 /*
2058  * GPU scratch registers helpers function.
2059  */
2060 /**
2061  * cik_scratch_init - setup driver info for CP scratch regs
2062  *
2063  * @rdev: radeon_device pointer
2064  *
2065  * Set up the number and offset of the CP scratch registers.
2066  * NOTE: use of CP scratch registers is a legacy interface and
2067  * is not used by default on newer asics (r6xx+).  On newer asics,
2068  * memory buffers are used for fences rather than scratch regs.
2069  */
2070 static void cik_scratch_init(struct radeon_device *rdev)
2071 {
2072 	int i;
2073 
2074 	rdev->scratch.num_reg = 7;
2075 	rdev->scratch.reg_base = SCRATCH_REG0;
2076 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2077 		rdev->scratch.free[i] = true;
2078 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2079 	}
2080 }
2081 
2082 /**
2083  * cik_ring_test - basic gfx ring test
2084  *
2085  * @rdev: radeon_device pointer
2086  * @ring: radeon_ring structure holding ring information
2087  *
2088  * Allocate a scratch register and write to it using the gfx ring (CIK).
2089  * Provides a basic gfx ring test to verify that the ring is working.
2090  * Used by cik_cp_gfx_resume().
2091  * Returns 0 on success, error on failure.
2092  */
2093 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2094 {
2095 	uint32_t scratch;
2096 	uint32_t tmp = 0;
2097 	unsigned i;
2098 	int r;
2099 
2100 	r = radeon_scratch_get(rdev, &scratch);
2101 	if (r) {
2102 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2103 		return r;
2104 	}
2105 	WREG32(scratch, 0xCAFEDEAD);
2106 	r = radeon_ring_lock(rdev, ring, 3);
2107 	if (r) {
2108 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109 		radeon_scratch_free(rdev, scratch);
2110 		return r;
2111 	}
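	/* SET_UCONFIG_REG takes a dword offset relative to
	 * PACKET3_SET_UCONFIG_REG_START, hence the subtraction and >> 2 below.
	 */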
2112 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2113 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2114 	radeon_ring_write(ring, 0xDEADBEEF);
2115 	radeon_ring_unlock_commit(rdev, ring);
2116 
2117 	for (i = 0; i < rdev->usec_timeout; i++) {
2118 		tmp = RREG32(scratch);
2119 		if (tmp == 0xDEADBEEF)
2120 			break;
2121 		DRM_UDELAY(1);
2122 	}
2123 	if (i < rdev->usec_timeout) {
2124 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2125 	} else {
2126 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127 			  ring->idx, scratch, tmp);
2128 		r = -EINVAL;
2129 	}
2130 	radeon_scratch_free(rdev, scratch);
2131 	return r;
2132 }
2133 
2134 /**
2135  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2136  *
2137  * @rdev: radeon_device pointer
2138  * @fence: radeon fence object
2139  *
2140  * Emits a fence sequence number on the gfx ring and flushes
2141  * GPU caches.
2142  */
2143 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2144 			     struct radeon_fence *fence)
2145 {
2146 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2147 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2148 
2149 	/* EVENT_WRITE_EOP - flush caches, send int */
2150 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2151 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2152 				 EOP_TC_ACTION_EN |
2153 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2154 				 EVENT_INDEX(5)));
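	/* DATA_SEL(1) selects a 32-bit data write (the fence seq below);
	 * INT_SEL(2) raises an interrupt once that write is confirmed.
	 */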
2155 	radeon_ring_write(ring, addr & 0xfffffffc);
2156 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157 	radeon_ring_write(ring, fence->seq);
2158 	radeon_ring_write(ring, 0);
2159 	/* HDP flush */
2160 	/* We should be using the new WAIT_REG_MEM special op packet here
2161 	 * but it causes the CP to hang
2162 	 */
2163 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2164 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2165 				 WRITE_DATA_DST_SEL(0)));
2166 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2167 	radeon_ring_write(ring, 0);
2168 	radeon_ring_write(ring, 0);
2169 }
2170 
2171 /**
2172  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2173  *
2174  * @rdev: radeon_device pointer
2175  * @fence: radeon fence object
2176  *
2177  * Emits a fence sequence number on the compute ring and flushes
2178  * GPU caches.
2179  */
2180 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2181 				 struct radeon_fence *fence)
2182 {
2183 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2184 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2185 
2186 	/* RELEASE_MEM - flush caches, send int */
2187 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2188 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2189 				 EOP_TC_ACTION_EN |
2190 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2191 				 EVENT_INDEX(5)));
2192 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2193 	radeon_ring_write(ring, addr & 0xfffffffc);
2194 	radeon_ring_write(ring, upper_32_bits(addr));
2195 	radeon_ring_write(ring, fence->seq);
2196 	radeon_ring_write(ring, 0);
2197 	/* HDP flush */
2198 	/* We should be using the new WAIT_REG_MEM special op packet here
2199 	 * but it causes the CP to hang
2200 	 */
2201 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2202 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2203 				 WRITE_DATA_DST_SEL(0)));
2204 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2205 	radeon_ring_write(ring, 0);
2206 	radeon_ring_write(ring, 0);
2207 }
2208 
2209 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2210 			     struct radeon_ring *ring,
2211 			     struct radeon_semaphore *semaphore,
2212 			     bool emit_wait)
2213 {
2214 	uint64_t addr = semaphore->gpu_addr;
2215 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2216 
2217 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2218 	radeon_ring_write(ring, addr & 0xffffffff);
2219 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2220 }
2221 
2222 /*
2223  * IB stuff
2224  */
2225 /**
2226  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2227  *
2228  * @rdev: radeon_device pointer
2229  * @ib: radeon indirect buffer object
2230  *
2231  * Emits a DE (drawing engine) or CE (constant engine) IB
2232  * on the gfx ring.  IBs are usually generated by userspace
2233  * acceleration drivers and submitted to the kernel for
2234  * scheduling on the ring.  This function schedules the IB
2235  * on the gfx ring for execution by the GPU.
2236  */
2237 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2238 {
2239 	struct radeon_ring *ring = &rdev->ring[ib->ring];
2240 	u32 header, control = INDIRECT_BUFFER_VALID;
2241 
2242 	if (ib->is_const_ib) {
2243 		/* set switch buffer packet before const IB */
2244 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2245 		radeon_ring_write(ring, 0);
2246 
2247 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2248 	} else {
2249 		u32 next_rptr;
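		/* next_rptr points just past this IB packet: 3 DWs for the
		 * SET_UCONFIG_REG write (or 5 DWs for the WRITE_DATA below)
		 * plus the 4 DW INDIRECT_BUFFER packet emitted at the end.
		 */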
2250 		if (ring->rptr_save_reg) {
2251 			next_rptr = ring->wptr + 3 + 4;
2252 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2253 			radeon_ring_write(ring, ((ring->rptr_save_reg -
2254 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
2255 			radeon_ring_write(ring, next_rptr);
2256 		} else if (rdev->wb.enabled) {
2257 			next_rptr = ring->wptr + 5 + 4;
2258 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2259 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2260 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2261 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2262 			radeon_ring_write(ring, next_rptr);
2263 		}
2264 
2265 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2266 	}
2267 
2268 	control |= ib->length_dw |
2269 		(ib->vm ? (ib->vm->id << 24) : 0);
2270 
2271 	radeon_ring_write(ring, header);
2272 	radeon_ring_write(ring,
2273 #ifdef __BIG_ENDIAN
2274 			  (2 << 0) |
2275 #endif
2276 			  (ib->gpu_addr & 0xFFFFFFFC));
2277 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2278 	radeon_ring_write(ring, control);
2279 }
2280 
2281 /**
2282  * cik_ib_test - basic gfx ring IB test
2283  *
2284  * @rdev: radeon_device pointer
2285  * @ring: radeon_ring structure holding ring information
2286  *
2287  * Allocate an IB and execute it on the gfx ring (CIK).
2288  * Provides a basic gfx ring test to verify that IBs are working.
2289  * Returns 0 on success, error on failure.
2290  */
2291 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2292 {
2293 	struct radeon_ib ib;
2294 	uint32_t scratch;
2295 	uint32_t tmp = 0;
2296 	unsigned i;
2297 	int r;
2298 
2299 	r = radeon_scratch_get(rdev, &scratch);
2300 	if (r) {
2301 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2302 		return r;
2303 	}
2304 	WREG32(scratch, 0xCAFEDEAD);
2305 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2306 	if (r) {
2307 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2308 		return r;
2309 	}
2310 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2311 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2312 	ib.ptr[2] = 0xDEADBEEF;
2313 	ib.length_dw = 3;
2314 	r = radeon_ib_schedule(rdev, &ib, NULL);
2315 	if (r) {
2316 		radeon_scratch_free(rdev, scratch);
2317 		radeon_ib_free(rdev, &ib);
2318 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2319 		return r;
2320 	}
2321 	r = radeon_fence_wait(ib.fence, false);
2322 	if (r) {
2323 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2324 		return r;
2325 	}
2326 	for (i = 0; i < rdev->usec_timeout; i++) {
2327 		tmp = RREG32(scratch);
2328 		if (tmp == 0xDEADBEEF)
2329 			break;
2330 		DRM_UDELAY(1);
2331 	}
2332 	if (i < rdev->usec_timeout) {
2333 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2334 	} else {
2335 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2336 			  scratch, tmp);
2337 		r = -EINVAL;
2338 	}
2339 	radeon_scratch_free(rdev, scratch);
2340 	radeon_ib_free(rdev, &ib);
2341 	return r;
2342 }
2343 
2344 /*
2345  * CP.
2346  * On CIK, gfx and compute now have independent command processors.
2347  *
2348  * GFX
2349  * Gfx consists of a single ring and can process both gfx jobs and
2350  * compute jobs.  The gfx CP consists of three microengines (ME):
2351  * PFP - Pre-Fetch Parser
2352  * ME - Micro Engine
2353  * CE - Constant Engine
2354  * The PFP and ME make up what is considered the Drawing Engine (DE).
2355  * The CE is an asynchronous engine used for updating buffer descriptors
2356  * used by the DE so that they can be loaded into cache in parallel
2357  * while the DE is processing state update packets.
2358  *
2359  * Compute
2360  * The compute CP consists of two microengines (ME):
2361  * MEC1 - Compute MicroEngine 1
2362  * MEC2 - Compute MicroEngine 2
2363  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2364  * The queues are exposed to userspace and are programmed directly
2365  * by the compute runtime.
2366  */
2367 /**
2368  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2369  *
2370  * @rdev: radeon_device pointer
2371  * @enable: enable or disable the MEs
2372  *
2373  * Halts or unhalts the gfx MEs.
2374  */
2375 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2376 {
2377 	if (enable)
2378 		WREG32(CP_ME_CNTL, 0);
2379 	else {
2380 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2381 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382 	}
2383 	udelay(50);
2384 }
2385 
2386 /**
2387  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2388  *
2389  * @rdev: radeon_device pointer
2390  *
2391  * Loads the gfx PFP, ME, and CE ucode.
2392  * Returns 0 for success, -EINVAL if the ucode is not available.
2393  */
2394 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2395 {
2396 	const __be32 *fw_data;
2397 	int i;
2398 
2399 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2400 		return -EINVAL;
2401 
2402 	cik_cp_gfx_enable(rdev, false);
2403 
2404 	/* PFP */
2405 	fw_data = (const __be32 *)rdev->pfp_fw->data;
2406 	WREG32(CP_PFP_UCODE_ADDR, 0);
2407 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2408 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2409 	WREG32(CP_PFP_UCODE_ADDR, 0);
2410 
2411 	/* CE */
2412 	fw_data = (const __be32 *)rdev->ce_fw->data;
2413 	WREG32(CP_CE_UCODE_ADDR, 0);
2414 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2415 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2416 	WREG32(CP_CE_UCODE_ADDR, 0);
2417 
2418 	/* ME */
2419 	fw_data = (const __be32 *)rdev->me_fw->data;
2420 	WREG32(CP_ME_RAM_WADDR, 0);
2421 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2422 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2423 	WREG32(CP_ME_RAM_WADDR, 0);
2424 
2425 	WREG32(CP_PFP_UCODE_ADDR, 0);
2426 	WREG32(CP_CE_UCODE_ADDR, 0);
2427 	WREG32(CP_ME_RAM_WADDR, 0);
2428 	WREG32(CP_ME_RAM_RADDR, 0);
2429 	return 0;
2430 }
2431 
2432 /**
2433  * cik_cp_gfx_start - start the gfx ring
2434  *
2435  * @rdev: radeon_device pointer
2436  *
2437  * Enables the ring and loads the clear state context and other
2438  * packets required to init the ring.
2439  * Returns 0 for success, error for failure.
2440  */
2441 static int cik_cp_gfx_start(struct radeon_device *rdev)
2442 {
2443 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2444 	int r, i;
2445 
2446 	/* init the CP */
2447 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2448 	WREG32(CP_ENDIAN_SWAP, 0);
2449 	WREG32(CP_DEVICE_ID, 1);
2450 
2451 	cik_cp_gfx_enable(rdev, true);
2452 
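	/* 17 extra DWs cover the fixed packets emitted below: SET_BASE (4),
	 * two PREAMBLE_CNTLs (2 + 2), CONTEXT_CONTROL (3), CLEAR_STATE (2)
	 * and SET_CONTEXT_REG (4).
	 */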
2453 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2454 	if (r) {
2455 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2456 		return r;
2457 	}
2458 
2459 	/* init the CE partitions.  CE only used for gfx on CIK */
2460 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2461 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2462 	radeon_ring_write(ring, 0xc000);
2463 	radeon_ring_write(ring, 0xc000);
2464 
2465 	/* setup clear context state */
2466 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2467 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2468 
2469 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2470 	radeon_ring_write(ring, 0x80000000);
2471 	radeon_ring_write(ring, 0x80000000);
2472 
2473 	for (i = 0; i < cik_default_size; i++)
2474 		radeon_ring_write(ring, cik_default_state[i]);
2475 
2476 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2477 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2478 
2479 	/* set clear context state */
2480 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2481 	radeon_ring_write(ring, 0);
2482 
2483 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2484 	radeon_ring_write(ring, 0x00000316);
2485 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2487 
2488 	radeon_ring_unlock_commit(rdev, ring);
2489 
2490 	return 0;
2491 }
2492 
2493 /**
2494  * cik_cp_gfx_fini - stop the gfx ring
2495  *
2496  * @rdev: radeon_device pointer
2497  *
2498  * Stop the gfx ring and tear down the driver ring
2499  * info.
2500  */
2501 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2502 {
2503 	cik_cp_gfx_enable(rdev, false);
2504 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2505 }
2506 
2507 /**
2508  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2509  *
2510  * @rdev: radeon_device pointer
2511  *
2512  * Program the location and size of the gfx ring buffer
2513  * and test it to make sure it's working.
2514  * Returns 0 for success, error for failure.
2515  */
2516 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2517 {
2518 	struct radeon_ring *ring;
2519 	u32 tmp;
2520 	u32 rb_bufsz;
2521 	u64 rb_addr;
2522 	int r;
2523 
2524 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2525 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2526 
2527 	/* Set the write pointer delay */
2528 	WREG32(CP_RB_WPTR_DELAY, 0);
2529 
2530 	/* set the RB to use vmid 0 */
2531 	WREG32(CP_RB_VMID, 0);
2532 
2533 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2534 
2535 	/* ring 0 - compute and gfx */
2536 	/* Set ring buffer size */
2537 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2538 	rb_bufsz = drm_order(ring->ring_size / 8);
2539 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2540 #ifdef __BIG_ENDIAN
2541 	tmp |= BUF_SWAP_32BIT;
2542 #endif
2543 	WREG32(CP_RB0_CNTL, tmp);
2544 
2545 	/* Initialize the ring buffer's read and write pointers */
2546 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2547 	ring->wptr = 0;
2548 	WREG32(CP_RB0_WPTR, ring->wptr);
2549 
2550 	/* set the wb address whether it's enabled or not */
2551 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2552 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2553 
2554 	/* scratch register shadowing is no longer supported */
2555 	WREG32(SCRATCH_UMSK, 0);
2556 
2557 	if (!rdev->wb.enabled)
2558 		tmp |= RB_NO_UPDATE;
2559 
2560 	mdelay(1);
2561 	WREG32(CP_RB0_CNTL, tmp);
2562 
2563 	rb_addr = ring->gpu_addr >> 8;
2564 	WREG32(CP_RB0_BASE, rb_addr);
2565 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2566 
2567 	ring->rptr = RREG32(CP_RB0_RPTR);
2568 
2569 	/* start the ring */
2570 	cik_cp_gfx_start(rdev);
2571 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2572 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2573 	if (r) {
2574 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2575 		return r;
2576 	}
2577 	return 0;
2578 }
2579 
2580 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2581 			      struct radeon_ring *ring)
2582 {
2583 	u32 rptr;
2584 
2587 	if (rdev->wb.enabled) {
2588 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2589 	} else {
2590 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2591 		rptr = RREG32(CP_HQD_PQ_RPTR);
2592 		cik_srbm_select(rdev, 0, 0, 0, 0);
2593 	}
2594 	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2595 
2596 	return rptr;
2597 }
2598 
2599 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2600 			      struct radeon_ring *ring)
2601 {
2602 	u32 wptr;
2603 
2604 	if (rdev->wb.enabled) {
2605 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2606 	} else {
2607 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2608 		wptr = RREG32(CP_HQD_PQ_WPTR);
2609 		cik_srbm_select(rdev, 0, 0, 0, 0);
2610 	}
2611 	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2612 
2613 	return wptr;
2614 }
2615 
2616 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2617 			       struct radeon_ring *ring)
2618 {
2619 	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2620 
2621 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2622 	WDOORBELL32(ring->doorbell_offset, wptr);
2623 }
2624 
2625 /**
2626  * cik_cp_compute_enable - enable/disable the compute CP MEs
2627  *
2628  * @rdev: radeon_device pointer
2629  * @enable: enable or disable the MEs
2630  *
2631  * Halts or unhalts the compute MEs.
2632  */
2633 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2634 {
2635 	if (enable)
2636 		WREG32(CP_MEC_CNTL, 0);
2637 	else
2638 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2639 	udelay(50);
2640 }
2641 
2642 /**
2643  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2644  *
2645  * @rdev: radeon_device pointer
2646  *
2647  * Loads the compute MEC1&2 ucode.
2648  * Returns 0 for success, -EINVAL if the ucode is not available.
2649  */
2650 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2651 {
2652 	const __be32 *fw_data;
2653 	int i;
2654 
2655 	if (!rdev->mec_fw)
2656 		return -EINVAL;
2657 
2658 	cik_cp_compute_enable(rdev, false);
2659 
2660 	/* MEC1 */
2661 	fw_data = (const __be32 *)rdev->mec_fw->data;
2662 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2663 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2664 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2665 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2666 
2667 	if (rdev->family == CHIP_KAVERI) {
2668 		/* MEC2 */
2669 		fw_data = (const __be32 *)rdev->mec_fw->data;
2670 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2671 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2672 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2673 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2674 	}
2675 
2676 	return 0;
2677 }
2678 
2679 /**
2680  * cik_cp_compute_start - start the compute queues
2681  *
2682  * @rdev: radeon_device pointer
2683  *
2684  * Enable the compute queues.
2685  * Returns 0 for success, error for failure.
2686  */
2687 static int cik_cp_compute_start(struct radeon_device *rdev)
2688 {
2689 	cik_cp_compute_enable(rdev, true);
2690 
2691 	return 0;
2692 }
2693 
2694 /**
2695  * cik_cp_compute_fini - stop the compute queues
2696  *
2697  * @rdev: radeon_device pointer
2698  *
2699  * Stop the compute queues and tear down the driver queue
2700  * info.
2701  */
2702 static void cik_cp_compute_fini(struct radeon_device *rdev)
2703 {
2704 	int i, idx, r;
2705 
2706 	cik_cp_compute_enable(rdev, false);
2707 
2708 	for (i = 0; i < 2; i++) {
2709 		if (i == 0)
2710 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2711 		else
2712 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2713 
2714 		if (rdev->ring[idx].mqd_obj) {
2715 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2716 			if (unlikely(r != 0))
2717 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2718 
2719 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2720 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2721 
2722 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2723 			rdev->ring[idx].mqd_obj = NULL;
2724 		}
2725 	}
2726 }
2727 
2728 static void cik_mec_fini(struct radeon_device *rdev)
2729 {
2730 	int r;
2731 
2732 	if (rdev->mec.hpd_eop_obj) {
2733 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2734 		if (unlikely(r != 0))
2735 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2736 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2737 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2738 
2739 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2740 		rdev->mec.hpd_eop_obj = NULL;
2741 	}
2742 }
2743 
2744 #define MEC_HPD_SIZE 2048
2745 
2746 static int cik_mec_init(struct radeon_device *rdev)
2747 {
2748 	int r;
2749 	u32 *hpd;
2750 
2751 	/*
2752 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2753 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2754 	 */
2755 	if (rdev->family == CHIP_KAVERI)
2756 		rdev->mec.num_mec = 2;
2757 	else
2758 		rdev->mec.num_mec = 1;
2759 	rdev->mec.num_pipe = 4;
2760 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2761 
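	/* HPD EOP backing store: num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes,
	 * i.e. 32KB on Kaveri (2 MECs) and 16KB on Bonaire/Kabini.
	 */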
2762 	if (rdev->mec.hpd_eop_obj == NULL) {
2763 		r = radeon_bo_create(rdev,
2764 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2765 				     PAGE_SIZE, true,
2766 				     RADEON_GEM_DOMAIN_GTT, NULL,
2767 				     &rdev->mec.hpd_eop_obj);
2768 		if (r) {
2769 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2770 			return r;
2771 		}
2772 	}
2773 
2774 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2775 	if (unlikely(r != 0)) {
2776 		cik_mec_fini(rdev);
2777 		return r;
2778 	}
2779 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2780 			  &rdev->mec.hpd_eop_gpu_addr);
2781 	if (r) {
2782 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2783 		cik_mec_fini(rdev);
2784 		return r;
2785 	}
2786 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2787 	if (r) {
2788 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2789 		cik_mec_fini(rdev);
2790 		return r;
2791 	}
2792 
2793 	/* clear memory.  Not sure if this is required or not */
2794 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2795 
2796 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2797 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2798 
2799 	return 0;
2800 }
2801 
2802 struct hqd_registers
2803 {
2804 	u32 cp_mqd_base_addr;
2805 	u32 cp_mqd_base_addr_hi;
2806 	u32 cp_hqd_active;
2807 	u32 cp_hqd_vmid;
2808 	u32 cp_hqd_persistent_state;
2809 	u32 cp_hqd_pipe_priority;
2810 	u32 cp_hqd_queue_priority;
2811 	u32 cp_hqd_quantum;
2812 	u32 cp_hqd_pq_base;
2813 	u32 cp_hqd_pq_base_hi;
2814 	u32 cp_hqd_pq_rptr;
2815 	u32 cp_hqd_pq_rptr_report_addr;
2816 	u32 cp_hqd_pq_rptr_report_addr_hi;
2817 	u32 cp_hqd_pq_wptr_poll_addr;
2818 	u32 cp_hqd_pq_wptr_poll_addr_hi;
2819 	u32 cp_hqd_pq_doorbell_control;
2820 	u32 cp_hqd_pq_wptr;
2821 	u32 cp_hqd_pq_control;
2822 	u32 cp_hqd_ib_base_addr;
2823 	u32 cp_hqd_ib_base_addr_hi;
2824 	u32 cp_hqd_ib_rptr;
2825 	u32 cp_hqd_ib_control;
2826 	u32 cp_hqd_iq_timer;
2827 	u32 cp_hqd_iq_rptr;
2828 	u32 cp_hqd_dequeue_request;
2829 	u32 cp_hqd_dma_offload;
2830 	u32 cp_hqd_sema_cmd;
2831 	u32 cp_hqd_msg_type;
2832 	u32 cp_hqd_atomic0_preop_lo;
2833 	u32 cp_hqd_atomic0_preop_hi;
2834 	u32 cp_hqd_atomic1_preop_lo;
2835 	u32 cp_hqd_atomic1_preop_hi;
2836 	u32 cp_hqd_hq_scheduler0;
2837 	u32 cp_hqd_hq_scheduler1;
2838 	u32 cp_mqd_control;
2839 };
2840 
2841 struct bonaire_mqd
2842 {
2843 	u32 header;
2844 	u32 dispatch_initiator;
2845 	u32 dimensions[3];
2846 	u32 start_idx[3];
2847 	u32 num_threads[3];
2848 	u32 pipeline_stat_enable;
2849 	u32 perf_counter_enable;
2850 	u32 pgm[2];
2851 	u32 tba[2];
2852 	u32 tma[2];
2853 	u32 pgm_rsrc[2];
2854 	u32 vmid;
2855 	u32 resource_limits;
2856 	u32 static_thread_mgmt01[2];
2857 	u32 tmp_ring_size;
2858 	u32 static_thread_mgmt23[2];
2859 	u32 restart[3];
2860 	u32 thread_trace_enable;
2861 	u32 reserved1;
2862 	u32 user_data[16];
2863 	u32 vgtcs_invoke_count[2];
2864 	struct hqd_registers queue_state;
2865 	u32 dequeue_cntr;
2866 	u32 interrupt_queue[64];
2867 };
2868 
2869 /**
2870  * cik_cp_compute_resume - setup the compute queue registers
2871  *
2872  * @rdev: radeon_device pointer
2873  *
2874  * Program the compute queues and test them to make sure they
2875  * are working.
2876  * Returns 0 for success, error for failure.
2877  */
2878 static int cik_cp_compute_resume(struct radeon_device *rdev)
2879 {
2880 	int r, i, j, idx;
2881 	u32 tmp;
2882 	bool use_doorbell = true;
2883 	u64 hqd_gpu_addr;
2884 	u64 mqd_gpu_addr;
2885 	u64 eop_gpu_addr;
2886 	u64 wb_gpu_addr;
2887 	u32 *buf;
2888 	struct bonaire_mqd *mqd;
2889 
2890 	r = cik_cp_compute_start(rdev);
2891 	if (r)
2892 		return r;
2893 
2894 	/* fix up chicken bits */
2895 	tmp = RREG32(CP_CPF_DEBUG);
2896 	tmp |= (1 << 23);
2897 	WREG32(CP_CPF_DEBUG, tmp);
2898 
2899 	/* init the pipes */
2900 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901 		int me = (i < 4) ? 1 : 2;
2902 		int pipe = (i < 4) ? i : (i - 4);
2903 
2904 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905 
2906 		cik_srbm_select(rdev, me, pipe, 0, 0);
2907 
2908 		/* write the EOP addr */
2909 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911 
2912 		/* set the VMID assigned */
2913 		WREG32(CP_HPD_EOP_VMID, 0);
2914 
2915 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
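		/* MEC_HPD_SIZE is 2048 bytes (512 dwords), so drm_order(2048 / 8)
		 * = 8 and the hardware sees 2^(8+1) = 512 dwords.
		 */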
2916 		tmp = RREG32(CP_HPD_EOP_CONTROL);
2917 		tmp &= ~EOP_SIZE_MASK;
2918 		tmp |= drm_order(MEC_HPD_SIZE / 8);
2919 		WREG32(CP_HPD_EOP_CONTROL, tmp);
2920 	}
2921 	cik_srbm_select(rdev, 0, 0, 0, 0);
2922 
2923 	/* init the queues.  Just two for now. */
2924 	for (i = 0; i < 2; i++) {
2925 		if (i == 0)
2926 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2927 		else
2928 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2929 
2930 		if (rdev->ring[idx].mqd_obj == NULL) {
2931 			r = radeon_bo_create(rdev,
2932 					     sizeof(struct bonaire_mqd),
2933 					     PAGE_SIZE, true,
2934 					     RADEON_GEM_DOMAIN_GTT, NULL,
2935 					     &rdev->ring[idx].mqd_obj);
2936 			if (r) {
2937 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2938 				return r;
2939 			}
2940 		}
2941 
2942 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2943 		if (unlikely(r != 0)) {
2944 			cik_cp_compute_fini(rdev);
2945 			return r;
2946 		}
2947 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2948 				  &mqd_gpu_addr);
2949 		if (r) {
2950 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2951 			cik_cp_compute_fini(rdev);
2952 			return r;
2953 		}
2954 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2955 		if (r) {
2956 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2957 			cik_cp_compute_fini(rdev);
2958 			return r;
2959 		}
2960 
2961 		/* doorbell offset */
2962 		rdev->ring[idx].doorbell_offset =
2963 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2964 
2965 		/* init the mqd struct */
2966 		memset(buf, 0, sizeof(struct bonaire_mqd));
2967 
2968 		mqd = (struct bonaire_mqd *)buf;
2969 		mqd->header = 0xC0310800;
2970 		mqd->static_thread_mgmt01[0] = 0xffffffff;
2971 		mqd->static_thread_mgmt01[1] = 0xffffffff;
2972 		mqd->static_thread_mgmt23[0] = 0xffffffff;
2973 		mqd->static_thread_mgmt23[1] = 0xffffffff;
2974 
2975 		cik_srbm_select(rdev, rdev->ring[idx].me,
2976 				rdev->ring[idx].pipe,
2977 				rdev->ring[idx].queue, 0);
2978 
2979 		/* disable wptr polling */
2980 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2981 		tmp &= ~WPTR_POLL_EN;
2982 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2983 
2984 		/* enable doorbell? */
2985 		mqd->queue_state.cp_hqd_pq_doorbell_control =
2986 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2987 		if (use_doorbell)
2988 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2989 		else
2990 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2991 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2992 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
2993 
2994 		/* disable the queue if it's active */
2995 		mqd->queue_state.cp_hqd_dequeue_request = 0;
2996 		mqd->queue_state.cp_hqd_pq_rptr = 0;
2997 		mqd->queue_state.cp_hqd_pq_wptr = 0;
2998 		if (RREG32(CP_HQD_ACTIVE) & 1) {
2999 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3000 			for (j = 0; j < rdev->usec_timeout; j++) {
3001 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3002 					break;
3003 				udelay(1);
3004 			}
3005 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3006 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3007 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3008 		}
3009 
3010 		/* set the pointer to the MQD */
3011 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3012 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3013 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3014 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3015 		/* set MQD vmid to 0 */
3016 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3017 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3018 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3019 
3020 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3021 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3022 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3023 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3024 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3025 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3026 
3027 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3028 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3029 		mqd->queue_state.cp_hqd_pq_control &=
3030 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3031 
3032 		mqd->queue_state.cp_hqd_pq_control |=
3033 			drm_order(rdev->ring[idx].ring_size / 8);
3034 		mqd->queue_state.cp_hqd_pq_control |=
3035 			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3036 #ifdef __BIG_ENDIAN
3037 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3038 #endif
3039 		mqd->queue_state.cp_hqd_pq_control &=
3040 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3041 		mqd->queue_state.cp_hqd_pq_control |=
3042 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3043 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3044 
3045 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3046 		if (i == 0)
3047 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3048 		else
3049 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3050 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3051 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3052 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3053 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3054 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3055 
3056 		/* set the wb address whether it's enabled or not */
3057 		if (i == 0)
3058 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3059 		else
3060 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3061 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3062 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3063 			upper_32_bits(wb_gpu_addr) & 0xffff;
3064 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3065 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3066 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3067 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3068 
3069 		/* enable the doorbell if requested */
3070 		if (use_doorbell) {
3071 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3072 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3073 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3074 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3075 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3076 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3077 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3078 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3079 
3080 		} else {
3081 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3082 		}
3083 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3084 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3085 
3086 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3087 		rdev->ring[idx].wptr = 0;
3088 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3089 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3090 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3091 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3092 
3093 		/* set the vmid for the queue */
3094 		mqd->queue_state.cp_hqd_vmid = 0;
3095 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3096 
3097 		/* activate the queue */
3098 		mqd->queue_state.cp_hqd_active = 1;
3099 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3100 
3101 		cik_srbm_select(rdev, 0, 0, 0, 0);
3102 
3103 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3104 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3105 
3106 		rdev->ring[idx].ready = true;
3107 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3108 		if (r)
3109 			rdev->ring[idx].ready = false;
3110 	}
3111 
3112 	return 0;
3113 }
3114 
3115 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3116 {
3117 	cik_cp_gfx_enable(rdev, enable);
3118 	cik_cp_compute_enable(rdev, enable);
3119 }
3120 
3121 static int cik_cp_load_microcode(struct radeon_device *rdev)
3122 {
3123 	int r;
3124 
3125 	r = cik_cp_gfx_load_microcode(rdev);
3126 	if (r)
3127 		return r;
3128 	r = cik_cp_compute_load_microcode(rdev);
3129 	if (r)
3130 		return r;
3131 
3132 	return 0;
3133 }
3134 
3135 static void cik_cp_fini(struct radeon_device *rdev)
3136 {
3137 	cik_cp_gfx_fini(rdev);
3138 	cik_cp_compute_fini(rdev);
3139 }
3140 
3141 static int cik_cp_resume(struct radeon_device *rdev)
3142 {
3143 	int r;
3144 
3145 	/* Reset all cp blocks */
3146 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3147 	RREG32(GRBM_SOFT_RESET);
3148 	mdelay(15);
3149 	WREG32(GRBM_SOFT_RESET, 0);
3150 	RREG32(GRBM_SOFT_RESET);
3151 
3152 	r = cik_cp_load_microcode(rdev);
3153 	if (r)
3154 		return r;
3155 
3156 	r = cik_cp_gfx_resume(rdev);
3157 	if (r)
3158 		return r;
3159 	r = cik_cp_compute_resume(rdev);
3160 	if (r)
3161 		return r;
3162 
3163 	return 0;
3164 }
3165 
3166 /*
3167  * sDMA - System DMA
3168  * Starting with CIK, the GPU has new asynchronous
3169  * DMA engines.  These engines are used for compute
3170  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3171  * and each one supports 1 ring buffer used for gfx
3172  * and 2 queues used for compute.
3173  *
3174  * The programming model is very similar to the CP
3175  * (ring buffer, IBs, etc.), but sDMA has its own
3176  * packet format that is different from the PM4 format
3177  * used by the CP. sDMA supports copying data, writing
3178  * embedded data, solid fills, and a number of other
3179  * things.  It also has support for tiling/detiling of
3180  * buffers.
3181  */
3182 /**
3183  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3184  *
3185  * @rdev: radeon_device pointer
3186  * @ib: IB object to schedule
3187  *
3188  * Schedule an IB in the DMA ring (CIK).
3189  */
3190 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3191 			      struct radeon_ib *ib)
3192 {
3193 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3194 	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3195 
3196 	if (rdev->wb.enabled) {
3197 		u32 next_rptr = ring->wptr + 5;
3198 		while ((next_rptr & 7) != 4)
3199 			next_rptr++;
3200 		next_rptr += 4;
3201 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3202 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3203 		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3204 		radeon_ring_write(ring, 1); /* number of DWs to follow */
3205 		radeon_ring_write(ring, next_rptr);
3206 	}
3207 
3208 	/* IB packet must end on an 8 DW boundary */
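	/* the INDIRECT_BUFFER packet below is 4 DWs, so pad with NOPs until
	 * wptr is 4 mod 8 to land its last DW on the boundary */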
3209 	while ((ring->wptr & 7) != 4)
3210 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3211 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3212 	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3213 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3214 	radeon_ring_write(ring, ib->length_dw);
3215 
3216 }
3217 
3218 /**
3219  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3220  *
3221  * @rdev: radeon_device pointer
3222  * @fence: radeon fence object
3223  *
3224  * Add a DMA fence packet to the ring to write
3225  * the fence seq number and DMA trap packet to generate
3226  * an interrupt if needed (CIK).
3227  */
3228 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3229 			      struct radeon_fence *fence)
3230 {
3231 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3232 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3233 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3234 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3235 	u32 ref_and_mask;
3236 
3237 	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3238 		ref_and_mask = SDMA0;
3239 	else
3240 		ref_and_mask = SDMA1;
3241 
3242 	/* write the fence */
3243 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3244 	radeon_ring_write(ring, addr & 0xffffffff);
3245 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3246 	radeon_ring_write(ring, fence->seq);
3247 	/* generate an interrupt */
3248 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3249 	/* flush HDP */
3250 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3251 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3252 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3253 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3254 	radeon_ring_write(ring, ref_and_mask); /* MASK */
3255 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3256 }
3257 
3258 /**
3259  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3260  *
3261  * @rdev: radeon_device pointer
3262  * @ring: radeon_ring structure holding ring information
3263  * @semaphore: radeon semaphore object
3264  * @emit_wait: wait or signal semaphore
3265  *
3266  * Add a DMA semaphore packet to the ring to wait on or signal
3267  * other rings (CIK).
3268  */
3269 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3270 				  struct radeon_ring *ring,
3271 				  struct radeon_semaphore *semaphore,
3272 				  bool emit_wait)
3273 {
3274 	u64 addr = semaphore->gpu_addr;
3275 	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3276 
3277 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3278 	radeon_ring_write(ring, addr & 0xfffffff8);
3279 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3280 }
3281 
3282 /**
3283  * cik_sdma_gfx_stop - stop the gfx async dma engines
3284  *
3285  * @rdev: radeon_device pointer
3286  *
3287  * Stop the gfx async dma ring buffers (CIK).
3288  */
3289 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3290 {
3291 	u32 rb_cntl, reg_offset;
3292 	int i;
3293 
3294 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3295 
3296 	for (i = 0; i < 2; i++) {
3297 		if (i == 0)
3298 			reg_offset = SDMA0_REGISTER_OFFSET;
3299 		else
3300 			reg_offset = SDMA1_REGISTER_OFFSET;
3301 		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3302 		rb_cntl &= ~SDMA_RB_ENABLE;
3303 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3304 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3305 	}
3306 }
3307 
3308 /**
3309  * cik_sdma_rlc_stop - stop the compute async dma engines
3310  *
3311  * @rdev: radeon_device pointer
3312  *
3313  * Stop the compute async dma queues (CIK).
3314  */
3315 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3316 {
3317 	/* XXX todo */
3318 }
3319 
3320 /**
3321  * cik_sdma_enable - enable/disable the async dma engines
3322  *
3323  * @rdev: radeon_device pointer
3324  * @enable: enable/disable the DMA MEs.
3325  *
3326  * Halt or unhalt the async dma engines (CIK).
3327  */
3328 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3329 {
3330 	u32 me_cntl, reg_offset;
3331 	int i;
3332 
3333 	for (i = 0; i < 2; i++) {
3334 		if (i == 0)
3335 			reg_offset = SDMA0_REGISTER_OFFSET;
3336 		else
3337 			reg_offset = SDMA1_REGISTER_OFFSET;
3338 		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3339 		if (enable)
3340 			me_cntl &= ~SDMA_HALT;
3341 		else
3342 			me_cntl |= SDMA_HALT;
3343 		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3344 	}
3345 }
3346 
3347 /**
3348  * cik_sdma_gfx_resume - setup and start the async dma engines
3349  *
3350  * @rdev: radeon_device pointer
3351  *
3352  * Set up the gfx DMA ring buffers and enable them (CIK).
3353  * Returns 0 for success, error for failure.
3354  */
3355 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3356 {
3357 	struct radeon_ring *ring;
3358 	u32 rb_cntl, ib_cntl;
3359 	u32 rb_bufsz;
3360 	u32 reg_offset, wb_offset;
3361 	int i, r;
3362 
3363 	for (i = 0; i < 2; i++) {
3364 		if (i == 0) {
3365 			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3366 			reg_offset = SDMA0_REGISTER_OFFSET;
3367 			wb_offset = R600_WB_DMA_RPTR_OFFSET;
3368 		} else {
3369 			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3370 			reg_offset = SDMA1_REGISTER_OFFSET;
3371 			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3372 		}
3373 
3374 		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3375 		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3376 
3377 		/* Set ring buffer size in dwords */
3378 		rb_bufsz = drm_order(ring->ring_size / 4);
3379 		rb_cntl = rb_bufsz << 1;
3380 #ifdef __BIG_ENDIAN
3381 		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3382 #endif
3383 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3384 
3385 		/* Initialize the ring buffer's read and write pointers */
3386 		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3387 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3388 
3389 		/* set the wb address whether it's enabled or not */
3390 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3391 		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3392 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3393 		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3394 
3395 		if (rdev->wb.enabled)
3396 			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3397 
3398 		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3399 		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3400 
3401 		ring->wptr = 0;
3402 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3403 
3404 		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3405 
3406 		/* enable DMA RB */
3407 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3408 
3409 		ib_cntl = SDMA_IB_ENABLE;
3410 #ifdef __BIG_ENDIAN
3411 		ib_cntl |= SDMA_IB_SWAP_ENABLE;
3412 #endif
3413 		/* enable DMA IBs */
3414 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3415 
3416 		ring->ready = true;
3417 
3418 		r = radeon_ring_test(rdev, ring->idx, ring);
3419 		if (r) {
3420 			ring->ready = false;
3421 			return r;
3422 		}
3423 	}
3424 
3425 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3426 
3427 	return 0;
3428 }
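
/*
 * Editor's example of the ring size encoding in cik_sdma_gfx_resume()
 * (illustrative; assumes drm_order() returns ceil(log2(x)), as it is
 * used elsewhere in the driver):
 *
 *	ring->ring_size = 64 * 1024;			a 64 KiB ring
 *	rb_bufsz = drm_order(ring->ring_size / 4);	drm_order(16384) == 14
 *	rb_cntl = rb_bufsz << 1;			size field of SDMA0_GFX_RB_CNTL
 */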
3429 
3430 /**
3431  * cik_sdma_rlc_resume - setup and start the async dma engines
3432  *
3433  * @rdev: radeon_device pointer
3434  *
3435  * Set up the compute DMA queues and enable them (CIK).
3436  * Returns 0 for success, error for failure.
3437  */
3438 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3439 {
3440 	/* XXX todo */
3441 	return 0;
3442 }
3443 
3444 /**
3445  * cik_sdma_load_microcode - load the sDMA ME ucode
3446  *
3447  * @rdev: radeon_device pointer
3448  *
3449  * Loads the sDMA0/1 ucode.
3450  * Returns 0 for success, -EINVAL if the ucode is not available.
3451  */
3452 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3453 {
3454 	const __be32 *fw_data;
3455 	int i;
3456 
3457 	if (!rdev->sdma_fw)
3458 		return -EINVAL;
3459 
3460 	/* stop the gfx rings and rlc compute queues */
3461 	cik_sdma_gfx_stop(rdev);
3462 	cik_sdma_rlc_stop(rdev);
3463 
3464 	/* halt the MEs */
3465 	cik_sdma_enable(rdev, false);
3466 
3467 	/* sdma0 */
3468 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3469 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3470 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3471 		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3472 	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3473 
3474 	/* sdma1 */
3475 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3476 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3477 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3478 		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3479 	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3480 
3481 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3482 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3483 	return 0;
3484 }
3485 
3486 /**
3487  * cik_sdma_resume - setup and start the async dma engines
3488  *
3489  * @rdev: radeon_device pointer
3490  *
3491  * Set up the DMA engines and enable them (CIK).
3492  * Returns 0 for success, error for failure.
3493  */
3494 static int cik_sdma_resume(struct radeon_device *rdev)
3495 {
3496 	int r;
3497 
3498 	/* Reset dma */
3499 	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3500 	RREG32(SRBM_SOFT_RESET);
3501 	udelay(50);
3502 	WREG32(SRBM_SOFT_RESET, 0);
3503 	RREG32(SRBM_SOFT_RESET);
3504 
3505 	r = cik_sdma_load_microcode(rdev);
3506 	if (r)
3507 		return r;
3508 
3509 	/* unhalt the MEs */
3510 	cik_sdma_enable(rdev, true);
3511 
3512 	/* start the gfx rings and rlc compute queues */
3513 	r = cik_sdma_gfx_resume(rdev);
3514 	if (r)
3515 		return r;
3516 	r = cik_sdma_rlc_resume(rdev);
3517 	if (r)
3518 		return r;
3519 
3520 	return 0;
3521 }
3522 
3523 /**
3524  * cik_sdma_fini - tear down the async dma engines
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Stop the async dma engines and free the rings (CIK).
3529  */
3530 static void cik_sdma_fini(struct radeon_device *rdev)
3531 {
3532 	/* stop the gfx rings and rlc compute queues */
3533 	cik_sdma_gfx_stop(rdev);
3534 	cik_sdma_rlc_stop(rdev);
3535 	/* halt the MEs */
3536 	cik_sdma_enable(rdev, false);
3537 	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3538 	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3539 	/* XXX - compute dma queue tear down */
3540 }
3541 
3542 /**
3543  * cik_copy_dma - copy pages using the DMA engine
3544  *
3545  * @rdev: radeon_device pointer
3546  * @src_offset: src GPU address
3547  * @dst_offset: dst GPU address
3548  * @num_gpu_pages: number of GPU pages to xfer
3549  * @fence: radeon fence object
3550  *
3551  * Copy GPU pages using the DMA engine (CIK).
3552  * Used by the radeon ttm implementation to move pages if
3553  * registered as the asic copy callback.
3554  */
3555 int cik_copy_dma(struct radeon_device *rdev,
3556 		 uint64_t src_offset, uint64_t dst_offset,
3557 		 unsigned num_gpu_pages,
3558 		 struct radeon_fence **fence)
3559 {
3560 	struct radeon_semaphore *sem = NULL;
3561 	int ring_index = rdev->asic->copy.dma_ring_index;
3562 	struct radeon_ring *ring = &rdev->ring[ring_index];
3563 	u32 size_in_bytes, cur_size_in_bytes;
3564 	int i, num_loops;
3565 	int r = 0;
3566 
3567 	r = radeon_semaphore_create(rdev, &sem);
3568 	if (r) {
3569 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3570 		return r;
3571 	}
3572 
3573 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3574 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3575 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3576 	if (r) {
3577 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3578 		radeon_semaphore_free(rdev, &sem, NULL);
3579 		return r;
3580 	}
3581 
3582 	if (radeon_fence_need_sync(*fence, ring->idx)) {
3583 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3584 					    ring->idx);
3585 		radeon_fence_note_sync(*fence, ring->idx);
3586 	} else {
3587 		radeon_semaphore_free(rdev, &sem, NULL);
3588 	}
3589 
3590 	for (i = 0; i < num_loops; i++) {
3591 		cur_size_in_bytes = size_in_bytes;
3592 		if (cur_size_in_bytes > 0x1fffff)
3593 			cur_size_in_bytes = 0x1fffff;
3594 		size_in_bytes -= cur_size_in_bytes;
3595 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3596 		radeon_ring_write(ring, cur_size_in_bytes);
3597 		radeon_ring_write(ring, 0); /* src/dst endian swap */
3598 		radeon_ring_write(ring, src_offset & 0xffffffff);
3599 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3600 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
3601 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3602 		src_offset += cur_size_in_bytes;
3603 		dst_offset += cur_size_in_bytes;
3604 	}
3605 
3606 	r = radeon_fence_emit(rdev, fence, ring->idx);
3607 	if (r) {
3608 		radeon_ring_unlock_undo(rdev, ring);
3609 		return r;
3610 	}
3611 
3612 	radeon_ring_unlock_commit(rdev, ring);
3613 	radeon_semaphore_free(rdev, &sem, *fence);
3614 
3615 	return r;
3616 }
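
/*
 * Editor's worked example for cik_copy_dma() (illustrative; assumes a
 * 4 KiB GPU page size): copying 2048 pages is 0x800000 bytes.  Each
 * COPY_LINEAR packet moves at most 0x1fffff bytes, so
 * num_loops = DIV_ROUND_UP(0x800000, 0x1fffff) = 5 and the ring is
 * locked for 5 * 7 + 14 = 49 dwords (seven dwords per copy packet plus
 * headroom for the optional semaphore sync and the fence).
 */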
3617 
3618 /**
3619  * cik_sdma_ring_test - simple async dma engine test
3620  *
3621  * @rdev: radeon_device pointer
3622  * @ring: radeon_ring structure holding ring information
3623  *
3624  * Test the DMA engine by using it to write a
3625  * value to memory (CIK).
3626  * Returns 0 for success, error for failure.
3627  */
3628 int cik_sdma_ring_test(struct radeon_device *rdev,
3629 		       struct radeon_ring *ring)
3630 {
3631 	unsigned i;
3632 	int r;
3633 	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3634 	u32 tmp;
3635 
3636 	if (!ptr) {
3637 		DRM_ERROR("invalid vram scratch pointer\n");
3638 		return -EINVAL;
3639 	}
3640 
3641 	tmp = 0xCAFEDEAD;
3642 	writel(tmp, ptr);
3643 
3644 	r = radeon_ring_lock(rdev, ring, 5);
3645 	if (r) {
3646 		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3647 		return r;
3648 	}
3649 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3650 	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3651 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3652 	radeon_ring_write(ring, 1); /* number of DWs to follow */
3653 	radeon_ring_write(ring, 0xDEADBEEF);
3654 	radeon_ring_unlock_commit(rdev, ring);
3655 
3656 	for (i = 0; i < rdev->usec_timeout; i++) {
3657 		tmp = readl(ptr);
3658 		if (tmp == 0xDEADBEEF)
3659 			break;
3660 		DRM_UDELAY(1);
3661 	}
3662 
3663 	if (i < rdev->usec_timeout) {
3664 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3665 	} else {
3666 		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3667 			  ring->idx, tmp);
3668 		r = -EINVAL;
3669 	}
3670 	return r;
3671 }
3672 
3673 /**
3674  * cik_sdma_ib_test - test an IB on the DMA engine
3675  *
3676  * @rdev: radeon_device pointer
3677  * @ring: radeon_ring structure holding ring information
3678  *
3679  * Test a simple IB in the DMA ring (CIK).
3680  * Returns 0 on success, error on failure.
3681  */
3682 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3683 {
3684 	struct radeon_ib ib;
3685 	unsigned i;
3686 	int r;
3687 	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3688 	u32 tmp = 0;
3689 
3690 	if (!ptr) {
3691 		DRM_ERROR("invalid vram scratch pointer\n");
3692 		return -EINVAL;
3693 	}
3694 
3695 	tmp = 0xCAFEDEAD;
3696 	writel(tmp, ptr);
3697 
3698 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3699 	if (r) {
3700 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3701 		return r;
3702 	}
3703 
3704 	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3705 	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3706 	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3707 	ib.ptr[3] = 1;
3708 	ib.ptr[4] = 0xDEADBEEF;
3709 	ib.length_dw = 5;
3710 
3711 	r = radeon_ib_schedule(rdev, &ib, NULL);
3712 	if (r) {
3713 		radeon_ib_free(rdev, &ib);
3714 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3715 		return r;
3716 	}
3717 	r = radeon_fence_wait(ib.fence, false);
3718 	if (r) {
3719 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3720 		return r;
3721 	}
3722 	for (i = 0; i < rdev->usec_timeout; i++) {
3723 		tmp = readl(ptr);
3724 		if (tmp == 0xDEADBEEF)
3725 			break;
3726 		DRM_UDELAY(1);
3727 	}
3728 	if (i < rdev->usec_timeout) {
3729 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3730 	} else {
3731 		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3732 		r = -EINVAL;
3733 	}
3734 	radeon_ib_free(rdev, &ib);
3735 	return r;
3736 }
3737 
3738 
3739 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3740 {
3741 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3742 		RREG32(GRBM_STATUS));
3743 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3744 		RREG32(GRBM_STATUS2));
3745 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3746 		RREG32(GRBM_STATUS_SE0));
3747 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3748 		RREG32(GRBM_STATUS_SE1));
3749 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3750 		RREG32(GRBM_STATUS_SE2));
3751 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3752 		RREG32(GRBM_STATUS_SE3));
3753 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3754 		RREG32(SRBM_STATUS));
3755 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3756 		RREG32(SRBM_STATUS2));
3757 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3758 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3759 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3760 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3761 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3762 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3763 		 RREG32(CP_STALLED_STAT1));
3764 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3765 		 RREG32(CP_STALLED_STAT2));
3766 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3767 		 RREG32(CP_STALLED_STAT3));
3768 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3769 		 RREG32(CP_CPF_BUSY_STAT));
3770 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3771 		 RREG32(CP_CPF_STALLED_STAT1));
3772 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3773 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3774 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3775 		 RREG32(CP_CPC_STALLED_STAT1));
3776 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3777 }
3778 
3779 /**
3780  * cik_gpu_check_soft_reset - check which blocks are busy
3781  *
3782  * @rdev: radeon_device pointer
3783  *
3784  * Check which blocks are busy and return the relevant reset
3785  * mask to be used by cik_gpu_soft_reset().
3786  * Returns a mask of the blocks to be reset.
3787  */
3788 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3789 {
3790 	u32 reset_mask = 0;
3791 	u32 tmp;
3792 
3793 	/* GRBM_STATUS */
3794 	tmp = RREG32(GRBM_STATUS);
3795 	if (tmp & (PA_BUSY | SC_BUSY |
3796 		   BCI_BUSY | SX_BUSY |
3797 		   TA_BUSY | VGT_BUSY |
3798 		   DB_BUSY | CB_BUSY |
3799 		   GDS_BUSY | SPI_BUSY |
3800 		   IA_BUSY | IA_BUSY_NO_DMA))
3801 		reset_mask |= RADEON_RESET_GFX;
3802 
3803 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3804 		reset_mask |= RADEON_RESET_CP;
3805 
3806 	/* GRBM_STATUS2 */
3807 	tmp = RREG32(GRBM_STATUS2);
3808 	if (tmp & RLC_BUSY)
3809 		reset_mask |= RADEON_RESET_RLC;
3810 
3811 	/* SDMA0_STATUS_REG */
3812 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3813 	if (!(tmp & SDMA_IDLE))
3814 		reset_mask |= RADEON_RESET_DMA;
3815 
3816 	/* SDMA1_STATUS_REG */
3817 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3818 	if (!(tmp & SDMA_IDLE))
3819 		reset_mask |= RADEON_RESET_DMA1;
3820 
3821 	/* SRBM_STATUS2 */
3822 	tmp = RREG32(SRBM_STATUS2);
3823 	if (tmp & SDMA_BUSY)
3824 		reset_mask |= RADEON_RESET_DMA;
3825 
3826 	if (tmp & SDMA1_BUSY)
3827 		reset_mask |= RADEON_RESET_DMA1;
3828 
3829 	/* SRBM_STATUS */
3830 	tmp = RREG32(SRBM_STATUS);
3831 
3832 	if (tmp & IH_BUSY)
3833 		reset_mask |= RADEON_RESET_IH;
3834 
3835 	if (tmp & SEM_BUSY)
3836 		reset_mask |= RADEON_RESET_SEM;
3837 
3838 	if (tmp & GRBM_RQ_PENDING)
3839 		reset_mask |= RADEON_RESET_GRBM;
3840 
3841 	if (tmp & VMC_BUSY)
3842 		reset_mask |= RADEON_RESET_VMC;
3843 
3844 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3845 		   MCC_BUSY | MCD_BUSY))
3846 		reset_mask |= RADEON_RESET_MC;
3847 
3848 	if (evergreen_is_display_hung(rdev))
3849 		reset_mask |= RADEON_RESET_DISPLAY;
3850 
3851 	/* Skip MC reset as it's most likely not hung, just busy */
3852 	if (reset_mask & RADEON_RESET_MC) {
3853 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3854 		reset_mask &= ~RADEON_RESET_MC;
3855 	}
3856 
3857 	return reset_mask;
3858 }
3859 
3860 /**
3861  * cik_gpu_soft_reset - soft reset GPU
3862  *
3863  * @rdev: radeon_device pointer
3864  * @reset_mask: mask of which blocks to reset
3865  *
3866  * Soft reset the blocks specified in @reset_mask.
3867  */
3868 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3869 {
3870 	struct evergreen_mc_save save;
3871 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3872 	u32 tmp;
3873 
3874 	if (reset_mask == 0)
3875 		return;
3876 
3877 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3878 
3879 	cik_print_gpu_status_regs(rdev);
3880 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3881 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3882 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3883 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3884 
3885 	/* stop the rlc */
3886 	cik_rlc_stop(rdev);
3887 
3888 	/* Disable GFX parsing/prefetching */
3889 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3890 
3891 	/* Disable MEC parsing/prefetching */
3892 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3893 
3894 	if (reset_mask & RADEON_RESET_DMA) {
3895 		/* sdma0 */
3896 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3897 		tmp |= SDMA_HALT;
3898 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3899 	}
3900 	if (reset_mask & RADEON_RESET_DMA1) {
3901 		/* sdma1 */
3902 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3903 		tmp |= SDMA_HALT;
3904 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3905 	}
3906 
3907 	evergreen_mc_stop(rdev, &save);
3908 	if (evergreen_mc_wait_for_idle(rdev)) {
3909 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3910 	}
3911 
3912 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3913 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3914 
3915 	if (reset_mask & RADEON_RESET_CP) {
3916 		grbm_soft_reset |= SOFT_RESET_CP;
3917 
3918 		srbm_soft_reset |= SOFT_RESET_GRBM;
3919 	}
3920 
3921 	if (reset_mask & RADEON_RESET_DMA)
3922 		srbm_soft_reset |= SOFT_RESET_SDMA;
3923 
3924 	if (reset_mask & RADEON_RESET_DMA1)
3925 		srbm_soft_reset |= SOFT_RESET_SDMA1;
3926 
3927 	if (reset_mask & RADEON_RESET_DISPLAY)
3928 		srbm_soft_reset |= SOFT_RESET_DC;
3929 
3930 	if (reset_mask & RADEON_RESET_RLC)
3931 		grbm_soft_reset |= SOFT_RESET_RLC;
3932 
3933 	if (reset_mask & RADEON_RESET_SEM)
3934 		srbm_soft_reset |= SOFT_RESET_SEM;
3935 
3936 	if (reset_mask & RADEON_RESET_IH)
3937 		srbm_soft_reset |= SOFT_RESET_IH;
3938 
3939 	if (reset_mask & RADEON_RESET_GRBM)
3940 		srbm_soft_reset |= SOFT_RESET_GRBM;
3941 
3942 	if (reset_mask & RADEON_RESET_VMC)
3943 		srbm_soft_reset |= SOFT_RESET_VMC;
3944 
3945 	if (!(rdev->flags & RADEON_IS_IGP)) {
3946 		if (reset_mask & RADEON_RESET_MC)
3947 			srbm_soft_reset |= SOFT_RESET_MC;
3948 	}
3949 
3950 	if (grbm_soft_reset) {
3951 		tmp = RREG32(GRBM_SOFT_RESET);
3952 		tmp |= grbm_soft_reset;
3953 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3954 		WREG32(GRBM_SOFT_RESET, tmp);
3955 		tmp = RREG32(GRBM_SOFT_RESET);
3956 
3957 		udelay(50);
3958 
3959 		tmp &= ~grbm_soft_reset;
3960 		WREG32(GRBM_SOFT_RESET, tmp);
3961 		tmp = RREG32(GRBM_SOFT_RESET);
3962 	}
3963 
3964 	if (srbm_soft_reset) {
3965 		tmp = RREG32(SRBM_SOFT_RESET);
3966 		tmp |= srbm_soft_reset;
3967 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3968 		WREG32(SRBM_SOFT_RESET, tmp);
3969 		tmp = RREG32(SRBM_SOFT_RESET);
3970 
3971 		udelay(50);
3972 
3973 		tmp &= ~srbm_soft_reset;
3974 		WREG32(SRBM_SOFT_RESET, tmp);
3975 		tmp = RREG32(SRBM_SOFT_RESET);
3976 	}
3977 
3978 	/* Wait a little for things to settle down */
3979 	udelay(50);
3980 
3981 	evergreen_mc_resume(rdev, &save);
3982 	udelay(50);
3983 
3984 	cik_print_gpu_status_regs(rdev);
3985 }
3986 
3987 /**
3988  * cik_asic_reset - soft reset GPU
3989  *
3990  * @rdev: radeon_device pointer
3991  *
3992  * Look up which blocks are hung and attempt
3993  * to reset them.
3994  * Returns 0 for success.
3995  */
3996 int cik_asic_reset(struct radeon_device *rdev)
3997 {
3998 	u32 reset_mask;
3999 
4000 	reset_mask = cik_gpu_check_soft_reset(rdev);
4001 
4002 	if (reset_mask)
4003 		r600_set_bios_scratch_engine_hung(rdev, true);
4004 
4005 	cik_gpu_soft_reset(rdev, reset_mask);
4006 
4007 	reset_mask = cik_gpu_check_soft_reset(rdev);
4008 
4009 	if (!reset_mask)
4010 		r600_set_bios_scratch_engine_hung(rdev, false);
4011 
4012 	return 0;
4013 }
4014 
4015 /**
4016  * cik_gfx_is_lockup - check if the 3D engine is locked up
4017  *
4018  * @rdev: radeon_device pointer
4019  * @ring: radeon_ring structure holding ring information
4020  *
4021  * Check if the 3D engine is locked up (CIK).
4022  * Returns true if the engine is locked, false if not.
4023  */
4024 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4025 {
4026 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4027 
4028 	if (!(reset_mask & (RADEON_RESET_GFX |
4029 			    RADEON_RESET_COMPUTE |
4030 			    RADEON_RESET_CP))) {
4031 		radeon_ring_lockup_update(ring);
4032 		return false;
4033 	}
4034 	/* force CP activities */
4035 	radeon_ring_force_activity(rdev, ring);
4036 	return radeon_ring_test_lockup(rdev, ring);
4037 }
4038 
4039 /**
4040  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4041  *
4042  * @rdev: radeon_device pointer
4043  * @ring: radeon_ring structure holding ring information
4044  *
4045  * Check if the async DMA engine is locked up (CIK).
4046  * Returns true if the engine appears to be locked up, false if not.
4047  */
4048 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4049 {
4050 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4051 	u32 mask;
4052 
4053 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4054 		mask = RADEON_RESET_DMA;
4055 	else
4056 		mask = RADEON_RESET_DMA1;
4057 
4058 	if (!(reset_mask & mask)) {
4059 		radeon_ring_lockup_update(ring);
4060 		return false;
4061 	}
4062 	/* force ring activities */
4063 	radeon_ring_force_activity(rdev, ring);
4064 	return radeon_ring_test_lockup(rdev, ring);
4065 }
4066 
4067 /* MC */
4068 /**
4069  * cik_mc_program - program the GPU memory controller
4070  *
4071  * @rdev: radeon_device pointer
4072  *
4073  * Set the location of vram, gart, and AGP in the GPU's
4074  * physical address space (CIK).
4075  */
4076 static void cik_mc_program(struct radeon_device *rdev)
4077 {
4078 	struct evergreen_mc_save save;
4079 	u32 tmp;
4080 	int i, j;
4081 
4082 	/* Initialize HDP */
4083 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4084 		WREG32((0x2c14 + j), 0x00000000);
4085 		WREG32((0x2c18 + j), 0x00000000);
4086 		WREG32((0x2c1c + j), 0x00000000);
4087 		WREG32((0x2c20 + j), 0x00000000);
4088 		WREG32((0x2c24 + j), 0x00000000);
4089 	}
4090 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4091 
4092 	evergreen_mc_stop(rdev, &save);
4093 	if (radeon_mc_wait_for_idle(rdev)) {
4094 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4095 	}
4096 	/* Lock out access through the VGA aperture */
4097 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4098 	/* Update configuration */
4099 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4100 	       rdev->mc.vram_start >> 12);
4101 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4102 	       rdev->mc.vram_end >> 12);
4103 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4104 	       rdev->vram_scratch.gpu_addr >> 12);
4105 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4106 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4107 	WREG32(MC_VM_FB_LOCATION, tmp);
4108 	/* XXX double check these! */
4109 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4110 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4111 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4112 	WREG32(MC_VM_AGP_BASE, 0);
4113 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4114 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4115 	if (radeon_mc_wait_for_idle(rdev)) {
4116 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4117 	}
4118 	evergreen_mc_resume(rdev, &save);
4119 	/* we need to own VRAM, so turn off the VGA renderer here
4120 	 * to stop it overwriting our objects */
4121 	rv515_vga_render_disable(rdev);
4122 }
4123 
4124 /**
4125  * cik_mc_init - initialize the memory controller driver params
4126  *
4127  * @rdev: radeon_device pointer
4128  *
4129  * Look up the amount of vram, vram width, and decide how to place
4130  * vram and gart within the GPU's physical address space (CIK).
4131  * Returns 0 for success.
4132  */
4133 static int cik_mc_init(struct radeon_device *rdev)
4134 {
4135 	u32 tmp;
4136 	int chansize, numchan;
4137 
4138 	/* Get VRAM information */
4139 	rdev->mc.vram_is_ddr = true;
4140 	tmp = RREG32(MC_ARB_RAMCFG);
4141 	if (tmp & CHANSIZE_MASK) {
4142 		chansize = 64;
4143 	} else {
4144 		chansize = 32;
4145 	}
4146 	tmp = RREG32(MC_SHARED_CHMAP);
4147 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4148 	case 0:
4149 	default:
4150 		numchan = 1;
4151 		break;
4152 	case 1:
4153 		numchan = 2;
4154 		break;
4155 	case 2:
4156 		numchan = 4;
4157 		break;
4158 	case 3:
4159 		numchan = 8;
4160 		break;
4161 	case 4:
4162 		numchan = 3;
4163 		break;
4164 	case 5:
4165 		numchan = 6;
4166 		break;
4167 	case 6:
4168 		numchan = 10;
4169 		break;
4170 	case 7:
4171 		numchan = 12;
4172 		break;
4173 	case 8:
4174 		numchan = 16;
4175 		break;
4176 	}
4177 	rdev->mc.vram_width = numchan * chansize;
4178 	/* Could aper size report 0 ? */
4179 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4180 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4181 	/* size in MB on CIK */
4182 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4183 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4184 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4185 	si_vram_gtt_location(rdev, &rdev->mc);
4186 	radeon_update_bandwidth_info(rdev);
4187 
4188 	return 0;
4189 }
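
/*
 * Editor's example of the width computation in cik_mc_init()
 * (illustrative): if MC_SHARED_CHMAP reports NOOFCHAN == 2 (four
 * channels) and CHANSIZE is set in MC_ARB_RAMCFG (64-bit channels),
 * then rdev->mc.vram_width = 4 * 64 = 256 bits.
 */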
4190 
4191 /*
4192  * GART
4193  * VMID 0 is the physical GPU addresses as used by the kernel.
4194  * VMIDs 1-15 are used for userspace clients and are handled
4195  * by the radeon vm/hsa code.
4196  */
4197 /**
4198  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4199  *
4200  * @rdev: radeon_device pointer
4201  *
4202  * Flush the TLB for the VMID 0 page table (CIK).
4203  */
4204 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4205 {
4206 	/* flush hdp cache */
4207 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4208 
4209 	/* bits 0-15 are the VM contexts0-15 */
4210 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4211 }
4212 
4213 /**
4214  * cik_pcie_gart_enable - gart enable
4215  *
4216  * @rdev: radeon_device pointer
4217  *
4218  * This sets up the TLBs, programs the page tables for VMID0,
4219  * sets up the hw for VMIDs 1-15 which are allocated on
4220  * demand, and sets up the global locations for the LDS, GDS,
4221  * and GPUVM for FSA64 clients (CIK).
4222  * Returns 0 for success, errors for failure.
4223  */
4224 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4225 {
4226 	int r, i;
4227 
4228 	if (rdev->gart.robj == NULL) {
4229 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4230 		return -EINVAL;
4231 	}
4232 	r = radeon_gart_table_vram_pin(rdev);
4233 	if (r)
4234 		return r;
4235 	radeon_gart_restore(rdev);
4236 	/* Setup TLB control */
4237 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4238 	       (0xA << 7) |
4239 	       ENABLE_L1_TLB |
4240 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4241 	       ENABLE_ADVANCED_DRIVER_MODEL |
4242 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4243 	/* Setup L2 cache */
4244 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4245 	       ENABLE_L2_FRAGMENT_PROCESSING |
4246 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4247 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4248 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4249 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4250 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4251 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4252 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4253 	/* setup context0 */
4254 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4255 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4256 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4257 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4258 			(u32)(rdev->dummy_page.addr >> 12));
4259 	WREG32(VM_CONTEXT0_CNTL2, 0);
4260 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4261 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4262 
4263 	WREG32(0x15D4, 0);
4264 	WREG32(0x15D8, 0);
4265 	WREG32(0x15DC, 0);
4266 
4267 	/* empty context1-15 */
4268 	/* FIXME start with 4G, once using 2 level pt switch to full
4269 	 * vm size space
4270 	 */
4271 	/* set vm size, must be a multiple of 4 */
4272 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4273 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4274 	for (i = 1; i < 16; i++) {
4275 		if (i < 8)
4276 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4277 			       rdev->gart.table_addr >> 12);
4278 		else
4279 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4280 			       rdev->gart.table_addr >> 12);
4281 	}
4282 
4283 	/* enable context1-15 */
4284 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4285 	       (u32)(rdev->dummy_page.addr >> 12));
4286 	WREG32(VM_CONTEXT1_CNTL2, 4);
4287 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4288 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4289 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4290 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4292 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4294 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4296 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4298 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4300 
4301 	/* TC cache setup ??? */
4302 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4303 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4304 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4305 
4306 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4307 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4308 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4309 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4310 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4311 
4312 	WREG32(TC_CFG_L1_VOLATILE, 0);
4313 	WREG32(TC_CFG_L2_VOLATILE, 0);
4314 
4315 	if (rdev->family == CHIP_KAVERI) {
4316 		u32 tmp = RREG32(CHUB_CONTROL);
4317 		tmp &= ~BYPASS_VM;
4318 		WREG32(CHUB_CONTROL, tmp);
4319 	}
4320 
4321 	/* XXX SH_MEM regs */
4322 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4323 	for (i = 0; i < 16; i++) {
4324 		cik_srbm_select(rdev, 0, 0, 0, i);
4325 		/* CP and shaders */
4326 		WREG32(SH_MEM_CONFIG, 0);
4327 		WREG32(SH_MEM_APE1_BASE, 1);
4328 		WREG32(SH_MEM_APE1_LIMIT, 0);
4329 		WREG32(SH_MEM_BASES, 0);
4330 		/* SDMA GFX */
4331 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4332 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4333 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4334 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4335 		/* XXX SDMA RLC - todo */
4336 	}
4337 	cik_srbm_select(rdev, 0, 0, 0, 0);
4338 
4339 	cik_pcie_gart_tlb_flush(rdev);
4340 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4341 		 (unsigned)(rdev->mc.gtt_size >> 20),
4342 		 (unsigned long long)rdev->gart.table_addr);
4343 	rdev->gart.ready = true;
4344 	return 0;
4345 }
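
/*
 * Editor's note on the address programming in cik_pcie_gart_enable()
 * (illustrative): the page table base and start/end registers take
 * 4 KiB-aligned addresses in units of 4 KiB pages, hence the >> 12
 * shifts; a GART table at GPU address 0x0040000000, for instance, is
 * programmed as 0x40000.
 */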
4346 
4347 /**
4348  * cik_pcie_gart_disable - gart disable
4349  *
4350  * @rdev: radeon_device pointer
4351  *
4352  * This disables all VM page tables (CIK).
4353  */
4354 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4355 {
4356 	/* Disable all tables */
4357 	WREG32(VM_CONTEXT0_CNTL, 0);
4358 	WREG32(VM_CONTEXT1_CNTL, 0);
4359 	/* Setup TLB control */
4360 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4361 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4362 	/* Setup L2 cache */
4363 	WREG32(VM_L2_CNTL,
4364 	       ENABLE_L2_FRAGMENT_PROCESSING |
4365 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4366 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4367 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4368 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4369 	WREG32(VM_L2_CNTL2, 0);
4370 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4371 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4372 	radeon_gart_table_vram_unpin(rdev);
4373 }
4374 
4375 /**
4376  * cik_pcie_gart_fini - vm fini callback
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Tears down the driver GART/VM setup (CIK).
4381  */
4382 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4383 {
4384 	cik_pcie_gart_disable(rdev);
4385 	radeon_gart_table_vram_free(rdev);
4386 	radeon_gart_fini(rdev);
4387 }
4388 
4389 /* vm parser */
4390 /**
4391  * cik_ib_parse - vm ib_parse callback
4392  *
4393  * @rdev: radeon_device pointer
4394  * @ib: indirect buffer pointer
4395  *
4396  * CIK uses hw IB checking so this is a nop (CIK).
4397  */
4398 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4399 {
4400 	return 0;
4401 }
4402 
4403 /*
4404  * vm
4405  * VMID 0 is the physical GPU addresses as used by the kernel.
4406  * VMIDs 1-15 are used for userspace clients and are handled
4407  * by the radeon vm/hsa code.
4408  */
4409 /**
4410  * cik_vm_init - cik vm init callback
4411  *
4412  * @rdev: radeon_device pointer
4413  *
4414  * Inits cik specific vm parameters (number of VMs, base of vram for
4415  * VMIDs 1-15) (CIK).
4416  * Returns 0 for success.
4417  */
4418 int cik_vm_init(struct radeon_device *rdev)
4419 {
4420 	/* number of VMs */
4421 	rdev->vm_manager.nvm = 16;
4422 	/* base offset of vram pages */
4423 	if (rdev->flags & RADEON_IS_IGP) {
4424 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4425 		tmp <<= 22;
4426 		rdev->vm_manager.vram_base_offset = tmp;
4427 	} else
4428 		rdev->vm_manager.vram_base_offset = 0;
4429 
4430 	return 0;
4431 }
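
/*
 * Editor's example of the IGP offset in cik_vm_init() (illustrative):
 * the << 22 implies MC_VM_FB_OFFSET counts 4 MiB units, so a register
 * value of 0x10 yields vram_base_offset = 0x10 << 22 = 0x4000000 (64 MiB).
 */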
4432 
4433 /**
4434  * cik_vm_fini - cik vm fini callback
4435  *
4436  * @rdev: radeon_device pointer
4437  *
4438  * Tear down any asic specific VM setup (CIK).
4439  */
4440 void cik_vm_fini(struct radeon_device *rdev)
4441 {
4442 }
4443 
4444 /**
4445  * cik_vm_decode_fault - print human readable fault info
4446  *
4447  * @rdev: radeon_device pointer
4448  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4449  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4450  *
4451  * Print human readable fault information (CIK).
4452  */
4453 static void cik_vm_decode_fault(struct radeon_device *rdev,
4454 				u32 status, u32 addr, u32 mc_client)
4455 {
4456 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4457 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4458 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4459 	char *block = (char *)&mc_client;
4460 
4461 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4462 	       protections, vmid, addr,
4463 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4464 	       block, mc_id);
4465 }
4466 
4467 /**
4468  * cik_vm_flush - cik vm flush using the CP
4469  *
4470  * @rdev: radeon_device pointer
4471  *
4472  * Update the page table base and flush the VM TLB
4473  * using the CP (CIK).
4474  */
4475 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4476 {
4477 	struct radeon_ring *ring = &rdev->ring[ridx];
4478 
4479 	if (vm == NULL)
4480 		return;
4481 
4482 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4483 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4484 				 WRITE_DATA_DST_SEL(0)));
4485 	if (vm->id < 8) {
4486 		radeon_ring_write(ring,
4487 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4488 	} else {
4489 		radeon_ring_write(ring,
4490 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4491 	}
4492 	radeon_ring_write(ring, 0);
4493 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4494 
4495 	/* update SH_MEM_* regs */
4496 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4497 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4498 				 WRITE_DATA_DST_SEL(0)));
4499 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4500 	radeon_ring_write(ring, 0);
4501 	radeon_ring_write(ring, VMID(vm->id));
4502 
4503 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4504 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4505 				 WRITE_DATA_DST_SEL(0)));
4506 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4507 	radeon_ring_write(ring, 0);
4508 
4509 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4510 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4511 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4512 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4513 
4514 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516 				 WRITE_DATA_DST_SEL(0)));
4517 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4518 	radeon_ring_write(ring, 0);
4519 	radeon_ring_write(ring, VMID(0));
4520 
4521 	/* HDP flush */
4522 	/* We should be using the WAIT_REG_MEM packet here like in
4523 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4524 	 * context...
4525 	 */
4526 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4527 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4528 				 WRITE_DATA_DST_SEL(0)));
4529 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4530 	radeon_ring_write(ring, 0);
4531 	radeon_ring_write(ring, 0);
4532 
4533 	/* bits 0-15 are the VM contexts0-15 */
4534 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4535 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4536 				 WRITE_DATA_DST_SEL(0)));
4537 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4538 	radeon_ring_write(ring, 0);
4539 	radeon_ring_write(ring, 1 << vm->id);
4540 
4541 	/* compute doesn't have PFP */
4542 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4543 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4544 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4545 		radeon_ring_write(ring, 0x0);
4546 	}
4547 }
4548 
4549 /**
4550  * cik_vm_set_page - update the page tables using CP or sDMA
4551  *
4552  * @rdev: radeon_device pointer
4553  * @ib: indirect buffer to fill with commands
4554  * @pe: addr of the page entry
4555  * @addr: dst addr to write into pe
4556  * @count: number of page entries to update
4557  * @incr: increase next addr by incr bytes
4558  * @flags: access flags
4559  *
4560  * Update the page tables using CP or sDMA (CIK).
4561  */
4562 void cik_vm_set_page(struct radeon_device *rdev,
4563 		     struct radeon_ib *ib,
4564 		     uint64_t pe,
4565 		     uint64_t addr, unsigned count,
4566 		     uint32_t incr, uint32_t flags)
4567 {
4568 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4569 	uint64_t value;
4570 	unsigned ndw;
4571 
4572 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4573 		/* CP */
4574 		while (count) {
4575 			ndw = 2 + count * 2;
4576 			if (ndw > 0x3FFE)
4577 				ndw = 0x3FFE;
4578 
4579 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4580 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4581 						    WRITE_DATA_DST_SEL(1));
4582 			ib->ptr[ib->length_dw++] = pe;
4583 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4584 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4585 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4586 					value = radeon_vm_map_gart(rdev, addr);
4587 					value &= 0xFFFFFFFFFFFFF000ULL;
4588 				} else if (flags & RADEON_VM_PAGE_VALID) {
4589 					value = addr;
4590 				} else {
4591 					value = 0;
4592 				}
4593 				addr += incr;
4594 				value |= r600_flags;
4595 				ib->ptr[ib->length_dw++] = value;
4596 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4597 			}
4598 		}
4599 	} else {
4600 		/* DMA */
4601 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4602 			while (count) {
4603 				ndw = count * 2;
4604 				if (ndw > 0xFFFFE)
4605 					ndw = 0xFFFFE;
4606 
4607 				/* for non-physically contiguous pages (system) */
4608 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4609 				ib->ptr[ib->length_dw++] = pe;
4610 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4611 				ib->ptr[ib->length_dw++] = ndw;
4612 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4613 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4614 						value = radeon_vm_map_gart(rdev, addr);
4615 						value &= 0xFFFFFFFFFFFFF000ULL;
4616 					} else if (flags & RADEON_VM_PAGE_VALID) {
4617 						value = addr;
4618 					} else {
4619 						value = 0;
4620 					}
4621 					addr += incr;
4622 					value |= r600_flags;
4623 					ib->ptr[ib->length_dw++] = value;
4624 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4625 				}
4626 			}
4627 		} else {
4628 			while (count) {
4629 				ndw = count;
4630 				if (ndw > 0x7FFFF)
4631 					ndw = 0x7FFFF;
4632 
4633 				if (flags & RADEON_VM_PAGE_VALID)
4634 					value = addr;
4635 				else
4636 					value = 0;
4637 				/* for physically contiguous pages (vram) */
4638 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4639 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4640 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4641 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4642 				ib->ptr[ib->length_dw++] = 0;
4643 				ib->ptr[ib->length_dw++] = value; /* value */
4644 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4645 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4646 				ib->ptr[ib->length_dw++] = 0;
4647 				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4648 				pe += ndw * 8;
4649 				addr += ndw * incr;
4650 				count -= ndw;
4651 			}
4652 		}
4653 		while (ib->length_dw & 0x7)
4654 			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4655 	}
4656 }
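
/*
 * Editor's worked example of the contiguous (vram) path in
 * cik_vm_set_page() (illustrative): with count = 3, incr = 4096 and
 * RADEON_VM_PAGE_VALID set, one GENERATE_PTE_PDE packet asks sDMA to
 * produce three 64-bit entries at pe, pe + 8 and pe + 16, roughly
 * (addr | flags), (addr + 4096 | flags) and (addr + 8192 | flags),
 * after which the IB is NOP-padded to a multiple of 8 dwords.
 */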
4657 
4658 /**
4659  * cik_dma_vm_flush - cik vm flush using sDMA
4660  *
4661  * @rdev: radeon_device pointer
4662  *
4663  * Update the page table base and flush the VM TLB
4664  * using sDMA (CIK).
4665  */
4666 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4667 {
4668 	struct radeon_ring *ring = &rdev->ring[ridx];
4669 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4670 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4671 	u32 ref_and_mask;
4672 
4673 	if (vm == NULL)
4674 		return;
4675 
4676 	if (ridx == R600_RING_TYPE_DMA_INDEX)
4677 		ref_and_mask = SDMA0;
4678 	else
4679 		ref_and_mask = SDMA1;
4680 
4681 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4682 	if (vm->id < 8) {
4683 		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4684 	} else {
4685 		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4686 	}
4687 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4688 
4689 	/* update SH_MEM_* regs */
4690 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4692 	radeon_ring_write(ring, VMID(vm->id));
4693 
4694 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4696 	radeon_ring_write(ring, 0);
4697 
4698 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4700 	radeon_ring_write(ring, 0);
4701 
4702 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4703 	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4704 	radeon_ring_write(ring, 1);
4705 
4706 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4707 	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4708 	radeon_ring_write(ring, 0);
4709 
4710 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4711 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4712 	radeon_ring_write(ring, VMID(0));
4713 
4714 	/* flush HDP */
4715 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4716 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4717 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4718 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4719 	radeon_ring_write(ring, ref_and_mask); /* MASK */
4720 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4721 
4722 	/* flush TLB */
4723 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4724 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4725 	radeon_ring_write(ring, 1 << vm->id);
4726 }
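
/*
 * Editor's note on the pattern above (illustrative): each SRBM_WRITE is
 * a three dword packet: the header (the 0xf000 in the extra field is
 * presumably a byte enable covering all four bytes), the destination
 * register offset in dwords, then the value.  That is why every
 * register update in cik_dma_vm_flush() takes exactly three
 * radeon_ring_write() calls.
 */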
4727 
4728 /*
4729  * RLC
4730  * The RLC is a multi-purpose microengine that handles a
4731  * variety of functions, the most important of which is
4732  * the interrupt controller.
4733  */
4734 /**
4735  * cik_rlc_stop - stop the RLC ME
4736  *
4737  * @rdev: radeon_device pointer
4738  *
4739  * Halt the RLC ME (MicroEngine) (CIK).
4740  */
4741 static void cik_rlc_stop(struct radeon_device *rdev)
4742 {
4743 	int i, j, k;
4744 	u32 mask, tmp;
4745 
4746 	tmp = RREG32(CP_INT_CNTL_RING0);
4747 	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4748 	WREG32(CP_INT_CNTL_RING0, tmp);
4749 
4750 	RREG32(CB_CGTT_SCLK_CTRL);
4751 	RREG32(CB_CGTT_SCLK_CTRL);
4752 	RREG32(CB_CGTT_SCLK_CTRL);
4753 	RREG32(CB_CGTT_SCLK_CTRL);
4754 
4755 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4756 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4757 
4758 	WREG32(RLC_CNTL, 0);
4759 
4760 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4761 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4762 			cik_select_se_sh(rdev, i, j);
4763 			for (k = 0; k < rdev->usec_timeout; k++) {
4764 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4765 					break;
4766 				udelay(1);
4767 			}
4768 		}
4769 	}
4770 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4771 
4772 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4773 	for (k = 0; k < rdev->usec_timeout; k++) {
4774 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4775 			break;
4776 		udelay(1);
4777 	}
4778 }
4779 
4780 /**
4781  * cik_rlc_start - start the RLC ME
4782  *
4783  * @rdev: radeon_device pointer
4784  *
4785  * Unhalt the RLC ME (MicroEngine) (CIK).
4786  */
4787 static void cik_rlc_start(struct radeon_device *rdev)
4788 {
4789 	u32 tmp;
4790 
4791 	WREG32(RLC_CNTL, RLC_ENABLE);
4792 
4793 	tmp = RREG32(CP_INT_CNTL_RING0);
4794 	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4795 	WREG32(CP_INT_CNTL_RING0, tmp);
4796 
4797 	udelay(50);
4798 }
4799 
4800 /**
4801  * cik_rlc_resume - setup the RLC hw
4802  *
4803  * @rdev: radeon_device pointer
4804  *
4805  * Initialize the RLC registers, load the ucode,
4806  * and start the RLC (CIK).
4807  * Returns 0 for success, -EINVAL if the ucode is not available.
4808  */
4809 static int cik_rlc_resume(struct radeon_device *rdev)
4810 {
4811 	u32 i, size;
4812 	u32 clear_state_info[3];
4813 	const __be32 *fw_data;
4814 
4815 	if (!rdev->rlc_fw)
4816 		return -EINVAL;
4817 
4818 	switch (rdev->family) {
4819 	case CHIP_BONAIRE:
4820 	default:
4821 		size = BONAIRE_RLC_UCODE_SIZE;
4822 		break;
4823 	case CHIP_KAVERI:
4824 		size = KV_RLC_UCODE_SIZE;
4825 		break;
4826 	case CHIP_KABINI:
4827 		size = KB_RLC_UCODE_SIZE;
4828 		break;
4829 	}
4830 
4831 	cik_rlc_stop(rdev);
4832 
4833 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4834 	RREG32(GRBM_SOFT_RESET);
4835 	udelay(50);
4836 	WREG32(GRBM_SOFT_RESET, 0);
4837 	RREG32(GRBM_SOFT_RESET);
4838 	udelay(50);
4839 
4840 	WREG32(RLC_LB_CNTR_INIT, 0);
4841 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4842 
4843 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4844 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4845 	WREG32(RLC_LB_PARAMS, 0x00600408);
4846 	WREG32(RLC_LB_CNTL, 0x80000004);
4847 
4848 	WREG32(RLC_MC_CNTL, 0);
4849 	WREG32(RLC_UCODE_CNTL, 0);
4850 
4851 	fw_data = (const __be32 *)rdev->rlc_fw->data;
4852 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4853 	for (i = 0; i < size; i++)
4854 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4855 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4856 
4857 	/* XXX */
4858 	clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
4859 	clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
4860 	clear_state_info[2] = 0; /* cik_default_size */
4861 	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4862 	for (i = 0; i < 3; i++)
4863 		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4864 	WREG32(RLC_DRIVER_DMA_STATUS, 0);
4865 
4866 	cik_rlc_start(rdev);
4867 
4868 	return 0;
4869 }
4870 
4871 /*
4872  * Interrupts
4873  * Starting with r6xx, interrupts are handled via a ring buffer.
4874  * Ring buffers are areas of GPU accessible memory that the GPU
4875  * writes interrupt vectors into and the host reads vectors out of.
4876  * There is a rptr (read pointer) that determines where the
4877  * host is currently reading, and a wptr (write pointer)
4878  * which determines where the GPU has written.  When the
4879  * pointers are equal, the ring is idle.  When the GPU
4880  * writes vectors to the ring buffer, it increments the
4881  * wptr.  When there is an interrupt, the host then starts
4882  * fetching vectors and processing them until the pointers are
4883  * equal again at which point it updates the rptr.
4884  */
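
/*
 * Editor's sketch of the consumer loop described above (illustrative,
 * not the driver's real handler; example_ih_drain is a hypothetical
 * name).  Each vector is a 4-dword (16 byte) record:
 *
 *	static void example_ih_drain(struct radeon_device *rdev)
 *	{
 *		u32 wptr = RREG32(IH_RB_WPTR);
 *		u32 rptr = rdev->ih.rptr;
 *
 *		while (rptr != wptr) {
 *			... decode the 4 dwords at rdev->ih.ring[rptr / 4] ...
 *			rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *		}
 *		rdev->ih.rptr = rptr;
 *		WREG32(IH_RB_RPTR, rptr);
 *	}
 */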
4885 
4886 /**
4887  * cik_enable_interrupts - Enable the interrupt ring buffer
4888  *
4889  * @rdev: radeon_device pointer
4890  *
4891  * Enable the interrupt ring buffer (CIK).
4892  */
4893 static void cik_enable_interrupts(struct radeon_device *rdev)
4894 {
4895 	u32 ih_cntl = RREG32(IH_CNTL);
4896 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4897 
4898 	ih_cntl |= ENABLE_INTR;
4899 	ih_rb_cntl |= IH_RB_ENABLE;
4900 	WREG32(IH_CNTL, ih_cntl);
4901 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4902 	rdev->ih.enabled = true;
4903 }
4904 
4905 /**
4906  * cik_disable_interrupts - Disable the interrupt ring buffer
4907  *
4908  * @rdev: radeon_device pointer
4909  *
4910  * Disable the interrupt ring buffer (CIK).
4911  */
4912 static void cik_disable_interrupts(struct radeon_device *rdev)
4913 {
4914 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4915 	u32 ih_cntl = RREG32(IH_CNTL);
4916 
4917 	ih_rb_cntl &= ~IH_RB_ENABLE;
4918 	ih_cntl &= ~ENABLE_INTR;
4919 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4920 	WREG32(IH_CNTL, ih_cntl);
4921 	/* set rptr, wptr to 0 */
4922 	WREG32(IH_RB_RPTR, 0);
4923 	WREG32(IH_RB_WPTR, 0);
4924 	rdev->ih.enabled = false;
4925 	rdev->ih.rptr = 0;
4926 }
4927 
4928 /**
4929  * cik_disable_interrupt_state - Disable all interrupt sources
4930  *
4931  * @rdev: radeon_device pointer
4932  *
4933  * Clear all interrupt enable bits used by the driver (CIK).
4934  */
4935 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4936 {
4937 	u32 tmp;
4938 
4939 	/* gfx ring */
4940 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4941 	/* sdma */
4942 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4943 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4944 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4945 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4946 	/* compute queues */
4947 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4948 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4949 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4950 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4951 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4952 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4953 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4954 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4955 	/* grbm */
4956 	WREG32(GRBM_INT_CNTL, 0);
4957 	/* vline/vblank, etc. */
4958 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4959 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4960 	if (rdev->num_crtc >= 4) {
4961 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4962 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4963 	}
4964 	if (rdev->num_crtc >= 6) {
4965 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4966 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4967 	}
4968 
4969 	/* dac hotplug */
4970 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4971 
4972 	/* digital hotplug */
4973 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4974 	WREG32(DC_HPD1_INT_CONTROL, tmp);
4975 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4976 	WREG32(DC_HPD2_INT_CONTROL, tmp);
4977 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4978 	WREG32(DC_HPD3_INT_CONTROL, tmp);
4979 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4980 	WREG32(DC_HPD4_INT_CONTROL, tmp);
4981 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4982 	WREG32(DC_HPD5_INT_CONTROL, tmp);
4983 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984 	WREG32(DC_HPD6_INT_CONTROL, tmp);
4985 
4986 }
4987 
4988 /**
4989  * cik_irq_init - init and enable the interrupt ring
4990  *
4991  * @rdev: radeon_device pointer
4992  *
 * Allocate a ring buffer for the interrupt controller,
 * disable interrupts, enable the RLC, set up the IH
 * ring buffer and then enable it (CIK).
 * Called at device load and resume.
4997  * Returns 0 for success, errors for failure.
4998  */
4999 static int cik_irq_init(struct radeon_device *rdev)
5000 {
5001 	int ret = 0;
5002 	int rb_bufsz;
5003 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5004 
5005 	/* allocate ring */
5006 	ret = r600_ih_ring_alloc(rdev);
5007 	if (ret)
5008 		return ret;
5009 
5010 	/* disable irqs */
5011 	cik_disable_interrupts(rdev);
5012 
5013 	/* init rlc */
5014 	ret = cik_rlc_resume(rdev);
5015 	if (ret) {
5016 		r600_ih_ring_fini(rdev);
5017 		return ret;
5018 	}
5019 
5020 	/* setup interrupt control */
5021 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
5022 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5023 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5024 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5025 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5026 	 */
5027 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5028 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5029 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5030 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5031 
5032 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5033 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5034 
5035 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5036 		      IH_WPTR_OVERFLOW_CLEAR |
5037 		      (rb_bufsz << 1));
5038 
5039 	if (rdev->wb.enabled)
5040 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5041 
5042 	/* set the writeback address whether it's enabled or not */
5043 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5044 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5045 
5046 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5047 
5048 	/* set rptr, wptr to 0 */
5049 	WREG32(IH_RB_RPTR, 0);
5050 	WREG32(IH_RB_WPTR, 0);
5051 
5052 	/* Default settings for IH_CNTL (disabled at first) */
5053 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5054 	/* RPTR_REARM only works if msi's are enabled */
5055 	if (rdev->msi_enabled)
5056 		ih_cntl |= RPTR_REARM;
5057 	WREG32(IH_CNTL, ih_cntl);
5058 
5059 	/* force the active interrupt state to all disabled */
5060 	cik_disable_interrupt_state(rdev);
5061 
5062 	pci_set_master(rdev->pdev);
5063 
5064 	/* enable irqs */
5065 	cik_enable_interrupts(rdev);
5066 
5067 	return ret;
5068 }
5069 
5070 /**
5071  * cik_irq_set - enable/disable interrupt sources
5072  *
5073  * @rdev: radeon_device pointer
5074  *
5075  * Enable interrupt sources on the GPU (vblanks, hpd,
5076  * etc.) (CIK).
5077  * Returns 0 for success, errors for failure.
5078  */
5079 int cik_irq_set(struct radeon_device *rdev)
5080 {
5081 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5082 		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5083 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5084 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5085 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5086 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5087 	u32 grbm_int_cntl = 0;
5088 	u32 dma_cntl, dma_cntl1;
5089 
5090 	if (!rdev->irq.installed) {
5091 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5092 		return -EINVAL;
5093 	}
5094 	/* don't enable anything if the ih is disabled */
5095 	if (!rdev->ih.enabled) {
5096 		cik_disable_interrupts(rdev);
5097 		/* force the active interrupt state to all disabled */
5098 		cik_disable_interrupt_state(rdev);
5099 		return 0;
5100 	}
5101 
5102 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5103 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5104 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5105 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5106 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5107 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108 
5109 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5110 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5111 
5112 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5113 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5114 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5115 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5116 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5117 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120 
5121 	/* enable CP interrupts on all rings */
5122 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5123 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
5124 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5125 	}
5126 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5127 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5128 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5129 		if (ring->me == 1) {
5130 			switch (ring->pipe) {
5131 			case 0:
5132 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5133 				break;
5134 			case 1:
5135 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5136 				break;
5137 			case 2:
5138 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5139 				break;
5140 			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5142 				break;
5143 			default:
5144 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5145 				break;
5146 			}
5147 		} else if (ring->me == 2) {
5148 			switch (ring->pipe) {
5149 			case 0:
5150 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5151 				break;
5152 			case 1:
5153 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5154 				break;
5155 			case 2:
5156 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5157 				break;
5158 			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5160 				break;
5161 			default:
5162 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5163 				break;
5164 			}
5165 		} else {
5166 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5167 		}
5168 	}
5169 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5170 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5171 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5172 		if (ring->me == 1) {
5173 			switch (ring->pipe) {
5174 			case 0:
5175 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5176 				break;
5177 			case 1:
5178 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5179 				break;
5180 			case 2:
5181 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5182 				break;
5183 			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5185 				break;
5186 			default:
5187 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5188 				break;
5189 			}
5190 		} else if (ring->me == 2) {
5191 			switch (ring->pipe) {
5192 			case 0:
5193 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5194 				break;
5195 			case 1:
5196 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5197 				break;
5198 			case 2:
5199 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5200 				break;
5201 			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5203 				break;
5204 			default:
5205 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5206 				break;
5207 			}
5208 		} else {
5209 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
5210 		}
5211 	}
5212 
5213 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5214 		DRM_DEBUG("cik_irq_set: sw int dma\n");
5215 		dma_cntl |= TRAP_ENABLE;
5216 	}
5217 
5218 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5219 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
5220 		dma_cntl1 |= TRAP_ENABLE;
5221 	}
5222 
5223 	if (rdev->irq.crtc_vblank_int[0] ||
5224 	    atomic_read(&rdev->irq.pflip[0])) {
5225 		DRM_DEBUG("cik_irq_set: vblank 0\n");
5226 		crtc1 |= VBLANK_INTERRUPT_MASK;
5227 	}
5228 	if (rdev->irq.crtc_vblank_int[1] ||
5229 	    atomic_read(&rdev->irq.pflip[1])) {
5230 		DRM_DEBUG("cik_irq_set: vblank 1\n");
5231 		crtc2 |= VBLANK_INTERRUPT_MASK;
5232 	}
5233 	if (rdev->irq.crtc_vblank_int[2] ||
5234 	    atomic_read(&rdev->irq.pflip[2])) {
5235 		DRM_DEBUG("cik_irq_set: vblank 2\n");
5236 		crtc3 |= VBLANK_INTERRUPT_MASK;
5237 	}
5238 	if (rdev->irq.crtc_vblank_int[3] ||
5239 	    atomic_read(&rdev->irq.pflip[3])) {
5240 		DRM_DEBUG("cik_irq_set: vblank 3\n");
5241 		crtc4 |= VBLANK_INTERRUPT_MASK;
5242 	}
5243 	if (rdev->irq.crtc_vblank_int[4] ||
5244 	    atomic_read(&rdev->irq.pflip[4])) {
5245 		DRM_DEBUG("cik_irq_set: vblank 4\n");
5246 		crtc5 |= VBLANK_INTERRUPT_MASK;
5247 	}
5248 	if (rdev->irq.crtc_vblank_int[5] ||
5249 	    atomic_read(&rdev->irq.pflip[5])) {
5250 		DRM_DEBUG("cik_irq_set: vblank 5\n");
5251 		crtc6 |= VBLANK_INTERRUPT_MASK;
5252 	}
5253 	if (rdev->irq.hpd[0]) {
5254 		DRM_DEBUG("cik_irq_set: hpd 1\n");
5255 		hpd1 |= DC_HPDx_INT_EN;
5256 	}
5257 	if (rdev->irq.hpd[1]) {
5258 		DRM_DEBUG("cik_irq_set: hpd 2\n");
5259 		hpd2 |= DC_HPDx_INT_EN;
5260 	}
5261 	if (rdev->irq.hpd[2]) {
5262 		DRM_DEBUG("cik_irq_set: hpd 3\n");
5263 		hpd3 |= DC_HPDx_INT_EN;
5264 	}
5265 	if (rdev->irq.hpd[3]) {
5266 		DRM_DEBUG("cik_irq_set: hpd 4\n");
5267 		hpd4 |= DC_HPDx_INT_EN;
5268 	}
5269 	if (rdev->irq.hpd[4]) {
5270 		DRM_DEBUG("cik_irq_set: hpd 5\n");
5271 		hpd5 |= DC_HPDx_INT_EN;
5272 	}
5273 	if (rdev->irq.hpd[5]) {
5274 		DRM_DEBUG("cik_irq_set: hpd 6\n");
5275 		hpd6 |= DC_HPDx_INT_EN;
5276 	}
5277 
5278 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5279 
5280 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5281 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5282 
5283 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5284 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5285 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5286 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5287 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5288 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5289 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5290 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5291 
5292 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5293 
5294 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5295 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5296 	if (rdev->num_crtc >= 4) {
5297 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5298 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5299 	}
5300 	if (rdev->num_crtc >= 6) {
5301 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5302 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5303 	}
5304 
5305 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
5306 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
5307 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
5308 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
5309 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
5310 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
5311 
5312 	return 0;
5313 }
5314 
5315 /**
5316  * cik_irq_ack - ack interrupt sources
5317  *
5318  * @rdev: radeon_device pointer
5319  *
5320  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
5322  * generated and do not require an explicit ack.
5323  */
5324 static inline void cik_irq_ack(struct radeon_device *rdev)
5325 {
5326 	u32 tmp;
5327 
5328 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5329 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5330 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5331 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5332 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5333 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5334 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5335 
5336 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5337 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5338 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5339 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5340 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5341 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5342 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5343 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5344 
5345 	if (rdev->num_crtc >= 4) {
5346 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5347 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5348 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5349 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5350 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5351 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5352 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5353 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5354 	}
5355 
5356 	if (rdev->num_crtc >= 6) {
5357 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5358 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5359 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5360 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5361 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5362 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5363 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5364 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5365 	}
5366 
5367 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5368 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5369 		tmp |= DC_HPDx_INT_ACK;
5370 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5371 	}
5372 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5373 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5374 		tmp |= DC_HPDx_INT_ACK;
5375 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5376 	}
5377 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5378 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5379 		tmp |= DC_HPDx_INT_ACK;
5380 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5381 	}
5382 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5383 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5384 		tmp |= DC_HPDx_INT_ACK;
5385 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5386 	}
5387 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5388 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5389 		tmp |= DC_HPDx_INT_ACK;
5390 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5391 	}
5392 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
5394 		tmp |= DC_HPDx_INT_ACK;
5395 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5396 	}
5397 }
5398 
5399 /**
5400  * cik_irq_disable - disable interrupts
5401  *
5402  * @rdev: radeon_device pointer
5403  *
5404  * Disable interrupts on the hw (CIK).
5405  */
5406 static void cik_irq_disable(struct radeon_device *rdev)
5407 {
5408 	cik_disable_interrupts(rdev);
5409 	/* Wait and acknowledge irq */
5410 	mdelay(1);
5411 	cik_irq_ack(rdev);
5412 	cik_disable_interrupt_state(rdev);
5413 }
5414 
5415 /**
 * cik_irq_suspend - disable interrupts for suspend
5417  *
5418  * @rdev: radeon_device pointer
5419  *
5420  * Disable interrupts and stop the RLC (CIK).
5421  * Used for suspend.
5422  */
5423 static void cik_irq_suspend(struct radeon_device *rdev)
5424 {
5425 	cik_irq_disable(rdev);
5426 	cik_rlc_stop(rdev);
5427 }
5428 
5429 /**
5430  * cik_irq_fini - tear down interrupt support
5431  *
5432  * @rdev: radeon_device pointer
5433  *
5434  * Disable interrupts on the hw and free the IH ring
5435  * buffer (CIK).
5436  * Used for driver unload.
5437  */
5438 static void cik_irq_fini(struct radeon_device *rdev)
5439 {
5440 	cik_irq_suspend(rdev);
5441 	r600_ih_ring_fini(rdev);
5442 }
5443 
5444 /**
5445  * cik_get_ih_wptr - get the IH ring buffer wptr
5446  *
5447  * @rdev: radeon_device pointer
5448  *
5449  * Get the IH ring buffer wptr from either the register
5450  * or the writeback memory buffer (CIK).  Also check for
5451  * ring buffer overflow and deal with it.
5452  * Used by cik_irq_process().
5453  * Returns the value of the wptr.
5454  */
5455 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5456 {
5457 	u32 wptr, tmp;
5458 
5459 	if (rdev->wb.enabled)
5460 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5461 	else
5462 		wptr = RREG32(IH_RB_WPTR);
5463 
5464 	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last vector that was not overwritten (wptr + 16).
		 * Hopefully this allows us to catch up.
5468 		 */
5469 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5471 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5472 		tmp = RREG32(IH_RB_CNTL);
5473 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5474 		WREG32(IH_RB_CNTL, tmp);
5475 	}
5476 	return (wptr & rdev->ih.ptr_mask);
5477 }
5478 
/* CIK IV Ring
5480  * Each IV ring entry is 128 bits:
5481  * [7:0]    - interrupt source id
5482  * [31:8]   - reserved
5483  * [59:32]  - interrupt source data
5484  * [63:60]  - reserved
5485  * [71:64]  - RINGID
5486  *            CP:
5487  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5488  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5489  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5490  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5491  *            PIPE_ID - ME0 0=3D
5492  *                    - ME1&2 compute dispatcher (4 pipes each)
5493  *            SDMA:
5494  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5495  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5496  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5497  * [79:72]  - VMID
5498  * [95:80]  - PASID
5499  * [127:96] - reserved
5500  */
5501 /**
5502  * cik_irq_process - interrupt handler
5503  *
5504  * @rdev: radeon_device pointer
5505  *
 * Interrupt handler (CIK).  Walk the IH ring,
5507  * ack interrupts and schedule work to handle
5508  * interrupt events.
5509  * Returns irq process return code.
5510  */
5511 int cik_irq_process(struct radeon_device *rdev)
5512 {
5513 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5514 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5515 	u32 wptr;
5516 	u32 rptr;
5517 	u32 src_id, src_data, ring_id;
5518 	u8 me_id, pipe_id, queue_id;
5519 	u32 ring_index;
5520 	bool queue_hotplug = false;
5521 	bool queue_reset = false;
5522 	u32 addr, status, mc_client;
5523 
5524 	if (!rdev->ih.enabled || rdev->shutdown)
5525 		return IRQ_NONE;
5526 
5527 	wptr = cik_get_ih_wptr(rdev);
5528 
5529 restart_ih:
5530 	/* is somebody else already processing irqs? */
5531 	if (atomic_xchg(&rdev->ih.lock, 1))
5532 		return IRQ_NONE;
5533 
5534 	rptr = rdev->ih.rptr;
5535 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5536 
5537 	/* Order reading of wptr vs. reading of IH ring data */
5538 	rmb();
5539 
5540 	/* display interrupts */
5541 	cik_irq_ack(rdev);
5542 
5543 	while (rptr != wptr) {
5544 		/* wptr/rptr are in bytes! */
5545 		ring_index = rptr / 4;
5546 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5547 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5548 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5549 
5550 		switch (src_id) {
5551 		case 1: /* D1 vblank/vline */
5552 			switch (src_data) {
5553 			case 0: /* D1 vblank */
5554 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5555 					if (rdev->irq.crtc_vblank_int[0]) {
5556 						drm_handle_vblank(rdev->ddev, 0);
5557 						rdev->pm.vblank_sync = true;
5558 						wake_up(&rdev->irq.vblank_queue);
5559 					}
5560 					if (atomic_read(&rdev->irq.pflip[0]))
5561 						radeon_crtc_handle_flip(rdev, 0);
5562 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5563 					DRM_DEBUG("IH: D1 vblank\n");
5564 				}
5565 				break;
5566 			case 1: /* D1 vline */
5567 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5568 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5569 					DRM_DEBUG("IH: D1 vline\n");
5570 				}
5571 				break;
5572 			default:
5573 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5574 				break;
5575 			}
5576 			break;
5577 		case 2: /* D2 vblank/vline */
5578 			switch (src_data) {
5579 			case 0: /* D2 vblank */
5580 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5581 					if (rdev->irq.crtc_vblank_int[1]) {
5582 						drm_handle_vblank(rdev->ddev, 1);
5583 						rdev->pm.vblank_sync = true;
5584 						wake_up(&rdev->irq.vblank_queue);
5585 					}
5586 					if (atomic_read(&rdev->irq.pflip[1]))
5587 						radeon_crtc_handle_flip(rdev, 1);
5588 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5589 					DRM_DEBUG("IH: D2 vblank\n");
5590 				}
5591 				break;
5592 			case 1: /* D2 vline */
5593 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5594 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5595 					DRM_DEBUG("IH: D2 vline\n");
5596 				}
5597 				break;
5598 			default:
5599 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5600 				break;
5601 			}
5602 			break;
5603 		case 3: /* D3 vblank/vline */
5604 			switch (src_data) {
5605 			case 0: /* D3 vblank */
5606 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5607 					if (rdev->irq.crtc_vblank_int[2]) {
5608 						drm_handle_vblank(rdev->ddev, 2);
5609 						rdev->pm.vblank_sync = true;
5610 						wake_up(&rdev->irq.vblank_queue);
5611 					}
5612 					if (atomic_read(&rdev->irq.pflip[2]))
5613 						radeon_crtc_handle_flip(rdev, 2);
5614 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5615 					DRM_DEBUG("IH: D3 vblank\n");
5616 				}
5617 				break;
5618 			case 1: /* D3 vline */
5619 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5620 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5621 					DRM_DEBUG("IH: D3 vline\n");
5622 				}
5623 				break;
5624 			default:
5625 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5626 				break;
5627 			}
5628 			break;
5629 		case 4: /* D4 vblank/vline */
5630 			switch (src_data) {
5631 			case 0: /* D4 vblank */
5632 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5633 					if (rdev->irq.crtc_vblank_int[3]) {
5634 						drm_handle_vblank(rdev->ddev, 3);
5635 						rdev->pm.vblank_sync = true;
5636 						wake_up(&rdev->irq.vblank_queue);
5637 					}
5638 					if (atomic_read(&rdev->irq.pflip[3]))
5639 						radeon_crtc_handle_flip(rdev, 3);
5640 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5641 					DRM_DEBUG("IH: D4 vblank\n");
5642 				}
5643 				break;
5644 			case 1: /* D4 vline */
5645 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5646 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5647 					DRM_DEBUG("IH: D4 vline\n");
5648 				}
5649 				break;
5650 			default:
5651 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5652 				break;
5653 			}
5654 			break;
5655 		case 5: /* D5 vblank/vline */
5656 			switch (src_data) {
5657 			case 0: /* D5 vblank */
5658 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5659 					if (rdev->irq.crtc_vblank_int[4]) {
5660 						drm_handle_vblank(rdev->ddev, 4);
5661 						rdev->pm.vblank_sync = true;
5662 						wake_up(&rdev->irq.vblank_queue);
5663 					}
5664 					if (atomic_read(&rdev->irq.pflip[4]))
5665 						radeon_crtc_handle_flip(rdev, 4);
5666 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5667 					DRM_DEBUG("IH: D5 vblank\n");
5668 				}
5669 				break;
5670 			case 1: /* D5 vline */
5671 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5672 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5673 					DRM_DEBUG("IH: D5 vline\n");
5674 				}
5675 				break;
5676 			default:
5677 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5678 				break;
5679 			}
5680 			break;
5681 		case 6: /* D6 vblank/vline */
5682 			switch (src_data) {
5683 			case 0: /* D6 vblank */
5684 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5685 					if (rdev->irq.crtc_vblank_int[5]) {
5686 						drm_handle_vblank(rdev->ddev, 5);
5687 						rdev->pm.vblank_sync = true;
5688 						wake_up(&rdev->irq.vblank_queue);
5689 					}
5690 					if (atomic_read(&rdev->irq.pflip[5]))
5691 						radeon_crtc_handle_flip(rdev, 5);
5692 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5693 					DRM_DEBUG("IH: D6 vblank\n");
5694 				}
5695 				break;
5696 			case 1: /* D6 vline */
5697 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5698 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5699 					DRM_DEBUG("IH: D6 vline\n");
5700 				}
5701 				break;
5702 			default:
5703 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5704 				break;
5705 			}
5706 			break;
5707 		case 42: /* HPD hotplug */
5708 			switch (src_data) {
5709 			case 0:
5710 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5711 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5712 					queue_hotplug = true;
5713 					DRM_DEBUG("IH: HPD1\n");
5714 				}
5715 				break;
5716 			case 1:
5717 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5718 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5719 					queue_hotplug = true;
5720 					DRM_DEBUG("IH: HPD2\n");
5721 				}
5722 				break;
5723 			case 2:
5724 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5725 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5726 					queue_hotplug = true;
5727 					DRM_DEBUG("IH: HPD3\n");
5728 				}
5729 				break;
5730 			case 3:
5731 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5732 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5733 					queue_hotplug = true;
5734 					DRM_DEBUG("IH: HPD4\n");
5735 				}
5736 				break;
5737 			case 4:
5738 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5739 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5740 					queue_hotplug = true;
5741 					DRM_DEBUG("IH: HPD5\n");
5742 				}
5743 				break;
5744 			case 5:
5745 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5746 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5747 					queue_hotplug = true;
5748 					DRM_DEBUG("IH: HPD6\n");
5749 				}
5750 				break;
5751 			default:
5752 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5753 				break;
5754 			}
5755 			break;
5756 		case 146:
5757 		case 147:
5758 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5759 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5760 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5761 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5762 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5763 				addr);
5764 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5765 				status);
5766 			cik_vm_decode_fault(rdev, status, addr, mc_client);
5767 			/* reset addr and status */
5768 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5769 			break;
5770 		case 176: /* GFX RB CP_INT */
5771 		case 177: /* GFX IB CP_INT */
5772 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5773 			break;
5774 		case 181: /* CP EOP event */
5775 			DRM_DEBUG("IH: CP EOP\n");
5776 			/* XXX check the bitfield order! */
5777 			me_id = (ring_id & 0x60) >> 5;
5778 			pipe_id = (ring_id & 0x18) >> 3;
5779 			queue_id = (ring_id & 0x7) >> 0;
5780 			switch (me_id) {
5781 			case 0:
5782 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5783 				break;
5784 			case 1:
5785 			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5789 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5790 				break;
5791 			}
5792 			break;
5793 		case 184: /* CP Privileged reg access */
5794 			DRM_ERROR("Illegal register access in command stream\n");
5795 			/* XXX check the bitfield order! */
5796 			me_id = (ring_id & 0x60) >> 5;
5797 			pipe_id = (ring_id & 0x18) >> 3;
5798 			queue_id = (ring_id & 0x7) >> 0;
5799 			switch (me_id) {
5800 			case 0:
5801 				/* This results in a full GPU reset, but all we need to do is soft
5802 				 * reset the CP for gfx
5803 				 */
5804 				queue_reset = true;
5805 				break;
5806 			case 1:
5807 				/* XXX compute */
5808 				queue_reset = true;
5809 				break;
5810 			case 2:
5811 				/* XXX compute */
5812 				queue_reset = true;
5813 				break;
5814 			}
5815 			break;
5816 		case 185: /* CP Privileged inst */
5817 			DRM_ERROR("Illegal instruction in command stream\n");
5818 			/* XXX check the bitfield order! */
5819 			me_id = (ring_id & 0x60) >> 5;
5820 			pipe_id = (ring_id & 0x18) >> 3;
5821 			queue_id = (ring_id & 0x7) >> 0;
5822 			switch (me_id) {
5823 			case 0:
5824 				/* This results in a full GPU reset, but all we need to do is soft
5825 				 * reset the CP for gfx
5826 				 */
5827 				queue_reset = true;
5828 				break;
5829 			case 1:
5830 				/* XXX compute */
5831 				queue_reset = true;
5832 				break;
5833 			case 2:
5834 				/* XXX compute */
5835 				queue_reset = true;
5836 				break;
5837 			}
5838 			break;
5839 		case 224: /* SDMA trap event */
5840 			/* XXX check the bitfield order! */
5841 			me_id = (ring_id & 0x3) >> 0;
5842 			queue_id = (ring_id & 0xc) >> 2;
5843 			DRM_DEBUG("IH: SDMA trap\n");
5844 			switch (me_id) {
5845 			case 0:
5846 				switch (queue_id) {
5847 				case 0:
5848 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5849 					break;
5850 				case 1:
5851 					/* XXX compute */
5852 					break;
5853 				case 2:
5854 					/* XXX compute */
5855 					break;
5856 				}
5857 				break;
5858 			case 1:
5859 				switch (queue_id) {
5860 				case 0:
5861 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5862 					break;
5863 				case 1:
5864 					/* XXX compute */
5865 					break;
5866 				case 2:
5867 					/* XXX compute */
5868 					break;
5869 				}
5870 				break;
5871 			}
5872 			break;
5873 		case 241: /* SDMA Privileged inst */
5874 		case 247: /* SDMA Privileged inst */
5875 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
5876 			/* XXX check the bitfield order! */
5877 			me_id = (ring_id & 0x3) >> 0;
5878 			queue_id = (ring_id & 0xc) >> 2;
5879 			switch (me_id) {
5880 			case 0:
5881 				switch (queue_id) {
5882 				case 0:
5883 					queue_reset = true;
5884 					break;
5885 				case 1:
5886 					/* XXX compute */
5887 					queue_reset = true;
5888 					break;
5889 				case 2:
5890 					/* XXX compute */
5891 					queue_reset = true;
5892 					break;
5893 				}
5894 				break;
5895 			case 1:
5896 				switch (queue_id) {
5897 				case 0:
5898 					queue_reset = true;
5899 					break;
5900 				case 1:
5901 					/* XXX compute */
5902 					queue_reset = true;
5903 					break;
5904 				case 2:
5905 					/* XXX compute */
5906 					queue_reset = true;
5907 					break;
5908 				}
5909 				break;
5910 			}
5911 			break;
5912 		case 233: /* GUI IDLE */
5913 			DRM_DEBUG("IH: GUI idle\n");
5914 			break;
5915 		default:
5916 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5917 			break;
5918 		}
5919 
5920 		/* wptr/rptr are in bytes! */
5921 		rptr += 16;
5922 		rptr &= rdev->ih.ptr_mask;
5923 	}
5924 	if (queue_hotplug)
5925 		schedule_work(&rdev->hotplug_work);
5926 	if (queue_reset)
5927 		schedule_work(&rdev->reset_work);
5928 	rdev->ih.rptr = rptr;
5929 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
5930 	atomic_set(&rdev->ih.lock, 0);
5931 
5932 	/* make sure wptr hasn't changed while processing */
5933 	wptr = cik_get_ih_wptr(rdev);
5934 	if (wptr != rptr)
5935 		goto restart_ih;
5936 
5937 	return IRQ_HANDLED;
5938 }
5939 
5940 /*
5941  * startup/shutdown callbacks
5942  */
5943 /**
5944  * cik_startup - program the asic to a functional state
5945  *
5946  * @rdev: radeon_device pointer
5947  *
5948  * Programs the asic to a functional state (CIK).
5949  * Called by cik_init() and cik_resume().
5950  * Returns 0 for success, error for failure.
5951  */
5952 static int cik_startup(struct radeon_device *rdev)
5953 {
5954 	struct radeon_ring *ring;
5955 	int r;
5956 
5957 	if (rdev->flags & RADEON_IS_IGP) {
5958 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5959 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5960 			r = cik_init_microcode(rdev);
5961 			if (r) {
5962 				DRM_ERROR("Failed to load firmware!\n");
5963 				return r;
5964 			}
5965 		}
5966 	} else {
5967 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5968 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5969 		    !rdev->mc_fw) {
5970 			r = cik_init_microcode(rdev);
5971 			if (r) {
5972 				DRM_ERROR("Failed to load firmware!\n");
5973 				return r;
5974 			}
5975 		}
5976 
5977 		r = ci_mc_load_microcode(rdev);
5978 		if (r) {
5979 			DRM_ERROR("Failed to load MC firmware!\n");
5980 			return r;
5981 		}
5982 	}
5983 
5984 	r = r600_vram_scratch_init(rdev);
5985 	if (r)
5986 		return r;
5987 
5988 	cik_mc_program(rdev);
5989 	r = cik_pcie_gart_enable(rdev);
5990 	if (r)
5991 		return r;
5992 	cik_gpu_init(rdev);
5993 
5994 	/* allocate rlc buffers */
5995 	r = si_rlc_init(rdev);
5996 	if (r) {
5997 		DRM_ERROR("Failed to init rlc BOs!\n");
5998 		return r;
5999 	}
6000 
6001 	/* allocate wb buffer */
6002 	r = radeon_wb_init(rdev);
6003 	if (r)
6004 		return r;
6005 
6006 	/* allocate mec buffers */
6007 	r = cik_mec_init(rdev);
6008 	if (r) {
6009 		DRM_ERROR("Failed to init MEC BOs!\n");
6010 		return r;
6011 	}
6012 
6013 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6014 	if (r) {
6015 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6016 		return r;
6017 	}
6018 
6019 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6020 	if (r) {
6021 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6022 		return r;
6023 	}
6024 
6025 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6026 	if (r) {
6027 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6028 		return r;
6029 	}
6030 
6031 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6032 	if (r) {
6033 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6034 		return r;
6035 	}
6036 
6037 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6038 	if (r) {
6039 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6040 		return r;
6041 	}
6042 
6043 	r = cik_uvd_resume(rdev);
6044 	if (!r) {
6045 		r = radeon_fence_driver_start_ring(rdev,
6046 						   R600_RING_TYPE_UVD_INDEX);
6047 		if (r)
6048 			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6049 	}
6050 	if (r)
6051 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6052 
6053 	/* Enable IRQ */
6054 	if (!rdev->irq.installed) {
6055 		r = radeon_irq_kms_init(rdev);
6056 		if (r)
6057 			return r;
6058 	}
6059 
6060 	r = cik_irq_init(rdev);
6061 	if (r) {
6062 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6063 		radeon_irq_kms_fini(rdev);
6064 		return r;
6065 	}
6066 	cik_irq_set(rdev);
6067 
6068 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6069 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6070 			     CP_RB0_RPTR, CP_RB0_WPTR,
6071 			     0, 0xfffff, RADEON_CP_PACKET2);
6072 	if (r)
6073 		return r;
6074 
6075 	/* set up the compute queues */
6076 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6077 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6078 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6079 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6080 			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6081 	if (r)
6082 		return r;
6083 	ring->me = 1; /* first MEC */
6084 	ring->pipe = 0; /* first pipe */
6085 	ring->queue = 0; /* first queue */
6086 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6087 
6088 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6089 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6090 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6091 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6092 			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6093 	if (r)
6094 		return r;
	/* dGPUs only have 1 MEC */
6096 	ring->me = 1; /* first MEC */
6097 	ring->pipe = 0; /* first pipe */
6098 	ring->queue = 1; /* second queue */
6099 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6100 
6101 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6102 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6103 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6104 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6105 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6106 	if (r)
6107 		return r;
6108 
6109 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6110 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6111 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6112 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6113 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6114 	if (r)
6115 		return r;
6116 
6117 	r = cik_cp_resume(rdev);
6118 	if (r)
6119 		return r;
6120 
6121 	r = cik_sdma_resume(rdev);
6122 	if (r)
6123 		return r;
6124 
6125 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6126 	if (ring->ring_size) {
6127 		r = radeon_ring_init(rdev, ring, ring->ring_size,
6128 				     R600_WB_UVD_RPTR_OFFSET,
6129 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6130 				     0, 0xfffff, RADEON_CP_PACKET2);
6131 		if (!r)
6132 			r = r600_uvd_init(rdev);
6133 		if (r)
6134 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6135 	}
6136 
6137 	r = radeon_ib_pool_init(rdev);
6138 	if (r) {
6139 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6140 		return r;
6141 	}
6142 
6143 	r = radeon_vm_manager_init(rdev);
6144 	if (r) {
6145 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6146 		return r;
6147 	}
6148 
6149 	return 0;
6150 }
6151 
6152 /**
6153  * cik_resume - resume the asic to a functional state
6154  *
6155  * @rdev: radeon_device pointer
6156  *
6157  * Programs the asic to a functional state (CIK).
6158  * Called at resume.
6159  * Returns 0 for success, error for failure.
6160  */
6161 int cik_resume(struct radeon_device *rdev)
6162 {
6163 	int r;
6164 
6165 	/* post card */
6166 	atom_asic_init(rdev->mode_info.atom_context);
6167 
6168 	/* init golden registers */
6169 	cik_init_golden_registers(rdev);
6170 
6171 	rdev->accel_working = true;
6172 	r = cik_startup(rdev);
6173 	if (r) {
6174 		DRM_ERROR("cik startup failed on resume\n");
6175 		rdev->accel_working = false;
6176 		return r;
6177 	}
6178 
6179 	return r;
6180 
6181 }
6182 
6183 /**
6184  * cik_suspend - suspend the asic
6185  *
6186  * @rdev: radeon_device pointer
6187  *
6188  * Bring the chip into a state suitable for suspend (CIK).
6189  * Called at suspend.
6190  * Returns 0 for success.
6191  */
6192 int cik_suspend(struct radeon_device *rdev)
6193 {
6194 	radeon_vm_manager_fini(rdev);
6195 	cik_cp_enable(rdev, false);
6196 	cik_sdma_enable(rdev, false);
6197 	r600_uvd_rbc_stop(rdev);
6198 	radeon_uvd_suspend(rdev);
6199 	cik_irq_suspend(rdev);
6200 	radeon_wb_disable(rdev);
6201 	cik_pcie_gart_disable(rdev);
6202 	return 0;
6203 }
6204 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic specific functions. This should also allow
 * us to remove a bunch of callback functions like vram_info.
 */
6211 /**
6212  * cik_init - asic specific driver and hw init
6213  *
6214  * @rdev: radeon_device pointer
6215  *
6216  * Setup asic specific driver variables and program the hw
6217  * to a functional state (CIK).
6218  * Called at driver startup.
6219  * Returns 0 for success, errors for failure.
6220  */
6221 int cik_init(struct radeon_device *rdev)
6222 {
6223 	struct radeon_ring *ring;
6224 	int r;
6225 
6226 	/* Read BIOS */
6227 	if (!radeon_get_bios(rdev)) {
6228 		if (ASIC_IS_AVIVO(rdev))
6229 			return -EINVAL;
6230 	}
6231 	/* Must be an ATOMBIOS */
6232 	if (!rdev->is_atom_bios) {
6233 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6234 		return -EINVAL;
6235 	}
6236 	r = radeon_atombios_init(rdev);
6237 	if (r)
6238 		return r;
6239 
6240 	/* Post card if necessary */
6241 	if (!radeon_card_posted(rdev)) {
6242 		if (!rdev->bios) {
6243 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6244 			return -EINVAL;
6245 		}
6246 		DRM_INFO("GPU not posted. posting now...\n");
6247 		atom_asic_init(rdev->mode_info.atom_context);
6248 	}
6249 	/* init golden registers */
6250 	cik_init_golden_registers(rdev);
6251 	/* Initialize scratch registers */
6252 	cik_scratch_init(rdev);
6253 	/* Initialize surface registers */
6254 	radeon_surface_init(rdev);
6255 	/* Initialize clocks */
6256 	radeon_get_clock_info(rdev->ddev);
6257 
6258 	/* Fence driver */
6259 	r = radeon_fence_driver_init(rdev);
6260 	if (r)
6261 		return r;
6262 
6263 	/* initialize memory controller */
6264 	r = cik_mc_init(rdev);
6265 	if (r)
6266 		return r;
6267 	/* Memory manager */
6268 	r = radeon_bo_init(rdev);
6269 	if (r)
6270 		return r;
6271 
6272 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6273 	ring->ring_obj = NULL;
6274 	r600_ring_init(rdev, ring, 1024 * 1024);
6275 
6276 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6277 	ring->ring_obj = NULL;
6278 	r600_ring_init(rdev, ring, 1024 * 1024);
6279 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6280 	if (r)
6281 		return r;
6282 
6283 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6284 	ring->ring_obj = NULL;
6285 	r600_ring_init(rdev, ring, 1024 * 1024);
6286 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6287 	if (r)
6288 		return r;
6289 
6290 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6291 	ring->ring_obj = NULL;
6292 	r600_ring_init(rdev, ring, 256 * 1024);
6293 
6294 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6295 	ring->ring_obj = NULL;
6296 	r600_ring_init(rdev, ring, 256 * 1024);
6297 
6298 	r = radeon_uvd_init(rdev);
6299 	if (!r) {
6300 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6301 		ring->ring_obj = NULL;
6302 		r600_ring_init(rdev, ring, 4096);
6303 	}
6304 
6305 	rdev->ih.ring_obj = NULL;
6306 	r600_ih_ring_init(rdev, 64 * 1024);
6307 
6308 	r = r600_pcie_gart_init(rdev);
6309 	if (r)
6310 		return r;
6311 
6312 	rdev->accel_working = true;
6313 	r = cik_startup(rdev);
6314 	if (r) {
6315 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6316 		cik_cp_fini(rdev);
6317 		cik_sdma_fini(rdev);
6318 		cik_irq_fini(rdev);
6319 		si_rlc_fini(rdev);
6320 		cik_mec_fini(rdev);
6321 		radeon_wb_fini(rdev);
6322 		radeon_ib_pool_fini(rdev);
6323 		radeon_vm_manager_fini(rdev);
6324 		radeon_irq_kms_fini(rdev);
6325 		cik_pcie_gart_fini(rdev);
6326 		rdev->accel_working = false;
6327 	}
6328 
6329 	/* Don't start up if the MC ucode is missing.
6330 	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
6332 	 */
6333 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6334 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6335 		return -EINVAL;
6336 	}
6337 
6338 	return 0;
6339 }
6340 
6341 /**
6342  * cik_fini - asic specific driver and hw fini
6343  *
6344  * @rdev: radeon_device pointer
6345  *
6346  * Tear down the asic specific driver variables and program the hw
6347  * to an idle state (CIK).
6348  * Called at driver unload.
6349  */
6350 void cik_fini(struct radeon_device *rdev)
6351 {
6352 	cik_cp_fini(rdev);
6353 	cik_sdma_fini(rdev);
6354 	cik_irq_fini(rdev);
6355 	si_rlc_fini(rdev);
6356 	cik_mec_fini(rdev);
6357 	radeon_wb_fini(rdev);
6358 	radeon_vm_manager_fini(rdev);
6359 	radeon_ib_pool_fini(rdev);
6360 	radeon_irq_kms_fini(rdev);
6361 	radeon_uvd_fini(rdev);
6362 	cik_pcie_gart_fini(rdev);
6363 	r600_vram_scratch_fini(rdev);
6364 	radeon_gem_fini(rdev);
6365 	radeon_fence_driver_fini(rdev);
6366 	radeon_bo_fini(rdev);
6367 	radeon_atombios_fini(rdev);
6368 	kfree(rdev->bios);
6369 	rdev->bios = NULL;
6370 }
6371 
6372 /* display watermark setup */
6373 /**
6374  * dce8_line_buffer_adjust - Set up the line buffer
6375  *
6376  * @rdev: radeon_device pointer
6377  * @radeon_crtc: the selected display controller
6378  * @mode: the current display mode on the selected display
6379  * controller
6380  *
 * Set up the line buffer allocation for
6382  * the selected display controller (CIK).
6383  * Returns the line buffer size in pixels.
6384  */
6385 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6386 				   struct radeon_crtc *radeon_crtc,
6387 				   struct drm_display_mode *mode)
6388 {
6389 	u32 tmp;
6390 
6391 	/*
6392 	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
6397 	 * them using the stereo blender.
6398 	 */
6399 	if (radeon_crtc->base.enabled && mode) {
6400 		if (mode->crtc_hdisplay < 1920)
6401 			tmp = 1;
6402 		else if (mode->crtc_hdisplay < 2560)
6403 			tmp = 2;
6404 		else if (mode->crtc_hdisplay < 4096)
6405 			tmp = 0;
6406 		else {
6407 			DRM_DEBUG_KMS("Mode too big for LB!\n");
6408 			tmp = 0;
6409 		}
6410 	} else
6411 		tmp = 1;
6412 
6413 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6414 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6415 
6416 	if (radeon_crtc->base.enabled && mode) {
6417 		switch (tmp) {
6418 		case 0:
6419 		default:
6420 			return 4096 * 2;
6421 		case 1:
6422 			return 1920 * 2;
6423 		case 2:
6424 			return 2560 * 2;
6425 		}
6426 	}
6427 
6428 	/* controller not enabled, so no lb used */
6429 	return 0;
6430 }
6431 
6432 /**
6433  * cik_get_number_of_dram_channels - get the number of dram channels
6434  *
6435  * @rdev: radeon_device pointer
6436  *
6437  * Look up the number of video ram channels (CIK).
6438  * Used for display watermark bandwidth calculations
6439  * Returns the number of dram channels
6440  */
6441 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6442 {
6443 	u32 tmp = RREG32(MC_SHARED_CHMAP);
6444 
6445 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6446 	case 0:
6447 	default:
6448 		return 1;
6449 	case 1:
6450 		return 2;
6451 	case 2:
6452 		return 4;
6453 	case 3:
6454 		return 8;
6455 	case 4:
6456 		return 3;
6457 	case 5:
6458 		return 6;
6459 	case 6:
6460 		return 10;
6461 	case 7:
6462 		return 12;
6463 	case 8:
6464 		return 16;
6465 	}
6466 }
6467 
6468 struct dce8_wm_params {
6469 	u32 dram_channels; /* number of dram channels */
6470 	u32 yclk;          /* bandwidth per dram data pin in kHz */
6471 	u32 sclk;          /* engine clock in kHz */
6472 	u32 disp_clk;      /* display clock in kHz */
6473 	u32 src_width;     /* viewport width */
6474 	u32 active_time;   /* active display time in ns */
6475 	u32 blank_time;    /* blank time in ns */
6476 	bool interlaced;    /* mode is interlaced */
6477 	fixed20_12 vsc;    /* vertical scale ratio */
6478 	u32 num_heads;     /* number of active crtcs */
6479 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6480 	u32 lb_size;       /* line buffer allocated to pipe */
6481 	u32 vtaps;         /* vertical scaler taps */
6482 };
6483 
6484 /**
6485  * dce8_dram_bandwidth - get the dram bandwidth
6486  *
6487  * @wm: watermark calculation data
6488  *
6489  * Calculate the raw dram bandwidth (CIK).
6490  * Used for display watermark bandwidth calculations
6491  * Returns the dram bandwidth in MBytes/s
6492  */
6493 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6494 {
6495 	/* Calculate raw DRAM Bandwidth */
6496 	fixed20_12 dram_efficiency; /* 0.7 */
6497 	fixed20_12 yclk, dram_channels, bandwidth;
6498 	fixed20_12 a;
6499 
6500 	a.full = dfixed_const(1000);
6501 	yclk.full = dfixed_const(wm->yclk);
6502 	yclk.full = dfixed_div(yclk, a);
6503 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6504 	a.full = dfixed_const(10);
6505 	dram_efficiency.full = dfixed_const(7);
6506 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
6507 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6508 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6509 
6510 	return dfixed_trunc(bandwidth);
6511 }
6512 
6513 /**
6514  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6515  *
6516  * @wm: watermark calculation data
6517  *
6518  * Calculate the dram bandwidth used for display (CIK).
6519  * Used for display watermark bandwidth calculations
6520  * Returns the dram bandwidth for display in MBytes/s
6521  */
6522 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6523 {
6524 	/* Calculate DRAM Bandwidth and the part allocated to display. */
6525 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6526 	fixed20_12 yclk, dram_channels, bandwidth;
6527 	fixed20_12 a;
6528 
6529 	a.full = dfixed_const(1000);
6530 	yclk.full = dfixed_const(wm->yclk);
6531 	yclk.full = dfixed_div(yclk, a);
6532 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6533 	a.full = dfixed_const(10);
6534 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
6535 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6536 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6537 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6538 
6539 	return dfixed_trunc(bandwidth);
6540 }
6541 
6542 /**
6543  * dce8_data_return_bandwidth - get the data return bandwidth
6544  *
6545  * @wm: watermark calculation data
6546  *
6547  * Calculate the data return bandwidth used for display (CIK).
6548  * Used for display watermark bandwidth calculations
6549  * Returns the data return bandwidth in MBytes/s
6550  */
6551 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6552 {
6553 	/* Calculate the display Data return Bandwidth */
6554 	fixed20_12 return_efficiency; /* 0.8 */
6555 	fixed20_12 sclk, bandwidth;
6556 	fixed20_12 a;
6557 
6558 	a.full = dfixed_const(1000);
6559 	sclk.full = dfixed_const(wm->sclk);
6560 	sclk.full = dfixed_div(sclk, a);
6561 	a.full = dfixed_const(10);
6562 	return_efficiency.full = dfixed_const(8);
6563 	return_efficiency.full = dfixed_div(return_efficiency, a);
6564 	a.full = dfixed_const(32);
6565 	bandwidth.full = dfixed_mul(a, sclk);
6566 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6567 
6568 	return dfixed_trunc(bandwidth);
6569 }
6570 
6571 /**
6572  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6573  *
6574  * @wm: watermark calculation data
6575  *
6576  * Calculate the dmif bandwidth used for display (CIK).
6577  * Used for display watermark bandwidth calculations
6578  * Returns the dmif bandwidth in MBytes/s
6579  */
6580 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6581 {
6582 	/* Calculate the DMIF Request Bandwidth */
6583 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6584 	fixed20_12 disp_clk, bandwidth;
6585 	fixed20_12 a, b;
6586 
6587 	a.full = dfixed_const(1000);
6588 	disp_clk.full = dfixed_const(wm->disp_clk);
6589 	disp_clk.full = dfixed_div(disp_clk, a);
6590 	a.full = dfixed_const(32);
6591 	b.full = dfixed_mul(a, disp_clk);
6592 
6593 	a.full = dfixed_const(10);
6594 	disp_clk_request_efficiency.full = dfixed_const(8);
6595 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6596 
6597 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6598 
6599 	return dfixed_trunc(bandwidth);
6600 }
6601 
6602 /**
6603  * dce8_available_bandwidth - get the min available bandwidth
6604  *
6605  * @wm: watermark calculation data
6606  *
6607  * Calculate the min available bandwidth used for display (CIK).
6608  * Used for display watermark bandwidth calculations
6609  * Returns the min available bandwidth in MBytes/s
6610  */
6611 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6612 {
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6614 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6615 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6616 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6617 
6618 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6619 }
6620 
6621 /**
6622  * dce8_average_bandwidth - get the average available bandwidth
6623  *
6624  * @wm: watermark calculation data
6625  *
6626  * Calculate the average available bandwidth used for display (CIK).
6627  * Used for display watermark bandwidth calculations
6628  * Returns the average available bandwidth in MBytes/s
6629  */
6630 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6631 {
6632 	/* Calculate the display mode Average Bandwidth
6633 	 * DisplayMode should contain the source and destination dimensions,
6634 	 * timing, etc.
6635 	 */
6636 	fixed20_12 bpp;
6637 	fixed20_12 line_time;
6638 	fixed20_12 src_width;
6639 	fixed20_12 bandwidth;
6640 	fixed20_12 a;
6641 
6642 	a.full = dfixed_const(1000);
6643 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6644 	line_time.full = dfixed_div(line_time, a);
6645 	bpp.full = dfixed_const(wm->bytes_per_pixel);
6646 	src_width.full = dfixed_const(wm->src_width);
6647 	bandwidth.full = dfixed_mul(src_width, bpp);
6648 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6649 	bandwidth.full = dfixed_div(bandwidth, line_time);
6650 
6651 	return dfixed_trunc(bandwidth);
6652 }
6653 
6654 /**
6655  * dce8_latency_watermark - get the latency watermark
6656  *
6657  * @wm: watermark calculation data
6658  *
6659  * Calculate the latency watermark (CIK).
6660  * Used for display watermark bandwidth calculations
6661  * Returns the latency watermark in ns
6662  */
6663 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6664 {
6665 	/* First calculate the latency in ns */
6666 	u32 mc_latency = 2000; /* 2000 ns. */
6667 	u32 available_bandwidth = dce8_available_bandwidth(wm);
6668 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6669 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6670 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6671 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6672 		(wm->num_heads * cursor_line_pair_return_time);
6673 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6674 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6675 	u32 tmp, dmif_size = 12288;
6676 	fixed20_12 a, b, c;
6677 
6678 	if (wm->num_heads == 0)
6679 		return 0;
6680 
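	/* heavier vertical scaling, more vertical taps or interlacing means
	 * up to four source lines may be fetched per destination line
	 */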
6681 	a.full = dfixed_const(2);
6682 	b.full = dfixed_const(1);
6683 	if ((wm->vsc.full > a.full) ||
6684 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6685 	    (wm->vtaps >= 5) ||
6686 	    ((wm->vsc.full >= a.full) && wm->interlaced))
6687 		max_src_lines_per_dst_line = 4;
6688 	else
6689 		max_src_lines_per_dst_line = 2;
6690 
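	/* line buffer fill bandwidth is limited by the per-head share of the
	 * available bandwidth, by the dmif drain rate over the latency window
	 * and by disp_clk * bytes_per_pixel
	 */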
6691 	a.full = dfixed_const(available_bandwidth);
6692 	b.full = dfixed_const(wm->num_heads);
6693 	a.full = dfixed_div(a, b);
6694 
6695 	b.full = dfixed_const(mc_latency + 512);
6696 	c.full = dfixed_const(wm->disp_clk);
6697 	b.full = dfixed_div(b, c);
6698 
6699 	c.full = dfixed_const(dmif_size);
6700 	b.full = dfixed_div(c, b);
6701 
6702 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6703 
6704 	b.full = dfixed_const(1000);
6705 	c.full = dfixed_const(wm->disp_clk);
6706 	b.full = dfixed_div(c, b);
6707 	c.full = dfixed_const(wm->bytes_per_pixel);
6708 	b.full = dfixed_mul(b, c);
6709 
6710 	lb_fill_bw = min(tmp, dfixed_trunc(b));
6711 
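	/* line_fill_time: how long it takes to fetch one destination line's
	 * worth of source pixels at lb_fill_bw
	 */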
6712 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6713 	b.full = dfixed_const(1000);
6714 	c.full = dfixed_const(lb_fill_bw);
6715 	b.full = dfixed_div(c, b);
6716 	a.full = dfixed_div(a, b);
6717 	line_fill_time = dfixed_trunc(a);
6718 
6719 	if (line_fill_time < wm->active_time)
6720 		return latency;
6721 	else
6722 		return latency + (line_fill_time - wm->active_time);
6723 
6724 }
6725 
6726 /**
6727  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6728  * average bandwidth against dram bandwidth for display
6729  *
6730  * @wm: watermark calculation data
6731  *
6732  * Check if the display average bandwidth fits in the display
6733  * dram bandwidth (CIK).
6734  * Used for display watermark bandwidth calculations
6735  * Returns true if the display fits, false if not.
6736  */
6737 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6738 {
6739 	if (dce8_average_bandwidth(wm) <=
6740 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6741 		return true;
6742 	else
6743 		return false;
6744 }
6745 
6746 /**
6747  * dce8_average_bandwidth_vs_available_bandwidth - check
6748  * average bandwidth against available bandwidth
6749  *
6750  * @wm: watermark calculation data
6751  *
6752  * Check if the display average bandwidth fits in the display
6753  * available bandwidth (CIK).
6754  * Used for display watermark bandwidth calculations
6755  * Returns true if the display fits, false if not.
6756  */
6757 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6758 {
6759 	if (dce8_average_bandwidth(wm) <=
6760 	    (dce8_available_bandwidth(wm) / wm->num_heads))
6761 		return true;
6762 	else
6763 		return false;
6764 }
6765 
6766 /**
6767  * dce8_check_latency_hiding - check latency hiding
6768  *
6769  * @wm: watermark calculation data
6770  *
6771  * Check latency hiding (CIK).
6772  * Used for display watermark bandwidth calculations
6773  * Returns true if the display fits, false if not.
6774  */
6775 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6776 {
6777 	u32 lb_partitions = wm->lb_size / wm->src_width;
6778 	u32 line_time = wm->active_time + wm->blank_time;
6779 	u32 latency_tolerant_lines;
6780 	u32 latency_hiding;
6781 	fixed20_12 a;
6782 
6783 	a.full = dfixed_const(1);
6784 	if (wm->vsc.full > a.full)
6785 		latency_tolerant_lines = 1;
6786 	else {
6787 		if (lb_partitions <= (wm->vtaps + 1))
6788 			latency_tolerant_lines = 1;
6789 		else
6790 			latency_tolerant_lines = 2;
6791 	}
6792 
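	/* latency_hiding: roughly how much latency the buffered lines plus
	 * the blanking period can absorb
	 */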
6793 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6794 
6795 	if (dce8_latency_watermark(wm) <= latency_hiding)
6796 		return true;
6797 	else
6798 		return false;
6799 }
6800 
6801 /**
6802  * dce8_program_watermarks - program display watermarks
6803  *
6804  * @rdev: radeon_device pointer
6805  * @radeon_crtc: the selected display controller
6806  * @lb_size: line buffer size
6807  * @num_heads: number of display controllers in use
6808  *
6809  * Calculate and program the display watermarks for the
6810  * selected display controller (CIK).
6811  */
6812 static void dce8_program_watermarks(struct radeon_device *rdev,
6813 				    struct radeon_crtc *radeon_crtc,
6814 				    u32 lb_size, u32 num_heads)
6815 {
6816 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
6817 	struct dce8_wm_params wm;
6818 	u32 pixel_period;
6819 	u32 line_time = 0;
6820 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
6821 	u32 tmp, wm_mask;
6822 
6823 	if (radeon_crtc->base.enabled && num_heads && mode) {
6824 		pixel_period = 1000000 / (u32)mode->clock;
6825 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6826 
6827 		wm.yclk = rdev->pm.current_mclk * 10;
6828 		wm.sclk = rdev->pm.current_sclk * 10;
6829 		wm.disp_clk = mode->clock;
6830 		wm.src_width = mode->crtc_hdisplay;
6831 		wm.active_time = mode->crtc_hdisplay * pixel_period;
6832 		wm.blank_time = line_time - wm.active_time;
6833 		wm.interlaced = false;
6834 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6835 			wm.interlaced = true;
6836 		wm.vsc = radeon_crtc->vsc;
6837 		wm.vtaps = 1;
6838 		if (radeon_crtc->rmx_type != RMX_OFF)
6839 			wm.vtaps = 2;
6840 		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6841 		wm.lb_size = lb_size;
6842 		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6843 		wm.num_heads = num_heads;
6844 
6845 		/* set for high clocks */
6846 		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6847 		/* set for low clocks */
6848 		/* wm.yclk = low clk; wm.sclk = low clk */
6849 		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6850 
6851 		/* possibly force display priority to high */
6852 		/* should really do this at mode validation time... */
6853 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6854 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6855 		    !dce8_check_latency_hiding(&wm) ||
6856 		    (rdev->disp_priority == 2)) {
6857 			DRM_DEBUG_KMS("force priority to high\n");
6858 		}
6859 	}
6860 
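	/* watermark A holds the high clock values and watermark B the low
	 * clock values; program both, then restore the original watermark
	 * selection
	 */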
6861 	/* select wm A */
6862 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6863 	tmp = wm_mask;
6864 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6865 	tmp |= LATENCY_WATERMARK_MASK(1);
6866 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6867 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6868 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6869 		LATENCY_HIGH_WATERMARK(line_time)));
6870 	/* select wm B */
6871 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6872 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6873 	tmp |= LATENCY_WATERMARK_MASK(2);
6874 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6875 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6876 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6877 		LATENCY_HIGH_WATERMARK(line_time)));
6878 	/* restore original selection */
6879 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6880 }
6881 
6882 /**
6883  * dce8_bandwidth_update - program display watermarks
6884  *
6885  * @rdev: radeon_device pointer
6886  *
6887  * Calculate and program the display watermarks and line
6888  * buffer allocation (CIK).
6889  */
6890 void dce8_bandwidth_update(struct radeon_device *rdev)
6891 {
6892 	struct drm_display_mode *mode = NULL;
6893 	u32 num_heads = 0, lb_size;
6894 	int i;
6895 
6896 	radeon_update_display_priority(rdev);
6897 
6898 	for (i = 0; i < rdev->num_crtc; i++) {
6899 		if (rdev->mode_info.crtcs[i]->base.enabled)
6900 			num_heads++;
6901 	}
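	/* adjust each crtc's line buffer allocation, then program its
	 * watermarks using the updated head count
	 */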
6902 	for (i = 0; i < rdev->num_crtc; i++) {
6903 		mode = &rdev->mode_info.crtcs[i]->base.mode;
6904 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6905 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6906 	}
6907 }
6908 
6909 /**
6910  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6911  *
6912  * @rdev: radeon_device pointer
6913  *
6914  * Fetches a GPU clock counter snapshot (CIK).
6915  * Returns the 64 bit clock counter snapshot.
6916  */
6917 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6918 {
6919 	uint64_t clock;
6920 
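	/* trigger a capture so the LSB and MSB halves can be read as a
	 * consistent 64-bit snapshot
	 */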
6921 	mutex_lock(&rdev->gpu_clock_mutex);
6922 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6923 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6924 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6925 	mutex_unlock(&rdev->gpu_clock_mutex);
6926 	return clock;
6927 }
6928 
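/**
 * cik_set_uvd_clock - program one of the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: divider control register
 * @status_reg: status register to poll for completion
 *
 * Look up the dividers for the requested clock, program the post
 * divider and wait for the new clock to report ready (CIK).
 * Returns 0 on success, -ETIMEDOUT if the clock does not stabilize.
 */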
6929 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6930                               u32 cntl_reg, u32 status_reg)
6931 {
6932 	int r, i;
6933 	struct atom_clock_dividers dividers;
6934 	uint32_t tmp;
6935 
6936 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6937 					   clock, false, &dividers);
6938 	if (r)
6939 		return r;
6940 
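	/* program the post divider, then poll the status register (up to
	 * 100 * 10ms) for the new clock to take effect
	 */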
6941 	tmp = RREG32_SMC(cntl_reg);
6942 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6943 	tmp |= dividers.post_divider;
6944 	WREG32_SMC(cntl_reg, tmp);
6945 
6946 	for (i = 0; i < 100; i++) {
6947 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
6948 			break;
6949 		mdelay(10);
6950 	}
6951 	if (i == 100)
6952 		return -ETIMEDOUT;
6953 
6954 	return 0;
6955 }
6956 
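/**
 * cik_set_uvd_clocks - set the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK
 * @dclk: requested DCLK
 *
 * Set the UVD VCLK and DCLK (CIK).
 * Returns 0 on success, error code on failure.
 */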
6957 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6958 {
6959 	int r = 0;
6960 
6961 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6962 	if (r)
6963 		return r;
6964 
6965 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6966 	return r;
6967 }
6968 
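/**
 * cik_uvd_resume - set up the UVD VCPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Resume UVD and point the VCPU caches at the firmware image, stack
 * and heap (CIK).
 * Returns 0 on success, error code on failure.
 */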
6969 int cik_uvd_resume(struct radeon_device *rdev)
6970 {
6971 	uint64_t addr;
6972 	uint32_t size;
6973 	int r;
6974 
6975 	r = radeon_uvd_resume(rdev);
6976 	if (r)
6977 		return r;
6978 
6979 	/* program the VCPU memory controller bits 0-27 */
6980 	addr = rdev->uvd.gpu_addr >> 3;
6981 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
6982 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6983 	WREG32(UVD_VCPU_CACHE_SIZE0, size);
6984 
6985 	addr += size;
6986 	size = RADEON_UVD_STACK_SIZE >> 3;
6987 	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6988 	WREG32(UVD_VCPU_CACHE_SIZE1, size);
6989 
6990 	addr += size;
6991 	size = RADEON_UVD_HEAP_SIZE >> 3;
6992 	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6993 	WREG32(UVD_VCPU_CACHE_SIZE2, size);
6994 
6995 	/* bits 28-31 */
6996 	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6997 	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6998 
6999 	/* bits 32-39 */
7000 	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7001 	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7002 
7003 	return 0;
7004 }
7005