xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 80ecbd24)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 
34 /* GFX */
35 #define CIK_PFP_UCODE_SIZE 2144
36 #define CIK_ME_UCODE_SIZE 2144
37 #define CIK_CE_UCODE_SIZE 2144
38 /* compute */
39 #define CIK_MEC_UCODE_SIZE 4192
40 /* interrupts */
41 #define BONAIRE_RLC_UCODE_SIZE 2048
42 #define KB_RLC_UCODE_SIZE 2560
43 #define KV_RLC_UCODE_SIZE 2560
44 /* GDDR memory controller */
45 #define CIK_MC_UCODE_SIZE 7866
46 /* sdma */
47 #define CIK_SDMA_UCODE_SIZE 1050
48 #define CIK_SDMA_UCODE_VERSION 64
49 
50 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
63 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
64 MODULE_FIRMWARE("radeon/KABINI_me.bin");
65 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
66 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
67 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
68 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
69 
70 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
71 extern void r600_ih_ring_fini(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
75 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
76 extern void si_rlc_fini(struct radeon_device *rdev);
77 extern int si_rlc_init(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 
80 /*
81  * Indirect register accessors
82  */
83 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
84 {
85 	u32 r;
86 
87 	WREG32(PCIE_INDEX, reg);
88 	(void)RREG32(PCIE_INDEX);
89 	r = RREG32(PCIE_DATA);
90 	return r;
91 }
92 
93 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
94 {
95 	WREG32(PCIE_INDEX, reg);
96 	(void)RREG32(PCIE_INDEX);
97 	WREG32(PCIE_DATA, v);
98 	(void)RREG32(PCIE_DATA);
99 }
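
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the accessors above implement the usual index/data pattern -- write the
 * register offset to PCIE_INDEX, then read or write PCIE_DATA.  A
 * read-modify-write helper built on top of them might look like the
 * hypothetical function below; the helper name and its parameters are
 * assumptions, only cik_pciep_rreg()/cik_pciep_wreg() come from this file.
 */
static inline void cik_pciep_update(struct radeon_device *rdev, u32 reg,
				    u32 clear, u32 set)
{
	u32 tmp = cik_pciep_rreg(rdev, reg);	/* latch the current value */

	tmp &= ~clear;				/* drop the bits to clear */
	tmp |= set;				/* merge in the new bits */
	cik_pciep_wreg(rdev, reg, tmp);		/* write back via index/data */
}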
100 
101 static const u32 bonaire_golden_spm_registers[] =
102 {
103 	0x30800, 0xe0ffffff, 0xe0000000
104 };
105 
106 static const u32 bonaire_golden_common_registers[] =
107 {
108 	0xc770, 0xffffffff, 0x00000800,
109 	0xc774, 0xffffffff, 0x00000800,
110 	0xc798, 0xffffffff, 0x00007fbf,
111 	0xc79c, 0xffffffff, 0x00007faf
112 };
113 
114 static const u32 bonaire_golden_registers[] =
115 {
116 	0x3354, 0x00000333, 0x00000333,
117 	0x3350, 0x000c0fc0, 0x00040200,
118 	0x9a10, 0x00010000, 0x00058208,
119 	0x3c000, 0xffff1fff, 0x00140000,
120 	0x3c200, 0xfdfc0fff, 0x00000100,
121 	0x3c234, 0x40000000, 0x40000200,
122 	0x9830, 0xffffffff, 0x00000000,
123 	0x9834, 0xf00fffff, 0x00000400,
124 	0x9838, 0x0002021c, 0x00020200,
125 	0xc78, 0x00000080, 0x00000000,
126 	0x5bb0, 0x000000f0, 0x00000070,
127 	0x5bc0, 0xf0311fff, 0x80300000,
128 	0x98f8, 0x73773777, 0x12010001,
129 	0x350c, 0x00810000, 0x408af000,
130 	0x7030, 0x31000111, 0x00000011,
131 	0x2f48, 0x73773777, 0x12010001,
132 	0x220c, 0x00007fb6, 0x0021a1b1,
133 	0x2210, 0x00007fb6, 0x002021b1,
134 	0x2180, 0x00007fb6, 0x00002191,
135 	0x2218, 0x00007fb6, 0x002121b1,
136 	0x221c, 0x00007fb6, 0x002021b1,
137 	0x21dc, 0x00007fb6, 0x00002191,
138 	0x21e0, 0x00007fb6, 0x00002191,
139 	0x3628, 0x0000003f, 0x0000000a,
140 	0x362c, 0x0000003f, 0x0000000a,
141 	0x2ae4, 0x00073ffe, 0x000022a2,
142 	0x240c, 0x000007ff, 0x00000000,
143 	0x8a14, 0xf000003f, 0x00000007,
144 	0x8bf0, 0x00002001, 0x00000001,
145 	0x8b24, 0xffffffff, 0x00ffffff,
146 	0x30a04, 0x0000ff0f, 0x00000000,
147 	0x28a4c, 0x07ffffff, 0x06000000,
148 	0x4d8, 0x00000fff, 0x00000100,
149 	0x3e78, 0x00000001, 0x00000002,
150 	0x9100, 0x03000000, 0x0362c688,
151 	0x8c00, 0x000000ff, 0x00000001,
152 	0xe40, 0x00001fff, 0x00001fff,
153 	0x9060, 0x0000007f, 0x00000020,
154 	0x9508, 0x00010000, 0x00010000,
155 	0xac14, 0x000003ff, 0x000000f3,
156 	0xac0c, 0xffffffff, 0x00001032
157 };
158 
159 static const u32 bonaire_mgcg_cgcg_init[] =
160 {
161 	0xc420, 0xffffffff, 0xfffffffc,
162 	0x30800, 0xffffffff, 0xe0000000,
163 	0x3c2a0, 0xffffffff, 0x00000100,
164 	0x3c208, 0xffffffff, 0x00000100,
165 	0x3c2c0, 0xffffffff, 0xc0000100,
166 	0x3c2c8, 0xffffffff, 0xc0000100,
167 	0x3c2c4, 0xffffffff, 0xc0000100,
168 	0x55e4, 0xffffffff, 0x00600100,
169 	0x3c280, 0xffffffff, 0x00000100,
170 	0x3c214, 0xffffffff, 0x06000100,
171 	0x3c220, 0xffffffff, 0x00000100,
172 	0x3c218, 0xffffffff, 0x06000100,
173 	0x3c204, 0xffffffff, 0x00000100,
174 	0x3c2e0, 0xffffffff, 0x00000100,
175 	0x3c224, 0xffffffff, 0x00000100,
176 	0x3c200, 0xffffffff, 0x00000100,
177 	0x3c230, 0xffffffff, 0x00000100,
178 	0x3c234, 0xffffffff, 0x00000100,
179 	0x3c250, 0xffffffff, 0x00000100,
180 	0x3c254, 0xffffffff, 0x00000100,
181 	0x3c258, 0xffffffff, 0x00000100,
182 	0x3c25c, 0xffffffff, 0x00000100,
183 	0x3c260, 0xffffffff, 0x00000100,
184 	0x3c27c, 0xffffffff, 0x00000100,
185 	0x3c278, 0xffffffff, 0x00000100,
186 	0x3c210, 0xffffffff, 0x06000100,
187 	0x3c290, 0xffffffff, 0x00000100,
188 	0x3c274, 0xffffffff, 0x00000100,
189 	0x3c2b4, 0xffffffff, 0x00000100,
190 	0x3c2b0, 0xffffffff, 0x00000100,
191 	0x3c270, 0xffffffff, 0x00000100,
192 	0x30800, 0xffffffff, 0xe0000000,
193 	0x3c020, 0xffffffff, 0x00010000,
194 	0x3c024, 0xffffffff, 0x00030002,
195 	0x3c028, 0xffffffff, 0x00040007,
196 	0x3c02c, 0xffffffff, 0x00060005,
197 	0x3c030, 0xffffffff, 0x00090008,
198 	0x3c034, 0xffffffff, 0x00010000,
199 	0x3c038, 0xffffffff, 0x00030002,
200 	0x3c03c, 0xffffffff, 0x00040007,
201 	0x3c040, 0xffffffff, 0x00060005,
202 	0x3c044, 0xffffffff, 0x00090008,
203 	0x3c048, 0xffffffff, 0x00010000,
204 	0x3c04c, 0xffffffff, 0x00030002,
205 	0x3c050, 0xffffffff, 0x00040007,
206 	0x3c054, 0xffffffff, 0x00060005,
207 	0x3c058, 0xffffffff, 0x00090008,
208 	0x3c05c, 0xffffffff, 0x00010000,
209 	0x3c060, 0xffffffff, 0x00030002,
210 	0x3c064, 0xffffffff, 0x00040007,
211 	0x3c068, 0xffffffff, 0x00060005,
212 	0x3c06c, 0xffffffff, 0x00090008,
213 	0x3c070, 0xffffffff, 0x00010000,
214 	0x3c074, 0xffffffff, 0x00030002,
215 	0x3c078, 0xffffffff, 0x00040007,
216 	0x3c07c, 0xffffffff, 0x00060005,
217 	0x3c080, 0xffffffff, 0x00090008,
218 	0x3c084, 0xffffffff, 0x00010000,
219 	0x3c088, 0xffffffff, 0x00030002,
220 	0x3c08c, 0xffffffff, 0x00040007,
221 	0x3c090, 0xffffffff, 0x00060005,
222 	0x3c094, 0xffffffff, 0x00090008,
223 	0x3c098, 0xffffffff, 0x00010000,
224 	0x3c09c, 0xffffffff, 0x00030002,
225 	0x3c0a0, 0xffffffff, 0x00040007,
226 	0x3c0a4, 0xffffffff, 0x00060005,
227 	0x3c0a8, 0xffffffff, 0x00090008,
228 	0x3c000, 0xffffffff, 0x96e00200,
229 	0x8708, 0xffffffff, 0x00900100,
230 	0xc424, 0xffffffff, 0x0020003f,
231 	0x38, 0xffffffff, 0x0140001c,
232 	0x3c, 0x000f0000, 0x000f0000,
233 	0x220, 0xffffffff, 0xC060000C,
234 	0x224, 0xc0000fff, 0x00000100,
235 	0xf90, 0xffffffff, 0x00000100,
236 	0xf98, 0x00000101, 0x00000000,
237 	0x20a8, 0xffffffff, 0x00000104,
238 	0x55e4, 0xff000fff, 0x00000100,
239 	0x30cc, 0xc0000fff, 0x00000104,
240 	0xc1e4, 0x00000001, 0x00000001,
241 	0xd00c, 0xff000ff0, 0x00000100,
242 	0xd80c, 0xff000ff0, 0x00000100
243 };
244 
245 static const u32 spectre_golden_spm_registers[] =
246 {
247 	0x30800, 0xe0ffffff, 0xe0000000
248 };
249 
250 static const u32 spectre_golden_common_registers[] =
251 {
252 	0xc770, 0xffffffff, 0x00000800,
253 	0xc774, 0xffffffff, 0x00000800,
254 	0xc798, 0xffffffff, 0x00007fbf,
255 	0xc79c, 0xffffffff, 0x00007faf
256 };
257 
258 static const u32 spectre_golden_registers[] =
259 {
260 	0x3c000, 0xffff1fff, 0x96940200,
261 	0x3c00c, 0xffff0001, 0xff000000,
262 	0x3c200, 0xfffc0fff, 0x00000100,
263 	0x6ed8, 0x00010101, 0x00010000,
264 	0x9834, 0xf00fffff, 0x00000400,
265 	0x9838, 0xfffffffc, 0x00020200,
266 	0x5bb0, 0x000000f0, 0x00000070,
267 	0x5bc0, 0xf0311fff, 0x80300000,
268 	0x98f8, 0x73773777, 0x12010001,
269 	0x9b7c, 0x00ff0000, 0x00fc0000,
270 	0x2f48, 0x73773777, 0x12010001,
271 	0x8a14, 0xf000003f, 0x00000007,
272 	0x8b24, 0xffffffff, 0x00ffffff,
273 	0x28350, 0x3f3f3fff, 0x00000082,
274 	0x28355, 0x0000003f, 0x00000000,
275 	0x3e78, 0x00000001, 0x00000002,
276 	0x913c, 0xffff03df, 0x00000004,
277 	0xc768, 0x00000008, 0x00000008,
278 	0x8c00, 0x000008ff, 0x00000800,
279 	0x9508, 0x00010000, 0x00010000,
280 	0xac0c, 0xffffffff, 0x54763210,
281 	0x214f8, 0x01ff01ff, 0x00000002,
282 	0x21498, 0x007ff800, 0x00200000,
283 	0x2015c, 0xffffffff, 0x00000f40,
284 	0x30934, 0xffffffff, 0x00000001
285 };
286 
287 static const u32 spectre_mgcg_cgcg_init[] =
288 {
289 	0xc420, 0xffffffff, 0xfffffffc,
290 	0x30800, 0xffffffff, 0xe0000000,
291 	0x3c2a0, 0xffffffff, 0x00000100,
292 	0x3c208, 0xffffffff, 0x00000100,
293 	0x3c2c0, 0xffffffff, 0x00000100,
294 	0x3c2c8, 0xffffffff, 0x00000100,
295 	0x3c2c4, 0xffffffff, 0x00000100,
296 	0x55e4, 0xffffffff, 0x00600100,
297 	0x3c280, 0xffffffff, 0x00000100,
298 	0x3c214, 0xffffffff, 0x06000100,
299 	0x3c220, 0xffffffff, 0x00000100,
300 	0x3c218, 0xffffffff, 0x06000100,
301 	0x3c204, 0xffffffff, 0x00000100,
302 	0x3c2e0, 0xffffffff, 0x00000100,
303 	0x3c224, 0xffffffff, 0x00000100,
304 	0x3c200, 0xffffffff, 0x00000100,
305 	0x3c230, 0xffffffff, 0x00000100,
306 	0x3c234, 0xffffffff, 0x00000100,
307 	0x3c250, 0xffffffff, 0x00000100,
308 	0x3c254, 0xffffffff, 0x00000100,
309 	0x3c258, 0xffffffff, 0x00000100,
310 	0x3c25c, 0xffffffff, 0x00000100,
311 	0x3c260, 0xffffffff, 0x00000100,
312 	0x3c27c, 0xffffffff, 0x00000100,
313 	0x3c278, 0xffffffff, 0x00000100,
314 	0x3c210, 0xffffffff, 0x06000100,
315 	0x3c290, 0xffffffff, 0x00000100,
316 	0x3c274, 0xffffffff, 0x00000100,
317 	0x3c2b4, 0xffffffff, 0x00000100,
318 	0x3c2b0, 0xffffffff, 0x00000100,
319 	0x3c270, 0xffffffff, 0x00000100,
320 	0x30800, 0xffffffff, 0xe0000000,
321 	0x3c020, 0xffffffff, 0x00010000,
322 	0x3c024, 0xffffffff, 0x00030002,
323 	0x3c028, 0xffffffff, 0x00040007,
324 	0x3c02c, 0xffffffff, 0x00060005,
325 	0x3c030, 0xffffffff, 0x00090008,
326 	0x3c034, 0xffffffff, 0x00010000,
327 	0x3c038, 0xffffffff, 0x00030002,
328 	0x3c03c, 0xffffffff, 0x00040007,
329 	0x3c040, 0xffffffff, 0x00060005,
330 	0x3c044, 0xffffffff, 0x00090008,
331 	0x3c048, 0xffffffff, 0x00010000,
332 	0x3c04c, 0xffffffff, 0x00030002,
333 	0x3c050, 0xffffffff, 0x00040007,
334 	0x3c054, 0xffffffff, 0x00060005,
335 	0x3c058, 0xffffffff, 0x00090008,
336 	0x3c05c, 0xffffffff, 0x00010000,
337 	0x3c060, 0xffffffff, 0x00030002,
338 	0x3c064, 0xffffffff, 0x00040007,
339 	0x3c068, 0xffffffff, 0x00060005,
340 	0x3c06c, 0xffffffff, 0x00090008,
341 	0x3c070, 0xffffffff, 0x00010000,
342 	0x3c074, 0xffffffff, 0x00030002,
343 	0x3c078, 0xffffffff, 0x00040007,
344 	0x3c07c, 0xffffffff, 0x00060005,
345 	0x3c080, 0xffffffff, 0x00090008,
346 	0x3c084, 0xffffffff, 0x00010000,
347 	0x3c088, 0xffffffff, 0x00030002,
348 	0x3c08c, 0xffffffff, 0x00040007,
349 	0x3c090, 0xffffffff, 0x00060005,
350 	0x3c094, 0xffffffff, 0x00090008,
351 	0x3c098, 0xffffffff, 0x00010000,
352 	0x3c09c, 0xffffffff, 0x00030002,
353 	0x3c0a0, 0xffffffff, 0x00040007,
354 	0x3c0a4, 0xffffffff, 0x00060005,
355 	0x3c0a8, 0xffffffff, 0x00090008,
356 	0x3c0ac, 0xffffffff, 0x00010000,
357 	0x3c0b0, 0xffffffff, 0x00030002,
358 	0x3c0b4, 0xffffffff, 0x00040007,
359 	0x3c0b8, 0xffffffff, 0x00060005,
360 	0x3c0bc, 0xffffffff, 0x00090008,
361 	0x3c000, 0xffffffff, 0x96e00200,
362 	0x8708, 0xffffffff, 0x00900100,
363 	0xc424, 0xffffffff, 0x0020003f,
364 	0x38, 0xffffffff, 0x0140001c,
365 	0x3c, 0x000f0000, 0x000f0000,
366 	0x220, 0xffffffff, 0xC060000C,
367 	0x224, 0xc0000fff, 0x00000100,
368 	0xf90, 0xffffffff, 0x00000100,
369 	0xf98, 0x00000101, 0x00000000,
370 	0x20a8, 0xffffffff, 0x00000104,
371 	0x55e4, 0xff000fff, 0x00000100,
372 	0x30cc, 0xc0000fff, 0x00000104,
373 	0xc1e4, 0x00000001, 0x00000001,
374 	0xd00c, 0xff000ff0, 0x00000100,
375 	0xd80c, 0xff000ff0, 0x00000100
376 };
377 
378 static const u32 kalindi_golden_spm_registers[] =
379 {
380 	0x30800, 0xe0ffffff, 0xe0000000
381 };
382 
383 static const u32 kalindi_golden_common_registers[] =
384 {
385 	0xc770, 0xffffffff, 0x00000800,
386 	0xc774, 0xffffffff, 0x00000800,
387 	0xc798, 0xffffffff, 0x00007fbf,
388 	0xc79c, 0xffffffff, 0x00007faf
389 };
390 
391 static const u32 kalindi_golden_registers[] =
392 {
393 	0x3c000, 0xffffdfff, 0x6e944040,
394 	0x55e4, 0xff607fff, 0xfc000100,
395 	0x3c220, 0xff000fff, 0x00000100,
396 	0x3c224, 0xff000fff, 0x00000100,
397 	0x3c200, 0xfffc0fff, 0x00000100,
398 	0x6ed8, 0x00010101, 0x00010000,
399 	0x9830, 0xffffffff, 0x00000000,
400 	0x9834, 0xf00fffff, 0x00000400,
401 	0x5bb0, 0x000000f0, 0x00000070,
402 	0x5bc0, 0xf0311fff, 0x80300000,
403 	0x98f8, 0x73773777, 0x12010001,
404 	0x98fc, 0xffffffff, 0x00000010,
405 	0x9b7c, 0x00ff0000, 0x00fc0000,
406 	0x8030, 0x00001f0f, 0x0000100a,
407 	0x2f48, 0x73773777, 0x12010001,
408 	0x2408, 0x000fffff, 0x000c007f,
409 	0x8a14, 0xf000003f, 0x00000007,
410 	0x8b24, 0x3fff3fff, 0x00ffcfff,
411 	0x30a04, 0x0000ff0f, 0x00000000,
412 	0x28a4c, 0x07ffffff, 0x06000000,
413 	0x4d8, 0x00000fff, 0x00000100,
414 	0x3e78, 0x00000001, 0x00000002,
415 	0xc768, 0x00000008, 0x00000008,
416 	0x8c00, 0x000000ff, 0x00000003,
417 	0x214f8, 0x01ff01ff, 0x00000002,
418 	0x21498, 0x007ff800, 0x00200000,
419 	0x2015c, 0xffffffff, 0x00000f40,
420 	0x88c4, 0x001f3ae3, 0x00000082,
421 	0x88d4, 0x0000001f, 0x00000010,
422 	0x30934, 0xffffffff, 0x00000000
423 };
424 
425 static const u32 kalindi_mgcg_cgcg_init[] =
426 {
427 	0xc420, 0xffffffff, 0xfffffffc,
428 	0x30800, 0xffffffff, 0xe0000000,
429 	0x3c2a0, 0xffffffff, 0x00000100,
430 	0x3c208, 0xffffffff, 0x00000100,
431 	0x3c2c0, 0xffffffff, 0x00000100,
432 	0x3c2c8, 0xffffffff, 0x00000100,
433 	0x3c2c4, 0xffffffff, 0x00000100,
434 	0x55e4, 0xffffffff, 0x00600100,
435 	0x3c280, 0xffffffff, 0x00000100,
436 	0x3c214, 0xffffffff, 0x06000100,
437 	0x3c220, 0xffffffff, 0x00000100,
438 	0x3c218, 0xffffffff, 0x06000100,
439 	0x3c204, 0xffffffff, 0x00000100,
440 	0x3c2e0, 0xffffffff, 0x00000100,
441 	0x3c224, 0xffffffff, 0x00000100,
442 	0x3c200, 0xffffffff, 0x00000100,
443 	0x3c230, 0xffffffff, 0x00000100,
444 	0x3c234, 0xffffffff, 0x00000100,
445 	0x3c250, 0xffffffff, 0x00000100,
446 	0x3c254, 0xffffffff, 0x00000100,
447 	0x3c258, 0xffffffff, 0x00000100,
448 	0x3c25c, 0xffffffff, 0x00000100,
449 	0x3c260, 0xffffffff, 0x00000100,
450 	0x3c27c, 0xffffffff, 0x00000100,
451 	0x3c278, 0xffffffff, 0x00000100,
452 	0x3c210, 0xffffffff, 0x06000100,
453 	0x3c290, 0xffffffff, 0x00000100,
454 	0x3c274, 0xffffffff, 0x00000100,
455 	0x3c2b4, 0xffffffff, 0x00000100,
456 	0x3c2b0, 0xffffffff, 0x00000100,
457 	0x3c270, 0xffffffff, 0x00000100,
458 	0x30800, 0xffffffff, 0xe0000000,
459 	0x3c020, 0xffffffff, 0x00010000,
460 	0x3c024, 0xffffffff, 0x00030002,
461 	0x3c028, 0xffffffff, 0x00040007,
462 	0x3c02c, 0xffffffff, 0x00060005,
463 	0x3c030, 0xffffffff, 0x00090008,
464 	0x3c034, 0xffffffff, 0x00010000,
465 	0x3c038, 0xffffffff, 0x00030002,
466 	0x3c03c, 0xffffffff, 0x00040007,
467 	0x3c040, 0xffffffff, 0x00060005,
468 	0x3c044, 0xffffffff, 0x00090008,
469 	0x3c000, 0xffffffff, 0x96e00200,
470 	0x8708, 0xffffffff, 0x00900100,
471 	0xc424, 0xffffffff, 0x0020003f,
472 	0x38, 0xffffffff, 0x0140001c,
473 	0x3c, 0x000f0000, 0x000f0000,
474 	0x220, 0xffffffff, 0xC060000C,
475 	0x224, 0xc0000fff, 0x00000100,
476 	0x20a8, 0xffffffff, 0x00000104,
477 	0x55e4, 0xff000fff, 0x00000100,
478 	0x30cc, 0xc0000fff, 0x00000104,
479 	0xc1e4, 0x00000001, 0x00000001,
480 	0xd00c, 0xff000ff0, 0x00000100,
481 	0xd80c, 0xff000ff0, 0x00000100
482 };
483 
484 static void cik_init_golden_registers(struct radeon_device *rdev)
485 {
486 	switch (rdev->family) {
487 	case CHIP_BONAIRE:
488 		radeon_program_register_sequence(rdev,
489 						 bonaire_mgcg_cgcg_init,
490 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
491 		radeon_program_register_sequence(rdev,
492 						 bonaire_golden_registers,
493 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
494 		radeon_program_register_sequence(rdev,
495 						 bonaire_golden_common_registers,
496 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
497 		radeon_program_register_sequence(rdev,
498 						 bonaire_golden_spm_registers,
499 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
500 		break;
501 	case CHIP_KABINI:
502 		radeon_program_register_sequence(rdev,
503 						 kalindi_mgcg_cgcg_init,
504 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
505 		radeon_program_register_sequence(rdev,
506 						 kalindi_golden_registers,
507 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
508 		radeon_program_register_sequence(rdev,
509 						 kalindi_golden_common_registers,
510 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
511 		radeon_program_register_sequence(rdev,
512 						 kalindi_golden_spm_registers,
513 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
514 		break;
515 	case CHIP_KAVERI:
516 		radeon_program_register_sequence(rdev,
517 						 spectre_mgcg_cgcg_init,
518 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
519 		radeon_program_register_sequence(rdev,
520 						 spectre_golden_registers,
521 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
522 		radeon_program_register_sequence(rdev,
523 						 spectre_golden_common_registers,
524 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
525 		radeon_program_register_sequence(rdev,
526 						 spectre_golden_spm_registers,
527 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
528 		break;
529 	default:
530 		break;
531 	}
532 }
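
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * each golden table above is a flat array of {offset, and_mask, or_mask}
 * triplets.  radeon_program_register_sequence() is assumed to read-modify-
 * write each register, clearing the bits covered by and_mask and OR-ing in
 * or_mask (writing or_mask directly when the mask covers the whole
 * register).  A minimal version of that loop could look like the
 * hypothetical helper below; the name is an assumption and the real
 * implementation lives in the shared radeon code.
 */
static void cik_apply_golden_triplets(struct radeon_device *rdev,
				      const u32 *regs, u32 array_size)
{
	u32 i;

	for (i = 0; i + 2 < array_size; i += 3) {
		u32 reg = regs[i + 0];
		u32 and_mask = regs[i + 1];
		u32 or_mask = regs[i + 2];
		u32 tmp;

		if (and_mask == 0xffffffff) {
			tmp = or_mask;		/* full-register override */
		} else {
			tmp = RREG32(reg);	/* keep the untouched bits */
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}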
533 
534 /**
535  * cik_get_xclk - get the xclk
536  *
537  * @rdev: radeon_device pointer
538  *
539  * Returns the reference clock used by the gfx engine
540  * (CIK).
541  */
542 u32 cik_get_xclk(struct radeon_device *rdev)
543 {
544 	u32 reference_clock = rdev->clock.spll.reference_freq;
545 
546 	if (rdev->flags & RADEON_IS_IGP) {
547 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
548 			return reference_clock / 2;
549 	} else {
550 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
551 			return reference_clock / 4;
552 	}
553 	return reference_clock;
554 }
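
/*
 * Worked example (editorial note, not part of the original file): with a
 * hypothetical 100 MHz SPLL reference clock, an IGP part with
 * GPU_COUNTER_CLK set would report 100 / 2 = 50 MHz, a dGPU with
 * XTALIN_DIVIDE set would report 100 / 4 = 25 MHz, and either part would
 * otherwise return the full 100 MHz reference clock.
 */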
555 
556 /**
557  * cik_mm_rdoorbell - read a doorbell dword
558  *
559  * @rdev: radeon_device pointer
560  * @offset: byte offset into the aperture
561  *
562  * Returns the value in the doorbell aperture at the
563  * requested offset (CIK).
564  */
565 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
566 {
567 	if (offset < rdev->doorbell.size) {
568 		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
569 	} else {
570 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
571 		return 0;
572 	}
573 }
574 
575 /**
576  * cik_mm_wdoorbell - write a doorbell dword
577  *
578  * @rdev: radeon_device pointer
579  * @offset: byte offset into the aperture
580  * @v: value to write
581  *
582  * Writes @v to the doorbell aperture at the
583  * requested offset (CIK).
584  */
585 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
586 {
587 	if (offset < rdev->doorbell.size) {
588 		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
589 	} else {
590 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
591 	}
592 }
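
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the helpers above bounds-check a byte offset against the mapped doorbell
 * aperture and then use readl()/writel() on rdev->doorbell.ptr.  One way a
 * caller might exercise them, purely for debugging, is shown below; the
 * helper name is an assumption, only cik_mm_rdoorbell() comes from this
 * file.
 */
static inline void cik_doorbell_debug_read(struct radeon_device *rdev,
					   u32 offset)
{
	u32 val = cik_mm_rdoorbell(rdev, offset);	/* bounds-checked read */

	DRM_DEBUG("doorbell[0x%08x] = 0x%08x\n", offset, val);
}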
593 
594 #define BONAIRE_IO_MC_REGS_SIZE 36
595 
596 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
597 {
598 	{0x00000070, 0x04400000},
599 	{0x00000071, 0x80c01803},
600 	{0x00000072, 0x00004004},
601 	{0x00000073, 0x00000100},
602 	{0x00000074, 0x00ff0000},
603 	{0x00000075, 0x34000000},
604 	{0x00000076, 0x08000014},
605 	{0x00000077, 0x00cc08ec},
606 	{0x00000078, 0x00000400},
607 	{0x00000079, 0x00000000},
608 	{0x0000007a, 0x04090000},
609 	{0x0000007c, 0x00000000},
610 	{0x0000007e, 0x4408a8e8},
611 	{0x0000007f, 0x00000304},
612 	{0x00000080, 0x00000000},
613 	{0x00000082, 0x00000001},
614 	{0x00000083, 0x00000002},
615 	{0x00000084, 0xf3e4f400},
616 	{0x00000085, 0x052024e3},
617 	{0x00000087, 0x00000000},
618 	{0x00000088, 0x01000000},
619 	{0x0000008a, 0x1c0a0000},
620 	{0x0000008b, 0xff010000},
621 	{0x0000008d, 0xffffefff},
622 	{0x0000008e, 0xfff3efff},
623 	{0x0000008f, 0xfff3efbf},
624 	{0x00000092, 0xf7ffffff},
625 	{0x00000093, 0xffffff7f},
626 	{0x00000095, 0x00101101},
627 	{0x00000096, 0x00000fff},
628 	{0x00000097, 0x00116fff},
629 	{0x00000098, 0x60010000},
630 	{0x00000099, 0x10010000},
631 	{0x0000009a, 0x00006000},
632 	{0x0000009b, 0x00001000},
633 	{0x0000009f, 0x00b48000}
634 };
635 
636 /**
637  * cik_srbm_select - select specific register instances
638  *
639  * @rdev: radeon_device pointer
640  * @me: selected ME (micro engine)
641  * @pipe: pipe
642  * @queue: queue
643  * @vmid: VMID
644  *
645  * Switches the currently active register instances.  Some
646  * registers are instanced per VMID, others are instanced per
647  * me/pipe/queue combination.
648  */
649 static void cik_srbm_select(struct radeon_device *rdev,
650 			    u32 me, u32 pipe, u32 queue, u32 vmid)
651 {
652 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
653 			     MEID(me & 0x3) |
654 			     VMID(vmid & 0xf) |
655 			     QUEUEID(queue & 0x7));
656 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
657 }
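
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * code that touches instanced registers is expected to select the target
 * me/pipe/queue/vmid, program the registers, and then restore the
 * selection to instance 0 so later accesses hit the default instance.  The
 * hypothetical helper below shows that pattern for a single register
 * write; callers are assumed to serialize access to SRBM_GFX_CNTL
 * themselves.
 */
static inline void cik_srbm_write_instanced(struct radeon_device *rdev,
					    u32 me, u32 pipe, u32 queue,
					    u32 vmid, u32 reg, u32 val)
{
	cik_srbm_select(rdev, me, pipe, queue, vmid);	/* pick the instance */
	WREG32(reg, val);				/* program it */
	cik_srbm_select(rdev, 0, 0, 0, 0);		/* back to defaults */
}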
658 
659 /* ucode loading */
660 /**
661  * ci_mc_load_microcode - load MC ucode into the hw
662  *
663  * @rdev: radeon_device pointer
664  *
665  * Load the GDDR MC ucode into the hw (CIK).
666  * Returns 0 on success, error on failure.
667  */
668 static int ci_mc_load_microcode(struct radeon_device *rdev)
669 {
670 	const __be32 *fw_data;
671 	u32 running, blackout = 0;
672 	u32 *io_mc_regs;
673 	int i, ucode_size, regs_size;
674 
675 	if (!rdev->mc_fw)
676 		return -EINVAL;
677 
678 	switch (rdev->family) {
679 	case CHIP_BONAIRE:
680 	default:
681 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
682 		ucode_size = CIK_MC_UCODE_SIZE;
683 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
684 		break;
685 	}
686 
687 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
688 
689 	if (running == 0) {
690 		if (running) {
691 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
692 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
693 		}
694 
695 		/* reset the engine and set to writable */
696 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
697 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
698 
699 		/* load mc io regs */
700 		for (i = 0; i < regs_size; i++) {
701 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
702 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
703 		}
704 		/* load the MC ucode */
705 		fw_data = (const __be32 *)rdev->mc_fw->data;
706 		for (i = 0; i < ucode_size; i++)
707 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
708 
709 		/* put the engine back into the active state */
710 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
711 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
712 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
713 
714 		/* wait for training to complete */
715 		for (i = 0; i < rdev->usec_timeout; i++) {
716 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
717 				break;
718 			udelay(1);
719 		}
720 		for (i = 0; i < rdev->usec_timeout; i++) {
721 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
722 				break;
723 			udelay(1);
724 		}
725 
726 		if (running)
727 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
728 	}
729 
730 	return 0;
731 }
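
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the training waits above are the usual "poll a status bit with a bounded
 * microsecond timeout" pattern, polling TRAIN_DONE_D0/D1 with udelay(1) up
 * to rdev->usec_timeout iterations.  Factored out, it might look like this
 * hypothetical helper; the name and return convention are assumptions.
 */
static int cik_wait_for_bit(struct radeon_device *rdev, u32 reg, u32 mask)
{
	int i;

	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(reg) & mask)
			return 0;	/* bit set within the timeout */
		udelay(1);
	}
	return -ETIMEDOUT;		/* caller decides how to handle it */
}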
732 
733 /**
734  * cik_init_microcode - load ucode images from disk
735  *
736  * @rdev: radeon_device pointer
737  *
738  * Use the firmware interface to load the ucode images into
739  * the driver (not loaded into hw).
740  * Returns 0 on success, error on failure.
741  */
742 static int cik_init_microcode(struct radeon_device *rdev)
743 {
744 	const char *chip_name;
745 	size_t pfp_req_size, me_req_size, ce_req_size,
746 		mec_req_size, rlc_req_size, mc_req_size,
747 		sdma_req_size;
748 	char fw_name[30];
749 	int err;
750 
751 	DRM_DEBUG("\n");
752 
753 	switch (rdev->family) {
754 	case CHIP_BONAIRE:
755 		chip_name = "BONAIRE";
756 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
757 		me_req_size = CIK_ME_UCODE_SIZE * 4;
758 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
759 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
760 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
761 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
762 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
763 		break;
764 	case CHIP_KAVERI:
765 		chip_name = "KAVERI";
766 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767 		me_req_size = CIK_ME_UCODE_SIZE * 4;
768 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
769 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
771 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772 		break;
773 	case CHIP_KABINI:
774 		chip_name = "KABINI";
775 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
776 		me_req_size = CIK_ME_UCODE_SIZE * 4;
777 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
778 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
779 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
780 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
781 		break;
782 	default: BUG();
783 	}
784 
785 	DRM_INFO("Loading %s Microcode\n", chip_name);
786 
787 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
788 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
789 	if (err)
790 		goto out;
791 	if (rdev->pfp_fw->size != pfp_req_size) {
792 		printk(KERN_ERR
793 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
794 		       rdev->pfp_fw->size, fw_name);
795 		err = -EINVAL;
796 		goto out;
797 	}
798 
799 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
800 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
801 	if (err)
802 		goto out;
803 	if (rdev->me_fw->size != me_req_size) {
804 		printk(KERN_ERR
805 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
806 		       rdev->me_fw->size, fw_name);
807 		err = -EINVAL;
808 	}
809 
810 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
811 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
812 	if (err)
813 		goto out;
814 	if (rdev->ce_fw->size != ce_req_size) {
815 		printk(KERN_ERR
816 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
817 		       rdev->ce_fw->size, fw_name);
818 		err = -EINVAL;
819 	}
820 
821 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
822 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
823 	if (err)
824 		goto out;
825 	if (rdev->mec_fw->size != mec_req_size) {
826 		printk(KERN_ERR
827 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
828 		       rdev->mec_fw->size, fw_name);
829 		err = -EINVAL;
830 	}
831 
832 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
833 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
834 	if (err)
835 		goto out;
836 	if (rdev->rlc_fw->size != rlc_req_size) {
837 		printk(KERN_ERR
838 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
839 		       rdev->rlc_fw->size, fw_name);
840 		err = -EINVAL;
841 	}
842 
843 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
844 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
845 	if (err)
846 		goto out;
847 	if (rdev->sdma_fw->size != sdma_req_size) {
848 		printk(KERN_ERR
849 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
850 		       rdev->sdma_fw->size, fw_name);
851 		err = -EINVAL;
852 	}
853 
854 	/* No MC ucode on APUs */
855 	if (!(rdev->flags & RADEON_IS_IGP)) {
856 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
857 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
858 		if (err)
859 			goto out;
860 		if (rdev->mc_fw->size != mc_req_size) {
861 			printk(KERN_ERR
862 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
863 			       rdev->mc_fw->size, fw_name);
864 			err = -EINVAL;
865 		}
866 	}
867 
868 out:
869 	if (err) {
870 		if (err != -EINVAL)
871 			printk(KERN_ERR
872 			       "cik_cp: Failed to load firmware \"%s\"\n",
873 			       fw_name);
874 		release_firmware(rdev->pfp_fw);
875 		rdev->pfp_fw = NULL;
876 		release_firmware(rdev->me_fw);
877 		rdev->me_fw = NULL;
878 		release_firmware(rdev->ce_fw);
879 		rdev->ce_fw = NULL;
880 		release_firmware(rdev->rlc_fw);
881 		rdev->rlc_fw = NULL;
882 		release_firmware(rdev->mc_fw);
883 		rdev->mc_fw = NULL;
884 	}
885 	return err;
886 }
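
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * every blob above is fetched with the same "request_firmware(), then
 * reject unexpected sizes" pattern.  A hypothetical helper capturing that
 * pattern could look like this; the name and the cleanup-on-mismatch
 * behavior are assumptions, while the expected sizes and firmware names
 * come from the defines and tables at the top of the file.
 */
static int cik_request_ucode(struct radeon_device *rdev,
			     const struct firmware **fw,
			     const char *fw_name, size_t req_size)
{
	int err = request_firmware(fw, fw_name, rdev->dev);

	if (err)
		return err;
	if ((*fw)->size != req_size) {
		printk(KERN_ERR "cik: Bogus length %zu in firmware \"%s\"\n",
		       (*fw)->size, fw_name);
		release_firmware(*fw);
		*fw = NULL;
		return -EINVAL;
	}
	return 0;
}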
887 
888 /*
889  * Core functions
890  */
891 /**
892  * cik_tiling_mode_table_init - init the hw tiling table
893  *
894  * @rdev: radeon_device pointer
895  *
896  * Starting with SI, the tiling setup is done globally in a
897  * set of 32 tiling modes.  Rather than selecting each set of
898  * parameters per surface as on older asics, we just select
899  * which index in the tiling table we want to use, and the
900  * surface uses those parameters (CIK).
901  */
902 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
903 {
904 	const u32 num_tile_mode_states = 32;
905 	const u32 num_secondary_tile_mode_states = 16;
906 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
907 	u32 num_pipe_configs;
908 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
909 		rdev->config.cik.max_shader_engines;
910 
911 	switch (rdev->config.cik.mem_row_size_in_kb) {
912 	case 1:
913 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
914 		break;
915 	case 2:
916 	default:
917 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
918 		break;
919 	case 4:
920 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
921 		break;
922 	}
923 
924 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
925 	if (num_pipe_configs > 8)
926 		num_pipe_configs = 8; /* ??? */
927 
928 	if (num_pipe_configs == 8) {
929 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
930 			switch (reg_offset) {
931 			case 0:
932 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
933 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
934 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
935 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
936 				break;
937 			case 1:
938 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
940 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
941 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
942 				break;
943 			case 2:
944 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
946 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
947 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
948 				break;
949 			case 3:
950 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
952 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
953 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
954 				break;
955 			case 4:
956 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
958 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
959 						 TILE_SPLIT(split_equal_to_row_size));
960 				break;
961 			case 5:
962 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
963 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
964 				break;
965 			case 6:
966 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
967 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
969 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
970 				break;
971 			case 7:
972 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
973 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
974 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
975 						 TILE_SPLIT(split_equal_to_row_size));
976 				break;
977 			case 8:
978 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
979 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
980 				break;
981 			case 9:
982 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
983 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
984 				break;
985 			case 10:
986 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
987 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
988 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
989 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
990 				break;
991 			case 11:
992 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
993 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
994 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
995 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
996 				break;
997 			case 12:
998 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
999 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1001 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1002 				break;
1003 			case 13:
1004 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1005 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1006 				break;
1007 			case 14:
1008 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1009 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1010 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1011 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1012 				break;
1013 			case 16:
1014 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1015 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1016 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1017 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1018 				break;
1019 			case 17:
1020 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1021 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1022 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1023 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1024 				break;
1025 			case 27:
1026 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1027 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1028 				break;
1029 			case 28:
1030 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1031 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1032 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1033 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1034 				break;
1035 			case 29:
1036 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1037 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1038 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1039 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1040 				break;
1041 			case 30:
1042 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1043 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1044 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1045 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 				break;
1047 			default:
1048 				gb_tile_moden = 0;
1049 				break;
1050 			}
1051 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1052 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1053 		}
1054 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1055 			switch (reg_offset) {
1056 			case 0:
1057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1060 						 NUM_BANKS(ADDR_SURF_16_BANK));
1061 				break;
1062 			case 1:
1063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1066 						 NUM_BANKS(ADDR_SURF_16_BANK));
1067 				break;
1068 			case 2:
1069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1072 						 NUM_BANKS(ADDR_SURF_16_BANK));
1073 				break;
1074 			case 3:
1075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1078 						 NUM_BANKS(ADDR_SURF_16_BANK));
1079 				break;
1080 			case 4:
1081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 						 NUM_BANKS(ADDR_SURF_8_BANK));
1085 				break;
1086 			case 5:
1087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1090 						 NUM_BANKS(ADDR_SURF_4_BANK));
1091 				break;
1092 			case 6:
1093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1096 						 NUM_BANKS(ADDR_SURF_2_BANK));
1097 				break;
1098 			case 8:
1099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1102 						 NUM_BANKS(ADDR_SURF_16_BANK));
1103 				break;
1104 			case 9:
1105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1108 						 NUM_BANKS(ADDR_SURF_16_BANK));
1109 				break;
1110 			case 10:
1111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1114 						 NUM_BANKS(ADDR_SURF_16_BANK));
1115 				break;
1116 			case 11:
1117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1120 						 NUM_BANKS(ADDR_SURF_16_BANK));
1121 				break;
1122 			case 12:
1123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1126 						 NUM_BANKS(ADDR_SURF_8_BANK));
1127 				break;
1128 			case 13:
1129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1132 						 NUM_BANKS(ADDR_SURF_4_BANK));
1133 				break;
1134 			case 14:
1135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1138 						 NUM_BANKS(ADDR_SURF_2_BANK));
1139 				break;
1140 			default:
1141 				gb_tile_moden = 0;
1142 				break;
1143 			}
1144 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1145 		}
1146 	} else if (num_pipe_configs == 4) {
1147 		if (num_rbs == 4) {
1148 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1149 				switch (reg_offset) {
1150 				case 0:
1151 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1152 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1153 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1155 					break;
1156 				case 1:
1157 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1161 					break;
1162 				case 2:
1163 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1164 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1165 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1167 					break;
1168 				case 3:
1169 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1171 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1173 					break;
1174 				case 4:
1175 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1176 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1177 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178 							 TILE_SPLIT(split_equal_to_row_size));
1179 					break;
1180 				case 5:
1181 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1182 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1183 					break;
1184 				case 6:
1185 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1186 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1187 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1188 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1189 					break;
1190 				case 7:
1191 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1192 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1193 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1194 							 TILE_SPLIT(split_equal_to_row_size));
1195 					break;
1196 				case 8:
1197 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1198 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1199 					break;
1200 				case 9:
1201 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1202 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1203 					break;
1204 				case 10:
1205 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1206 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1207 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1208 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1209 					break;
1210 				case 11:
1211 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1212 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1213 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1214 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1215 					break;
1216 				case 12:
1217 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1218 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1219 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1220 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1221 					break;
1222 				case 13:
1223 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1225 					break;
1226 				case 14:
1227 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231 					break;
1232 				case 16:
1233 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1237 					break;
1238 				case 17:
1239 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1243 					break;
1244 				case 27:
1245 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1247 					break;
1248 				case 28:
1249 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253 					break;
1254 				case 29:
1255 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1259 					break;
1260 				case 30:
1261 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265 					break;
1266 				default:
1267 					gb_tile_moden = 0;
1268 					break;
1269 				}
1270 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1271 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1272 			}
1273 		} else if (num_rbs < 4) {
1274 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275 				switch (reg_offset) {
1276 				case 0:
1277 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1281 					break;
1282 				case 1:
1283 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1287 					break;
1288 				case 2:
1289 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1293 					break;
1294 				case 3:
1295 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1299 					break;
1300 				case 4:
1301 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304 							 TILE_SPLIT(split_equal_to_row_size));
1305 					break;
1306 				case 5:
1307 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309 					break;
1310 				case 6:
1311 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1315 					break;
1316 				case 7:
1317 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 							 TILE_SPLIT(split_equal_to_row_size));
1321 					break;
1322 				case 8:
1323 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1325 					break;
1326 				case 9:
1327 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1329 					break;
1330 				case 10:
1331 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335 					break;
1336 				case 11:
1337 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1341 					break;
1342 				case 12:
1343 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1347 					break;
1348 				case 13:
1349 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1351 					break;
1352 				case 14:
1353 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357 					break;
1358 				case 16:
1359 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1363 					break;
1364 				case 17:
1365 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1369 					break;
1370 				case 27:
1371 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1373 					break;
1374 				case 28:
1375 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379 					break;
1380 				case 29:
1381 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385 					break;
1386 				case 30:
1387 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391 					break;
1392 				default:
1393 					gb_tile_moden = 0;
1394 					break;
1395 				}
1396 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1397 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1398 			}
1399 		}
1400 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401 			switch (reg_offset) {
1402 			case 0:
1403 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406 						 NUM_BANKS(ADDR_SURF_16_BANK));
1407 				break;
1408 			case 1:
1409 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412 						 NUM_BANKS(ADDR_SURF_16_BANK));
1413 				break;
1414 			case 2:
1415 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418 						 NUM_BANKS(ADDR_SURF_16_BANK));
1419 				break;
1420 			case 3:
1421 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424 						 NUM_BANKS(ADDR_SURF_16_BANK));
1425 				break;
1426 			case 4:
1427 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430 						 NUM_BANKS(ADDR_SURF_16_BANK));
1431 				break;
1432 			case 5:
1433 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436 						 NUM_BANKS(ADDR_SURF_8_BANK));
1437 				break;
1438 			case 6:
1439 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442 						 NUM_BANKS(ADDR_SURF_4_BANK));
1443 				break;
1444 			case 8:
1445 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 						 NUM_BANKS(ADDR_SURF_16_BANK));
1449 				break;
1450 			case 9:
1451 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 						 NUM_BANKS(ADDR_SURF_16_BANK));
1455 				break;
1456 			case 10:
1457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460 						 NUM_BANKS(ADDR_SURF_16_BANK));
1461 				break;
1462 			case 11:
1463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466 						 NUM_BANKS(ADDR_SURF_16_BANK));
1467 				break;
1468 			case 12:
1469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472 						 NUM_BANKS(ADDR_SURF_16_BANK));
1473 				break;
1474 			case 13:
1475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478 						 NUM_BANKS(ADDR_SURF_8_BANK));
1479 				break;
1480 			case 14:
1481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484 						 NUM_BANKS(ADDR_SURF_4_BANK));
1485 				break;
1486 			default:
1487 				gb_tile_moden = 0;
1488 				break;
1489 			}
1490 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1491 		}
1492 	} else if (num_pipe_configs == 2) {
1493 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494 			switch (reg_offset) {
1495 			case 0:
1496 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498 						 PIPE_CONFIG(ADDR_SURF_P2) |
1499 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1500 				break;
1501 			case 1:
1502 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504 						 PIPE_CONFIG(ADDR_SURF_P2) |
1505 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1506 				break;
1507 			case 2:
1508 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510 						 PIPE_CONFIG(ADDR_SURF_P2) |
1511 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1512 				break;
1513 			case 3:
1514 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516 						 PIPE_CONFIG(ADDR_SURF_P2) |
1517 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1518 				break;
1519 			case 4:
1520 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522 						 PIPE_CONFIG(ADDR_SURF_P2) |
1523 						 TILE_SPLIT(split_equal_to_row_size));
1524 				break;
1525 			case 5:
1526 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1528 				break;
1529 			case 6:
1530 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532 						 PIPE_CONFIG(ADDR_SURF_P2) |
1533 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1534 				break;
1535 			case 7:
1536 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538 						 PIPE_CONFIG(ADDR_SURF_P2) |
1539 						 TILE_SPLIT(split_equal_to_row_size));
1540 				break;
1541 			case 8:
1542 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1543 				break;
1544 			case 9:
1545 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1547 				break;
1548 			case 10:
1549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 						 PIPE_CONFIG(ADDR_SURF_P2) |
1552 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553 				break;
1554 			case 11:
1555 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557 						 PIPE_CONFIG(ADDR_SURF_P2) |
1558 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1559 				break;
1560 			case 12:
1561 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563 						 PIPE_CONFIG(ADDR_SURF_P2) |
1564 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1565 				break;
1566 			case 13:
1567 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1569 				break;
1570 			case 14:
1571 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573 						 PIPE_CONFIG(ADDR_SURF_P2) |
1574 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575 				break;
1576 			case 16:
1577 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579 						 PIPE_CONFIG(ADDR_SURF_P2) |
1580 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1581 				break;
1582 			case 17:
1583 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585 						 PIPE_CONFIG(ADDR_SURF_P2) |
1586 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1587 				break;
1588 			case 27:
1589 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1591 				break;
1592 			case 28:
1593 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595 						 PIPE_CONFIG(ADDR_SURF_P2) |
1596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597 				break;
1598 			case 29:
1599 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601 						 PIPE_CONFIG(ADDR_SURF_P2) |
1602 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1603 				break;
1604 			case 30:
1605 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607 						 PIPE_CONFIG(ADDR_SURF_P2) |
1608 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609 				break;
1610 			default:
1611 				gb_tile_moden = 0;
1612 				break;
1613 			}
1614 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1615 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1616 		}
1617 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618 			switch (reg_offset) {
1619 			case 0:
1620 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623 						 NUM_BANKS(ADDR_SURF_16_BANK));
1624 				break;
1625 			case 1:
1626 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629 						 NUM_BANKS(ADDR_SURF_16_BANK));
1630 				break;
1631 			case 2:
1632 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635 						 NUM_BANKS(ADDR_SURF_16_BANK));
1636 				break;
1637 			case 3:
1638 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641 						 NUM_BANKS(ADDR_SURF_16_BANK));
1642 				break;
1643 			case 4:
1644 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647 						 NUM_BANKS(ADDR_SURF_16_BANK));
1648 				break;
1649 			case 5:
1650 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653 						 NUM_BANKS(ADDR_SURF_16_BANK));
1654 				break;
1655 			case 6:
1656 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659 						 NUM_BANKS(ADDR_SURF_8_BANK));
1660 				break;
1661 			case 8:
1662 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665 						 NUM_BANKS(ADDR_SURF_16_BANK));
1666 				break;
1667 			case 9:
1668 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671 						 NUM_BANKS(ADDR_SURF_16_BANK));
1672 				break;
1673 			case 10:
1674 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677 						 NUM_BANKS(ADDR_SURF_16_BANK));
1678 				break;
1679 			case 11:
1680 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683 						 NUM_BANKS(ADDR_SURF_16_BANK));
1684 				break;
1685 			case 12:
1686 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689 						 NUM_BANKS(ADDR_SURF_16_BANK));
1690 				break;
1691 			case 13:
1692 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695 						 NUM_BANKS(ADDR_SURF_16_BANK));
1696 				break;
1697 			case 14:
1698 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701 						 NUM_BANKS(ADDR_SURF_8_BANK));
1702 				break;
1703 			default:
1704 				gb_tile_moden = 0;
1705 				break;
1706 			}
1707 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1708 		}
1709 	} else
1710 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1711 }
1712 
1713 /**
1714  * cik_select_se_sh - select which SE, SH to address
1715  *
1716  * @rdev: radeon_device pointer
1717  * @se_num: shader engine to address
1718  * @sh_num: sh block to address
1719  *
1720  * Select which SE, SH combinations to address. Certain
1721  * registers are instanced per SE or SH.  0xffffffff means
1722  * broadcast to all SEs or SHs (CIK).
1723  */
1724 static void cik_select_se_sh(struct radeon_device *rdev,
1725 			     u32 se_num, u32 sh_num)
1726 {
1727 	u32 data = INSTANCE_BROADCAST_WRITES;
1728 
1729 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1730 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1731 	else if (se_num == 0xffffffff)
1732 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733 	else if (sh_num == 0xffffffff)
1734 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1735 	else
1736 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737 	WREG32(GRBM_GFX_INDEX, data);
1738 }
1739 
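/* Illustrative usage sketch (not part of the driver flow here): to read a
 * per-SE instanced register on shader engine 1 only, select it first and
 * restore broadcast mode afterwards:
 *
 *	cik_select_se_sh(rdev, 1, 0xffffffff);		// SE 1, all SHs
 *	val = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);	// back to broadcast
 *
 * This mirrors how cik_setup_rb() below walks the SE/SH combinations.
 */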
1740 /**
1741  * cik_create_bitmask - create a bitmask
1742  *
1743  * @bit_width: length of the mask
1744  *
1745  * create a variable length bit mask (CIK).
1746  * Returns the bitmask.
1747  */
1748 static u32 cik_create_bitmask(u32 bit_width)
1749 {
1750 	u32 i, mask = 0;
1751 
1752 	for (i = 0; i < bit_width; i++) {
1753 		mask <<= 1;
1754 		mask |= 1;
1755 	}
1756 	return mask;
1757 }
1758 
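/* Example (illustrative): cik_create_bitmask(2) returns 0x3 and
 * cik_create_bitmask(4) returns 0xf, i.e. the loop above is equivalent to
 * ((1 << bit_width) - 1) for bit_width < 32.
 */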
1759 /**
1760  * cik_get_rb_disabled - get the disabled render backends (RBs)
1761  *
1762  * @rdev: radeon_device pointer
1763  * @max_rb_num: max RBs (render backends) for the asic
1764  * @se_num: number of SEs (shader engines) for the asic
1765  * @sh_per_se: number of SH blocks per SE for the asic
1766  *
1767  * Calculates the bitmask of disabled RBs (CIK).
1768  * Returns the disabled RB bitmask.
1769  */
1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771 			      u32 max_rb_num, u32 se_num,
1772 			      u32 sh_per_se)
1773 {
1774 	u32 data, mask;
1775 
1776 	data = RREG32(CC_RB_BACKEND_DISABLE);
1777 	if (data & 1)
1778 		data &= BACKEND_DISABLE_MASK;
1779 	else
1780 		data = 0;
1781 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1782 
1783 	data >>= BACKEND_DISABLE_SHIFT;
1784 
1785 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786 
1787 	return data & mask;
1788 }
1789 
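/* Worked example (illustrative): with 4 RBs spread across 2 SEs and 1 SH
 * per SE, the mask is cik_create_bitmask(4 / 2 / 1) = 0x3, so only the two
 * disable bits belonging to the currently selected SE/SH (see
 * cik_select_se_sh()) survive the final "data & mask".
 */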
1790 /**
1791  * cik_setup_rb - setup the RBs on the asic
1792  *
1793  * @rdev: radeon_device pointer
1794  * @se_num: number of SEs (shader engines) for the asic
1795  * @sh_per_se: number of SH blocks per SE for the asic
1796  * @max_rb_num: max RBs (render backends) for the asic
1797  *
1798  * Configures per-SE/SH RB registers (CIK).
1799  */
1800 static void cik_setup_rb(struct radeon_device *rdev,
1801 			 u32 se_num, u32 sh_per_se,
1802 			 u32 max_rb_num)
1803 {
1804 	int i, j;
1805 	u32 data, mask;
1806 	u32 disabled_rbs = 0;
1807 	u32 enabled_rbs = 0;
1808 
1809 	for (i = 0; i < se_num; i++) {
1810 		for (j = 0; j < sh_per_se; j++) {
1811 			cik_select_se_sh(rdev, i, j);
1812 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1814 		}
1815 	}
1816 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1817 
1818 	mask = 1;
1819 	for (i = 0; i < max_rb_num; i++) {
1820 		if (!(disabled_rbs & mask))
1821 			enabled_rbs |= mask;
1822 		mask <<= 1;
1823 	}
1824 
1825 	for (i = 0; i < se_num; i++) {
1826 		cik_select_se_sh(rdev, i, 0xffffffff);
1827 		data = 0;
1828 		for (j = 0; j < sh_per_se; j++) {
1829 			switch (enabled_rbs & 3) {
1830 			case 1:
1831 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1832 				break;
1833 			case 2:
1834 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1835 				break;
1836 			case 3:
1837 			default:
1838 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1839 				break;
1840 			}
1841 			enabled_rbs >>= 2;
1842 		}
1843 		WREG32(PA_SC_RASTER_CONFIG, data);
1844 	}
1845 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1846 }
1847 
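/* Example of the per-SH mapping chosen above (illustrative): for each SH
 * the low two bits of enabled_rbs select the raster config -
 * 1 (only RB0 enabled) -> RASTER_CONFIG_RB_MAP_0,
 * 2 (only RB1 enabled) -> RASTER_CONFIG_RB_MAP_3,
 * 3 (both enabled)     -> RASTER_CONFIG_RB_MAP_2 (the default case).
 */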
1848 /**
1849  * cik_gpu_init - setup the 3D engine
1850  *
1851  * @rdev: radeon_device pointer
1852  *
1853  * Configures the 3D engine and tiling configuration
1854  * registers so that the 3D engine is usable.
1855  */
1856 static void cik_gpu_init(struct radeon_device *rdev)
1857 {
1858 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859 	u32 mc_shared_chmap, mc_arb_ramcfg;
1860 	u32 hdp_host_path_cntl;
1861 	u32 tmp;
1862 	int i, j;
1863 
1864 	switch (rdev->family) {
1865 	case CHIP_BONAIRE:
1866 		rdev->config.cik.max_shader_engines = 2;
1867 		rdev->config.cik.max_tile_pipes = 4;
1868 		rdev->config.cik.max_cu_per_sh = 7;
1869 		rdev->config.cik.max_sh_per_se = 1;
1870 		rdev->config.cik.max_backends_per_se = 2;
1871 		rdev->config.cik.max_texture_channel_caches = 4;
1872 		rdev->config.cik.max_gprs = 256;
1873 		rdev->config.cik.max_gs_threads = 32;
1874 		rdev->config.cik.max_hw_contexts = 8;
1875 
1876 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1881 		break;
1882 	case CHIP_KAVERI:
1883 		/* TODO */
1884 		break;
1885 	case CHIP_KABINI:
1886 	default:
1887 		rdev->config.cik.max_shader_engines = 1;
1888 		rdev->config.cik.max_tile_pipes = 2;
1889 		rdev->config.cik.max_cu_per_sh = 2;
1890 		rdev->config.cik.max_sh_per_se = 1;
1891 		rdev->config.cik.max_backends_per_se = 1;
1892 		rdev->config.cik.max_texture_channel_caches = 2;
1893 		rdev->config.cik.max_gprs = 256;
1894 		rdev->config.cik.max_gs_threads = 16;
1895 		rdev->config.cik.max_hw_contexts = 8;
1896 
1897 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1902 		break;
1903 	}
1904 
1905 	/* Initialize HDP */
1906 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907 		WREG32((0x2c14 + j), 0x00000000);
1908 		WREG32((0x2c18 + j), 0x00000000);
1909 		WREG32((0x2c1c + j), 0x00000000);
1910 		WREG32((0x2c20 + j), 0x00000000);
1911 		WREG32((0x2c24 + j), 0x00000000);
1912 	}
1913 
1914 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1915 
1916 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1917 
1918 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1920 
1921 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922 	rdev->config.cik.mem_max_burst_length_bytes = 256;
1923 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925 	if (rdev->config.cik.mem_row_size_in_kb > 4)
1926 		rdev->config.cik.mem_row_size_in_kb = 4;
1927 	/* XXX use MC settings? */
1928 	rdev->config.cik.shader_engine_tile_size = 32;
1929 	rdev->config.cik.num_gpus = 1;
1930 	rdev->config.cik.multi_gpu_tile_size = 64;
1931 
1932 	/* fix up row size */
1933 	gb_addr_config &= ~ROW_SIZE_MASK;
1934 	switch (rdev->config.cik.mem_row_size_in_kb) {
1935 	case 1:
1936 	default:
1937 		gb_addr_config |= ROW_SIZE(0);
1938 		break;
1939 	case 2:
1940 		gb_addr_config |= ROW_SIZE(1);
1941 		break;
1942 	case 4:
1943 		gb_addr_config |= ROW_SIZE(2);
1944 		break;
1945 	}
1946 
1947 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1948 	 * not have bank info, so create a custom tiling dword.
1949 	 * bits 3:0   num_pipes
1950 	 * bits 7:4   num_banks
1951 	 * bits 11:8  group_size
1952 	 * bits 15:12 row_size
1953 	 */
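	/* Worked example (illustrative): a 4-pipe part with a non-zero
	 * NOOFBANK field ends up with tile_config = (2 << 0) | (1 << 4),
	 * after which the raw PIPE_INTERLEAVE_SIZE and ROW_SIZE fields of
	 * gb_addr_config are copied into bits 11:8 and 15:12 below.
	 */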
1954 	rdev->config.cik.tile_config = 0;
1955 	switch (rdev->config.cik.num_tile_pipes) {
1956 	case 1:
1957 		rdev->config.cik.tile_config |= (0 << 0);
1958 		break;
1959 	case 2:
1960 		rdev->config.cik.tile_config |= (1 << 0);
1961 		break;
1962 	case 4:
1963 		rdev->config.cik.tile_config |= (2 << 0);
1964 		break;
1965 	case 8:
1966 	default:
1967 		/* XXX what about 12? */
1968 		rdev->config.cik.tile_config |= (3 << 0);
1969 		break;
1970 	}
1971 	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972 		rdev->config.cik.tile_config |= 1 << 4;
1973 	else
1974 		rdev->config.cik.tile_config |= 0 << 4;
1975 	rdev->config.cik.tile_config |=
1976 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977 	rdev->config.cik.tile_config |=
1978 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1979 
1980 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
1983 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1985 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1988 
1989 	cik_tiling_mode_table_init(rdev);
1990 
1991 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992 		     rdev->config.cik.max_sh_per_se,
1993 		     rdev->config.cik.max_backends_per_se);
1994 
1995 	/* set HW defaults for 3D engine */
1996 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1997 
1998 	WREG32(SX_DEBUG_1, 0x20);
1999 
2000 	WREG32(TA_CNTL_AUX, 0x00010000);
2001 
2002 	tmp = RREG32(SPI_CONFIG_CNTL);
2003 	tmp |= 0x03000000;
2004 	WREG32(SPI_CONFIG_CNTL, tmp);
2005 
2006 	WREG32(SQ_CONFIG, 1);
2007 
2008 	WREG32(DB_DEBUG, 0);
2009 
2010 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2011 	tmp |= 0x00000400;
2012 	WREG32(DB_DEBUG2, tmp);
2013 
2014 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2015 	tmp |= 0x00020200;
2016 	WREG32(DB_DEBUG3, tmp);
2017 
2018 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2019 	tmp |= 0x00018208;
2020 	WREG32(CB_HW_CONTROL, tmp);
2021 
2022 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2023 
2024 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2028 
2029 	WREG32(VGT_NUM_INSTANCES, 1);
2030 
2031 	WREG32(CP_PERFMON_CNTL, 0);
2032 
2033 	WREG32(SQ_CONFIG, 0);
2034 
2035 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036 					  FORCE_EOV_MAX_REZ_CNT(255)));
2037 
2038 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2040 
2041 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2042 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2043 
2044 	tmp = RREG32(HDP_MISC_CNTL);
2045 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046 	WREG32(HDP_MISC_CNTL, tmp);
2047 
2048 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2050 
2051 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053 
2054 	udelay(50);
2055 }
2056 
2057 /*
2058  * GPU scratch registers helpers function.
2059  */
2060 /**
2061  * cik_scratch_init - setup driver info for CP scratch regs
2062  *
2063  * @rdev: radeon_device pointer
2064  *
2065  * Set up the number and offset of the CP scratch registers.
2066  * NOTE: use of CP scratch registers is a legacy interface and
2067  * is not used by default on newer asics (r6xx+).  On newer asics,
2068  * memory buffers are used for fences rather than scratch regs.
2069  */
2070 static void cik_scratch_init(struct radeon_device *rdev)
2071 {
2072 	int i;
2073 
2074 	rdev->scratch.num_reg = 7;
2075 	rdev->scratch.reg_base = SCRATCH_REG0;
2076 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2077 		rdev->scratch.free[i] = true;
2078 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2079 	}
2080 }
2081 
2082 /**
2083  * cik_ring_test - basic gfx ring test
2084  *
2085  * @rdev: radeon_device pointer
2086  * @ring: radeon_ring structure holding ring information
2087  *
2088  * Allocate a scratch register and write to it using the gfx ring (CIK).
2089  * Provides a basic gfx ring test to verify that the ring is working.
2090  * Used by cik_cp_gfx_resume().
2091  * Returns 0 on success, error on failure.
2092  */
2093 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2094 {
2095 	uint32_t scratch;
2096 	uint32_t tmp = 0;
2097 	unsigned i;
2098 	int r;
2099 
2100 	r = radeon_scratch_get(rdev, &scratch);
2101 	if (r) {
2102 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2103 		return r;
2104 	}
2105 	WREG32(scratch, 0xCAFEDEAD);
2106 	r = radeon_ring_lock(rdev, ring, 3);
2107 	if (r) {
2108 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109 		radeon_scratch_free(rdev, scratch);
2110 		return r;
2111 	}
2112 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2113 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2114 	radeon_ring_write(ring, 0xDEADBEEF);
2115 	radeon_ring_unlock_commit(rdev, ring);
2116 
2117 	for (i = 0; i < rdev->usec_timeout; i++) {
2118 		tmp = RREG32(scratch);
2119 		if (tmp == 0xDEADBEEF)
2120 			break;
2121 		DRM_UDELAY(1);
2122 	}
2123 	if (i < rdev->usec_timeout) {
2124 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2125 	} else {
2126 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127 			  ring->idx, scratch, tmp);
2128 		r = -EINVAL;
2129 	}
2130 	radeon_scratch_free(rdev, scratch);
2131 	return r;
2132 }
2133 
2134 /**
2135  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2136  *
2137  * @rdev: radeon_device pointer
2138  * @fence: radeon fence object
2139  *
2140  * Emits a fence sequence number on the gfx ring and flushes
2141  * GPU caches.
2142  */
2143 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2144 			     struct radeon_fence *fence)
2145 {
2146 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2147 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2148 
2149 	/* EVENT_WRITE_EOP - flush caches, send int */
2150 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2151 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2152 				 EOP_TC_ACTION_EN |
2153 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2154 				 EVENT_INDEX(5)));
2155 	radeon_ring_write(ring, addr & 0xfffffffc);
2156 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157 	radeon_ring_write(ring, fence->seq);
2158 	radeon_ring_write(ring, 0);
2159 	/* HDP flush */
2160 	/* We should be using the new WAIT_REG_MEM special op packet here
2161 	 * but it causes the CP to hang
2162 	 */
2163 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2164 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2165 				 WRITE_DATA_DST_SEL(0)));
2166 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2167 	radeon_ring_write(ring, 0);
2168 	radeon_ring_write(ring, 0);
2169 }
2170 
2171 /**
2172  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2173  *
2174  * @rdev: radeon_device pointer
2175  * @fence: radeon fence object
2176  *
2177  * Emits a fence sequence number on the compute ring and flushes
2178  * GPU caches.
2179  */
2180 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2181 				 struct radeon_fence *fence)
2182 {
2183 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2184 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2185 
2186 	/* RELEASE_MEM - flush caches, send int */
2187 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2188 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2189 				 EOP_TC_ACTION_EN |
2190 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2191 				 EVENT_INDEX(5)));
2192 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2193 	radeon_ring_write(ring, addr & 0xfffffffc);
2194 	radeon_ring_write(ring, upper_32_bits(addr));
2195 	radeon_ring_write(ring, fence->seq);
2196 	radeon_ring_write(ring, 0);
2197 	/* HDP flush */
2198 	/* We should be using the new WAIT_REG_MEM special op packet here
2199 	 * but it causes the CP to hang
2200 	 */
2201 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2202 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2203 				 WRITE_DATA_DST_SEL(0)));
2204 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2205 	radeon_ring_write(ring, 0);
2206 	radeon_ring_write(ring, 0);
2207 }
2208 
2209 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2210 			     struct radeon_ring *ring,
2211 			     struct radeon_semaphore *semaphore,
2212 			     bool emit_wait)
2213 {
2214 	uint64_t addr = semaphore->gpu_addr;
2215 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2216 
2217 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2218 	radeon_ring_write(ring, addr & 0xffffffff);
2219 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2220 }
2221 
2222 /*
2223  * IB stuff
2224  */
2225 /**
2226  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2227  *
2228  * @rdev: radeon_device pointer
2229  * @ib: radeon indirect buffer object
2230  *
2231  * Emits a DE (drawing engine) or CE (constant engine) IB
2232  * on the gfx ring.  IBs are usually generated by userspace
2233  * acceleration drivers and submitted to the kernel for
2234  * scheduling on the ring.  This function schedules the IB
2235  * on the gfx ring for execution by the GPU.
2236  */
2237 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2238 {
2239 	struct radeon_ring *ring = &rdev->ring[ib->ring];
2240 	u32 header, control = INDIRECT_BUFFER_VALID;
2241 
2242 	if (ib->is_const_ib) {
2243 		/* set switch buffer packet before const IB */
2244 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2245 		radeon_ring_write(ring, 0);
2246 
2247 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2248 	} else {
2249 		u32 next_rptr;
2250 		if (ring->rptr_save_reg) {
2251 			next_rptr = ring->wptr + 3 + 4;
2252 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2253 			radeon_ring_write(ring, ((ring->rptr_save_reg -
2254 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
2255 			radeon_ring_write(ring, next_rptr);
2256 		} else if (rdev->wb.enabled) {
2257 			next_rptr = ring->wptr + 5 + 4;
2258 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2259 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2260 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2261 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2262 			radeon_ring_write(ring, next_rptr);
2263 		}
2264 
2265 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2266 	}
2267 
2268 	control |= ib->length_dw |
2269 		(ib->vm ? (ib->vm->id << 24) : 0);
2270 
2271 	radeon_ring_write(ring, header);
2272 	radeon_ring_write(ring,
2273 #ifdef __BIG_ENDIAN
2274 			  (2 << 0) |
2275 #endif
2276 			  (ib->gpu_addr & 0xFFFFFFFC));
2277 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2278 	radeon_ring_write(ring, control);
2279 }
2280 
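/* Example of the control word built above (illustrative): a 64-dword IB
 * executed in VM id 3 yields
 *
 *	control = INDIRECT_BUFFER_VALID | 64 | (3 << 24);
 *
 * while a kernel IB with no VM leaves bits 31:24 at zero.
 */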
2281 /**
2282  * cik_ib_test - basic gfx ring IB test
2283  *
2284  * @rdev: radeon_device pointer
2285  * @ring: radeon_ring structure holding ring information
2286  *
2287  * Allocate an IB and execute it on the gfx ring (CIK).
2288  * Provides a basic gfx ring test to verify that IBs are working.
2289  * Returns 0 on success, error on failure.
2290  */
2291 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2292 {
2293 	struct radeon_ib ib;
2294 	uint32_t scratch;
2295 	uint32_t tmp = 0;
2296 	unsigned i;
2297 	int r;
2298 
2299 	r = radeon_scratch_get(rdev, &scratch);
2300 	if (r) {
2301 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2302 		return r;
2303 	}
2304 	WREG32(scratch, 0xCAFEDEAD);
2305 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2306 	if (r) {
2307 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2308 		return r;
2309 	}
2310 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2311 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2312 	ib.ptr[2] = 0xDEADBEEF;
2313 	ib.length_dw = 3;
2314 	r = radeon_ib_schedule(rdev, &ib, NULL);
2315 	if (r) {
2316 		radeon_scratch_free(rdev, scratch);
2317 		radeon_ib_free(rdev, &ib);
2318 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2319 		return r;
2320 	}
2321 	r = radeon_fence_wait(ib.fence, false);
2322 	if (r) {
2323 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2324 		return r;
2325 	}
2326 	for (i = 0; i < rdev->usec_timeout; i++) {
2327 		tmp = RREG32(scratch);
2328 		if (tmp == 0xDEADBEEF)
2329 			break;
2330 		DRM_UDELAY(1);
2331 	}
2332 	if (i < rdev->usec_timeout) {
2333 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2334 	} else {
2335 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2336 			  scratch, tmp);
2337 		r = -EINVAL;
2338 	}
2339 	radeon_scratch_free(rdev, scratch);
2340 	radeon_ib_free(rdev, &ib);
2341 	return r;
2342 }
2343 
2344 /*
2345  * CP.
2346  * On CIK, gfx and compute now have independent command processors.
2347  *
2348  * GFX
2349  * Gfx consists of a single ring and can process both gfx jobs and
2350  * compute jobs.  The gfx CP consists of three microengines (ME):
2351  * PFP - Pre-Fetch Parser
2352  * ME - Micro Engine
2353  * CE - Constant Engine
2354  * The PFP and ME make up what is considered the Drawing Engine (DE).
2355  * The CE is an asynchronous engine used for updating buffer descriptors
2356  * used by the DE so that they can be loaded into cache in parallel
2357  * while the DE is processing state update packets.
2358  *
2359  * Compute
2360  * The compute CP consists of two microengines (ME):
2361  * MEC1 - Compute MicroEngine 1
2362  * MEC2 - Compute MicroEngine 2
2363  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2364  * The queues are exposed to userspace and are programmed directly
2365  * by the compute runtime.
2366  */
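/* Illustrative mapping used by cik_cp_compute_resume() below: compute pipe
 * index i selects MEC1 for i = 0..3 and MEC2 for i = 4..7, e.g. i = 5 is
 * programmed as me = 2, pipe = 1.
 */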
2367 /**
2368  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2369  *
2370  * @rdev: radeon_device pointer
2371  * @enable: enable or disable the MEs
2372  *
2373  * Halts or unhalts the gfx MEs.
2374  */
2375 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2376 {
2377 	if (enable)
2378 		WREG32(CP_ME_CNTL, 0);
2379 	else {
2380 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2381 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382 	}
2383 	udelay(50);
2384 }
2385 
2386 /**
2387  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2388  *
2389  * @rdev: radeon_device pointer
2390  *
2391  * Loads the gfx PFP, ME, and CE ucode.
2392  * Returns 0 for success, -EINVAL if the ucode is not available.
2393  */
2394 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2395 {
2396 	const __be32 *fw_data;
2397 	int i;
2398 
2399 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2400 		return -EINVAL;
2401 
2402 	cik_cp_gfx_enable(rdev, false);
2403 
2404 	/* PFP */
2405 	fw_data = (const __be32 *)rdev->pfp_fw->data;
2406 	WREG32(CP_PFP_UCODE_ADDR, 0);
2407 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2408 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2409 	WREG32(CP_PFP_UCODE_ADDR, 0);
2410 
2411 	/* CE */
2412 	fw_data = (const __be32 *)rdev->ce_fw->data;
2413 	WREG32(CP_CE_UCODE_ADDR, 0);
2414 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2415 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2416 	WREG32(CP_CE_UCODE_ADDR, 0);
2417 
2418 	/* ME */
2419 	fw_data = (const __be32 *)rdev->me_fw->data;
2420 	WREG32(CP_ME_RAM_WADDR, 0);
2421 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2422 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2423 	WREG32(CP_ME_RAM_WADDR, 0);
2424 
2425 	WREG32(CP_PFP_UCODE_ADDR, 0);
2426 	WREG32(CP_CE_UCODE_ADDR, 0);
2427 	WREG32(CP_ME_RAM_WADDR, 0);
2428 	WREG32(CP_ME_RAM_RADDR, 0);
2429 	return 0;
2430 }
2431 
2432 /**
2433  * cik_cp_gfx_start - start the gfx ring
2434  *
2435  * @rdev: radeon_device pointer
2436  *
2437  * Enables the ring and loads the clear state context and other
2438  * packets required to init the ring.
2439  * Returns 0 for success, error for failure.
2440  */
2441 static int cik_cp_gfx_start(struct radeon_device *rdev)
2442 {
2443 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2444 	int r, i;
2445 
2446 	/* init the CP */
2447 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2448 	WREG32(CP_ENDIAN_SWAP, 0);
2449 	WREG32(CP_DEVICE_ID, 1);
2450 
2451 	cik_cp_gfx_enable(rdev, true);
2452 
2453 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2454 	if (r) {
2455 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2456 		return r;
2457 	}
2458 
2459 	/* init the CE partitions.  CE only used for gfx on CIK */
2460 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2461 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2462 	radeon_ring_write(ring, 0xc000);
2463 	radeon_ring_write(ring, 0xc000);
2464 
2465 	/* setup clear context state */
2466 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2467 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2468 
2469 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2470 	radeon_ring_write(ring, 0x80000000);
2471 	radeon_ring_write(ring, 0x80000000);
2472 
2473 	for (i = 0; i < cik_default_size; i++)
2474 		radeon_ring_write(ring, cik_default_state[i]);
2475 
2476 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2477 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2478 
2479 	/* set clear context state */
2480 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2481 	radeon_ring_write(ring, 0);
2482 
2483 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2484 	radeon_ring_write(ring, 0x00000316);
2485 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2487 
2488 	radeon_ring_unlock_commit(rdev, ring);
2489 
2490 	return 0;
2491 }
2492 
2493 /**
2494  * cik_cp_gfx_fini - stop the gfx ring
2495  *
2496  * @rdev: radeon_device pointer
2497  *
2498  * Stop the gfx ring and tear down the driver ring
2499  * info.
2500  */
2501 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2502 {
2503 	cik_cp_gfx_enable(rdev, false);
2504 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2505 }
2506 
2507 /**
2508  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2509  *
2510  * @rdev: radeon_device pointer
2511  *
2512  * Program the location and size of the gfx ring buffer
2513  * and test it to make sure it's working.
2514  * Returns 0 for success, error for failure.
2515  */
2516 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2517 {
2518 	struct radeon_ring *ring;
2519 	u32 tmp;
2520 	u32 rb_bufsz;
2521 	u64 rb_addr;
2522 	int r;
2523 
2524 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2525 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2526 
2527 	/* Set the write pointer delay */
2528 	WREG32(CP_RB_WPTR_DELAY, 0);
2529 
2530 	/* set the RB to use vmid 0 */
2531 	WREG32(CP_RB_VMID, 0);
2532 
2533 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2534 
2535 	/* ring 0 - compute and gfx */
2536 	/* Set ring buffer size */
2537 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2538 	rb_bufsz = drm_order(ring->ring_size / 8);
2539 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2540 #ifdef __BIG_ENDIAN
2541 	tmp |= BUF_SWAP_32BIT;
2542 #endif
2543 	WREG32(CP_RB0_CNTL, tmp);
2544 
2545 	/* Initialize the ring buffer's read and write pointers */
2546 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2547 	ring->wptr = 0;
2548 	WREG32(CP_RB0_WPTR, ring->wptr);
2549 
2550 	/* set the wb address whether it's enabled or not */
2551 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2552 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2553 
2554 	/* scratch register shadowing is no longer supported */
2555 	WREG32(SCRATCH_UMSK, 0);
2556 
2557 	if (!rdev->wb.enabled)
2558 		tmp |= RB_NO_UPDATE;
2559 
2560 	mdelay(1);
2561 	WREG32(CP_RB0_CNTL, tmp);
2562 
2563 	rb_addr = ring->gpu_addr >> 8;
2564 	WREG32(CP_RB0_BASE, rb_addr);
2565 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2566 
2567 	ring->rptr = RREG32(CP_RB0_RPTR);
2568 
2569 	/* start the ring */
2570 	cik_cp_gfx_start(rdev);
2571 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2572 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2573 	if (r) {
2574 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2575 		return r;
2576 	}
2577 	return 0;
2578 }
2579 
2580 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2581 			      struct radeon_ring *ring)
2582 {
2583 	u32 rptr;
2584 
2585 
2586 
2587 	if (rdev->wb.enabled) {
2588 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2589 	} else {
2590 		mutex_lock(&rdev->srbm_mutex);
2591 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2592 		rptr = RREG32(CP_HQD_PQ_RPTR);
2593 		cik_srbm_select(rdev, 0, 0, 0, 0);
2594 		mutex_unlock(&rdev->srbm_mutex);
2595 	}
2596 	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2597 
2598 	return rptr;
2599 }
2600 
2601 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2602 			      struct radeon_ring *ring)
2603 {
2604 	u32 wptr;
2605 
2606 	if (rdev->wb.enabled) {
2607 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2608 	} else {
2609 		mutex_lock(&rdev->srbm_mutex);
2610 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2611 		wptr = RREG32(CP_HQD_PQ_WPTR);
2612 		cik_srbm_select(rdev, 0, 0, 0, 0);
2613 		mutex_unlock(&rdev->srbm_mutex);
2614 	}
2615 	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2616 
2617 	return wptr;
2618 }
2619 
2620 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2621 			       struct radeon_ring *ring)
2622 {
2623 	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2624 
2625 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2626 	WDOORBELL32(ring->doorbell_offset, wptr);
2627 }
2628 
2629 /**
2630  * cik_cp_compute_enable - enable/disable the compute CP MEs
2631  *
2632  * @rdev: radeon_device pointer
2633  * @enable: enable or disable the MEs
2634  *
2635  * Halts or unhalts the compute MEs.
2636  */
2637 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2638 {
2639 	if (enable)
2640 		WREG32(CP_MEC_CNTL, 0);
2641 	else
2642 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2643 	udelay(50);
2644 }
2645 
2646 /**
2647  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2648  *
2649  * @rdev: radeon_device pointer
2650  *
2651  * Loads the compute MEC1&2 ucode.
2652  * Returns 0 for success, -EINVAL if the ucode is not available.
2653  */
2654 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2655 {
2656 	const __be32 *fw_data;
2657 	int i;
2658 
2659 	if (!rdev->mec_fw)
2660 		return -EINVAL;
2661 
2662 	cik_cp_compute_enable(rdev, false);
2663 
2664 	/* MEC1 */
2665 	fw_data = (const __be32 *)rdev->mec_fw->data;
2666 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2667 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2668 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2669 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2670 
2671 	if (rdev->family == CHIP_KAVERI) {
2672 		/* MEC2 */
2673 		fw_data = (const __be32 *)rdev->mec_fw->data;
2674 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2675 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2676 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2677 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2678 	}
2679 
2680 	return 0;
2681 }
2682 
2683 /**
2684  * cik_cp_compute_start - start the compute queues
2685  *
2686  * @rdev: radeon_device pointer
2687  *
2688  * Enable the compute queues.
2689  * Returns 0 for success, error for failure.
2690  */
2691 static int cik_cp_compute_start(struct radeon_device *rdev)
2692 {
2693 	cik_cp_compute_enable(rdev, true);
2694 
2695 	return 0;
2696 }
2697 
2698 /**
2699  * cik_cp_compute_fini - stop the compute queues
2700  *
2701  * @rdev: radeon_device pointer
2702  *
2703  * Stop the compute queues and tear down the driver queue
2704  * info.
2705  */
2706 static void cik_cp_compute_fini(struct radeon_device *rdev)
2707 {
2708 	int i, idx, r;
2709 
2710 	cik_cp_compute_enable(rdev, false);
2711 
2712 	for (i = 0; i < 2; i++) {
2713 		if (i == 0)
2714 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2715 		else
2716 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2717 
2718 		if (rdev->ring[idx].mqd_obj) {
2719 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2720 			if (unlikely(r != 0))
2721 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2722 
2723 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2724 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2725 
2726 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2727 			rdev->ring[idx].mqd_obj = NULL;
2728 		}
2729 	}
2730 }
2731 
2732 static void cik_mec_fini(struct radeon_device *rdev)
2733 {
2734 	int r;
2735 
2736 	if (rdev->mec.hpd_eop_obj) {
2737 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2738 		if (unlikely(r != 0))
2739 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2740 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2741 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2742 
2743 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2744 		rdev->mec.hpd_eop_obj = NULL;
2745 	}
2746 }
2747 
2748 #define MEC_HPD_SIZE 2048
2749 
2750 static int cik_mec_init(struct radeon_device *rdev)
2751 {
2752 	int r;
2753 	u32 *hpd;
2754 
2755 	/*
2756 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2757 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2758 	 */
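	/* Worked size example (illustrative): the HPD EOP bo allocated below
	 * is num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, i.e.
	 * 2 * 4 * 2048 * 2 = 32KB on Kaveri and 1 * 4 * 2048 * 2 = 16KB on
	 * Bonaire/Kabini.
	 */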
2759 	if (rdev->family == CHIP_KAVERI)
2760 		rdev->mec.num_mec = 2;
2761 	else
2762 		rdev->mec.num_mec = 1;
2763 	rdev->mec.num_pipe = 4;
2764 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2765 
2766 	if (rdev->mec.hpd_eop_obj == NULL) {
2767 		r = radeon_bo_create(rdev,
2768 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2769 				     PAGE_SIZE, true,
2770 				     RADEON_GEM_DOMAIN_GTT, NULL,
2771 				     &rdev->mec.hpd_eop_obj);
2772 		if (r) {
2773 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2774 			return r;
2775 		}
2776 	}
2777 
2778 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2779 	if (unlikely(r != 0)) {
2780 		cik_mec_fini(rdev);
2781 		return r;
2782 	}
2783 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2784 			  &rdev->mec.hpd_eop_gpu_addr);
2785 	if (r) {
2786 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2787 		cik_mec_fini(rdev);
2788 		return r;
2789 	}
2790 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2791 	if (r) {
2792 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2793 		cik_mec_fini(rdev);
2794 		return r;
2795 	}
2796 
2797 	/* clear memory.  Not sure if this is required or not */
2798 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2799 
2800 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2801 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2802 
2803 	return 0;
2804 }
2805 
2806 struct hqd_registers
2807 {
2808 	u32 cp_mqd_base_addr;
2809 	u32 cp_mqd_base_addr_hi;
2810 	u32 cp_hqd_active;
2811 	u32 cp_hqd_vmid;
2812 	u32 cp_hqd_persistent_state;
2813 	u32 cp_hqd_pipe_priority;
2814 	u32 cp_hqd_queue_priority;
2815 	u32 cp_hqd_quantum;
2816 	u32 cp_hqd_pq_base;
2817 	u32 cp_hqd_pq_base_hi;
2818 	u32 cp_hqd_pq_rptr;
2819 	u32 cp_hqd_pq_rptr_report_addr;
2820 	u32 cp_hqd_pq_rptr_report_addr_hi;
2821 	u32 cp_hqd_pq_wptr_poll_addr;
2822 	u32 cp_hqd_pq_wptr_poll_addr_hi;
2823 	u32 cp_hqd_pq_doorbell_control;
2824 	u32 cp_hqd_pq_wptr;
2825 	u32 cp_hqd_pq_control;
2826 	u32 cp_hqd_ib_base_addr;
2827 	u32 cp_hqd_ib_base_addr_hi;
2828 	u32 cp_hqd_ib_rptr;
2829 	u32 cp_hqd_ib_control;
2830 	u32 cp_hqd_iq_timer;
2831 	u32 cp_hqd_iq_rptr;
2832 	u32 cp_hqd_dequeue_request;
2833 	u32 cp_hqd_dma_offload;
2834 	u32 cp_hqd_sema_cmd;
2835 	u32 cp_hqd_msg_type;
2836 	u32 cp_hqd_atomic0_preop_lo;
2837 	u32 cp_hqd_atomic0_preop_hi;
2838 	u32 cp_hqd_atomic1_preop_lo;
2839 	u32 cp_hqd_atomic1_preop_hi;
2840 	u32 cp_hqd_hq_scheduler0;
2841 	u32 cp_hqd_hq_scheduler1;
2842 	u32 cp_mqd_control;
2843 };
2844 
2845 struct bonaire_mqd
2846 {
2847 	u32 header;
2848 	u32 dispatch_initiator;
2849 	u32 dimensions[3];
2850 	u32 start_idx[3];
2851 	u32 num_threads[3];
2852 	u32 pipeline_stat_enable;
2853 	u32 perf_counter_enable;
2854 	u32 pgm[2];
2855 	u32 tba[2];
2856 	u32 tma[2];
2857 	u32 pgm_rsrc[2];
2858 	u32 vmid;
2859 	u32 resource_limits;
2860 	u32 static_thread_mgmt01[2];
2861 	u32 tmp_ring_size;
2862 	u32 static_thread_mgmt23[2];
2863 	u32 restart[3];
2864 	u32 thread_trace_enable;
2865 	u32 reserved1;
2866 	u32 user_data[16];
2867 	u32 vgtcs_invoke_count[2];
2868 	struct hqd_registers queue_state;
2869 	u32 dequeue_cntr;
2870 	u32 interrupt_queue[64];
2871 };
2872 
2873 /**
2874  * cik_cp_compute_resume - setup the compute queue registers
2875  *
2876  * @rdev: radeon_device pointer
2877  *
2878  * Program the compute queues and test them to make sure they
2879  * are working.
2880  * Returns 0 for success, error for failure.
2881  */
2882 static int cik_cp_compute_resume(struct radeon_device *rdev)
2883 {
2884 	int r, i, idx;
2885 	u32 tmp;
2886 	bool use_doorbell = true;
2887 	u64 hqd_gpu_addr;
2888 	u64 mqd_gpu_addr;
2889 	u64 eop_gpu_addr;
2890 	u64 wb_gpu_addr;
2891 	u32 *buf;
2892 	struct bonaire_mqd *mqd;
2893 
2894 	r = cik_cp_compute_start(rdev);
2895 	if (r)
2896 		return r;
2897 
2898 	/* fix up chicken bits */
2899 	tmp = RREG32(CP_CPF_DEBUG);
2900 	tmp |= (1 << 23);
2901 	WREG32(CP_CPF_DEBUG, tmp);
2902 
2903 	/* init the pipes */
2904 	mutex_lock(&rdev->srbm_mutex);
2905 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2906 		int me = (i < 4) ? 1 : 2;
2907 		int pipe = (i < 4) ? i : (i - 4);
2908 
2909 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2910 
2911 		cik_srbm_select(rdev, me, pipe, 0, 0);
2912 
2913 		/* write the EOP addr */
2914 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2915 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2916 
2917 		/* set the VMID assigned */
2918 		WREG32(CP_HPD_EOP_VMID, 0);
2919 
2920 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2921 		tmp = RREG32(CP_HPD_EOP_CONTROL);
2922 		tmp &= ~EOP_SIZE_MASK;
2923 		tmp |= drm_order(MEC_HPD_SIZE / 8);
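		/* e.g. drm_order(MEC_HPD_SIZE / 8) = drm_order(256) = 8, so
		 * the EOP ring holds 2^(8+1) = 512 dwords = 2048 bytes,
		 * matching MEC_HPD_SIZE (illustrative).
		 */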
2924 		WREG32(CP_HPD_EOP_CONTROL, tmp);
2925 	}
2926 	cik_srbm_select(rdev, 0, 0, 0, 0);
2927 	mutex_unlock(&rdev->srbm_mutex);
2928 
2929 	/* init the queues.  Just two for now. */
2930 	for (i = 0; i < 2; i++) {
2931 		if (i == 0)
2932 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2933 		else
2934 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2935 
2936 		if (rdev->ring[idx].mqd_obj == NULL) {
2937 			r = radeon_bo_create(rdev,
2938 					     sizeof(struct bonaire_mqd),
2939 					     PAGE_SIZE, true,
2940 					     RADEON_GEM_DOMAIN_GTT, NULL,
2941 					     &rdev->ring[idx].mqd_obj);
2942 			if (r) {
2943 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2944 				return r;
2945 			}
2946 		}
2947 
2948 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2949 		if (unlikely(r != 0)) {
2950 			cik_cp_compute_fini(rdev);
2951 			return r;
2952 		}
2953 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2954 				  &mqd_gpu_addr);
2955 		if (r) {
2956 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2957 			cik_cp_compute_fini(rdev);
2958 			return r;
2959 		}
2960 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2961 		if (r) {
2962 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2963 			cik_cp_compute_fini(rdev);
2964 			return r;
2965 		}
2966 
2967 		/* doorbell offset */
2968 		rdev->ring[idx].doorbell_offset =
2969 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2970 
2971 		/* init the mqd struct */
2972 		memset(buf, 0, sizeof(struct bonaire_mqd));
2973 
2974 		mqd = (struct bonaire_mqd *)buf;
2975 		mqd->header = 0xC0310800;
2976 		mqd->static_thread_mgmt01[0] = 0xffffffff;
2977 		mqd->static_thread_mgmt01[1] = 0xffffffff;
2978 		mqd->static_thread_mgmt23[0] = 0xffffffff;
2979 		mqd->static_thread_mgmt23[1] = 0xffffffff;
2980 
2981 		mutex_lock(&rdev->srbm_mutex);
2982 		cik_srbm_select(rdev, rdev->ring[idx].me,
2983 				rdev->ring[idx].pipe,
2984 				rdev->ring[idx].queue, 0);
2985 
2986 		/* disable wptr polling */
2987 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2988 		tmp &= ~WPTR_POLL_EN;
2989 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2990 
2991 		/* enable doorbell? */
2992 		mqd->queue_state.cp_hqd_pq_doorbell_control =
2993 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2994 		if (use_doorbell)
2995 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2996 		else
2997 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2998 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2999 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3000 
3001 		/* disable the queue if it's active */
3002 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3003 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3004 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3005 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3006 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3007 			for (i = 0; i < rdev->usec_timeout; i++) {
3008 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3009 					break;
3010 				udelay(1);
3011 			}
3012 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3013 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3014 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3015 		}
3016 
3017 		/* set the pointer to the MQD */
3018 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3019 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3020 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3021 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3022 		/* set MQD vmid to 0 */
3023 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3024 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3025 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3026 
3027 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3028 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3029 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3030 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3031 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3032 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3033 
3034 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3035 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3036 		mqd->queue_state.cp_hqd_pq_control &=
3037 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3038 
3039 		mqd->queue_state.cp_hqd_pq_control |=
3040 			drm_order(rdev->ring[idx].ring_size / 8);
3041 		mqd->queue_state.cp_hqd_pq_control |=
3042 			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3043 #ifdef __BIG_ENDIAN
3044 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3045 #endif
3046 		mqd->queue_state.cp_hqd_pq_control &=
3047 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3048 		mqd->queue_state.cp_hqd_pq_control |=
3049 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3050 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3051 
3052 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3053 		if (i == 0)
3054 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3055 		else
3056 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3057 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3058 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3059 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3060 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3061 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3062 
3063 		/* set the wb address whether it's enabled or not */
3064 		if (i == 0)
3065 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3066 		else
3067 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3068 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3069 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3070 			upper_32_bits(wb_gpu_addr) & 0xffff;
3071 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3072 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3073 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3074 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3075 
3076 		/* enable the doorbell if requested */
3077 		if (use_doorbell) {
3078 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3079 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3080 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3081 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3082 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3083 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3084 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3085 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3086 
3087 		} else {
3088 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3089 		}
3090 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3091 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3092 
3093 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3094 		rdev->ring[idx].wptr = 0;
3095 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3096 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3097 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3098 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3099 
3100 		/* set the vmid for the queue */
3101 		mqd->queue_state.cp_hqd_vmid = 0;
3102 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3103 
3104 		/* activate the queue */
3105 		mqd->queue_state.cp_hqd_active = 1;
3106 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3107 
3108 		cik_srbm_select(rdev, 0, 0, 0, 0);
3109 		mutex_unlock(&rdev->srbm_mutex);
3110 
3111 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3112 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3113 
3114 		rdev->ring[idx].ready = true;
3115 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3116 		if (r)
3117 			rdev->ring[idx].ready = false;
3118 	}
3119 
3120 	return 0;
3121 }
3122 
3123 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3124 {
3125 	cik_cp_gfx_enable(rdev, enable);
3126 	cik_cp_compute_enable(rdev, enable);
3127 }
3128 
3129 static int cik_cp_load_microcode(struct radeon_device *rdev)
3130 {
3131 	int r;
3132 
3133 	r = cik_cp_gfx_load_microcode(rdev);
3134 	if (r)
3135 		return r;
3136 	r = cik_cp_compute_load_microcode(rdev);
3137 	if (r)
3138 		return r;
3139 
3140 	return 0;
3141 }
3142 
3143 static void cik_cp_fini(struct radeon_device *rdev)
3144 {
3145 	cik_cp_gfx_fini(rdev);
3146 	cik_cp_compute_fini(rdev);
3147 }
3148 
3149 static int cik_cp_resume(struct radeon_device *rdev)
3150 {
3151 	int r;
3152 
3153 	/* Reset all cp blocks */
3154 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3155 	RREG32(GRBM_SOFT_RESET);
3156 	mdelay(15);
3157 	WREG32(GRBM_SOFT_RESET, 0);
3158 	RREG32(GRBM_SOFT_RESET);
3159 
3160 	r = cik_cp_load_microcode(rdev);
3161 	if (r)
3162 		return r;
3163 
3164 	r = cik_cp_gfx_resume(rdev);
3165 	if (r)
3166 		return r;
3167 	r = cik_cp_compute_resume(rdev);
3168 	if (r)
3169 		return r;
3170 
3171 	return 0;
3172 }
3173 
3174 /*
3175  * sDMA - System DMA
3176  * Starting with CIK, the GPU has new asynchronous
3177  * DMA engines.  These engines are used for compute
3178  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3179  * and each one supports 1 ring buffer used for gfx
3180  * and 2 queues used for compute.
3181  *
3182  * The programming model is very similar to the CP
3183  * (ring buffer, IBs, etc.), but sDMA has its own
3184  * packet format that is different from the PM4 format
3185  * used by the CP. sDMA supports copying data, writing
3186  * embedded data, solid fills, and a number of other
3187  * things.  It also has support for tiling/detiling of
3188  * buffers.
3189  */
3190 /**
3191  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3192  *
3193  * @rdev: radeon_device pointer
3194  * @ib: IB object to schedule
3195  *
3196  * Schedule an IB in the DMA ring (CIK).
3197  */
3198 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3199 			      struct radeon_ib *ib)
3200 {
3201 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3202 	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3203 
3204 	if (rdev->wb.enabled) {
3205 		u32 next_rptr = ring->wptr + 5;
3206 		while ((next_rptr & 7) != 4)
3207 			next_rptr++;
3208 		next_rptr += 4;
3209 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3210 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3211 		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3212 		radeon_ring_write(ring, 1); /* number of DWs to follow */
3213 		radeon_ring_write(ring, next_rptr);
3214 	}
3215 
3216 	/* IB packet must end on a 8 DW boundary */
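	/* e.g. if (ring->wptr & 7) == 2 at this point, two NOPs are emitted
	 * so that the 4-dword INDIRECT_BUFFER packet below starts at offset
	 * 4 mod 8 and therefore ends on an 8-dword boundary (illustrative).
	 */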
3217 	while ((ring->wptr & 7) != 4)
3218 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3219 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3220 	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3221 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3222 	radeon_ring_write(ring, ib->length_dw);
3223 
3224 }
3225 
3226 /**
3227  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3228  *
3229  * @rdev: radeon_device pointer
3230  * @fence: radeon fence object
3231  *
3232  * Add a DMA fence packet to the ring to write
3233  * the fence seq number and a DMA trap packet to generate
3234  * an interrupt if needed (CIK).
3235  */
3236 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3237 			      struct radeon_fence *fence)
3238 {
3239 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3240 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3241 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3242 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3243 	u32 ref_and_mask;
3244 
3245 	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3246 		ref_and_mask = SDMA0;
3247 	else
3248 		ref_and_mask = SDMA1;
3249 
3250 	/* write the fence */
3251 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3252 	radeon_ring_write(ring, addr & 0xffffffff);
3253 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3254 	radeon_ring_write(ring, fence->seq);
3255 	/* generate an interrupt */
3256 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3257 	/* flush HDP */
3258 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3259 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3260 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3261 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3262 	radeon_ring_write(ring, ref_and_mask); /* MASK */
3263 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3264 }
3265 
3266 /**
3267  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3268  *
3269  * @rdev: radeon_device pointer
3270  * @ring: radeon_ring structure holding ring information
3271  * @semaphore: radeon semaphore object
3272  * @emit_wait: wait or signal semaphore
3273  *
3274  * Add a DMA semaphore packet to the ring to wait on or signal
3275  * other rings (CIK).
3276  */
3277 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3278 				  struct radeon_ring *ring,
3279 				  struct radeon_semaphore *semaphore,
3280 				  bool emit_wait)
3281 {
3282 	u64 addr = semaphore->gpu_addr;
3283 	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3284 
3285 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3286 	radeon_ring_write(ring, addr & 0xfffffff8);
3287 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3288 }
3289 
3290 /**
3291  * cik_sdma_gfx_stop - stop the gfx async dma engines
3292  *
3293  * @rdev: radeon_device pointer
3294  *
3295  * Stop the gfx async dma ring buffers (CIK).
3296  */
3297 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3298 {
3299 	u32 rb_cntl, reg_offset;
3300 	int i;
3301 
3302 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3303 
3304 	for (i = 0; i < 2; i++) {
3305 		if (i == 0)
3306 			reg_offset = SDMA0_REGISTER_OFFSET;
3307 		else
3308 			reg_offset = SDMA1_REGISTER_OFFSET;
3309 		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3310 		rb_cntl &= ~SDMA_RB_ENABLE;
3311 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3312 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3313 	}
3314 }
3315 
3316 /**
3317  * cik_sdma_rlc_stop - stop the compute async dma engines
3318  *
3319  * @rdev: radeon_device pointer
3320  *
3321  * Stop the compute async dma queues (CIK).
3322  */
3323 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3324 {
3325 	/* XXX todo */
3326 }
3327 
3328 /**
3329  * cik_sdma_enable - enable/disable the async dma engines
3330  *
3331  * @rdev: radeon_device pointer
3332  * @enable: enable/disable the DMA MEs.
3333  *
3334  * Halt or unhalt the async dma engines (CIK).
3335  */
3336 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3337 {
3338 	u32 me_cntl, reg_offset;
3339 	int i;
3340 
3341 	for (i = 0; i < 2; i++) {
3342 		if (i == 0)
3343 			reg_offset = SDMA0_REGISTER_OFFSET;
3344 		else
3345 			reg_offset = SDMA1_REGISTER_OFFSET;
3346 		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3347 		if (enable)
3348 			me_cntl &= ~SDMA_HALT;
3349 		else
3350 			me_cntl |= SDMA_HALT;
3351 		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3352 	}
3353 }
3354 
3355 /**
3356  * cik_sdma_gfx_resume - setup and start the async dma engines
3357  *
3358  * @rdev: radeon_device pointer
3359  *
3360  * Set up the gfx DMA ring buffers and enable them (CIK).
3361  * Returns 0 for success, error for failure.
3362  */
3363 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3364 {
3365 	struct radeon_ring *ring;
3366 	u32 rb_cntl, ib_cntl;
3367 	u32 rb_bufsz;
3368 	u32 reg_offset, wb_offset;
3369 	int i, r;
3370 
3371 	for (i = 0; i < 2; i++) {
3372 		if (i == 0) {
3373 			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3374 			reg_offset = SDMA0_REGISTER_OFFSET;
3375 			wb_offset = R600_WB_DMA_RPTR_OFFSET;
3376 		} else {
3377 			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3378 			reg_offset = SDMA1_REGISTER_OFFSET;
3379 			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3380 		}
3381 
3382 		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3383 		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3384 
3385 		/* Set ring buffer size in dwords */
3386 		rb_bufsz = drm_order(ring->ring_size / 4);
3387 		rb_cntl = rb_bufsz << 1;
3388 #ifdef __BIG_ENDIAN
3389 		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3390 #endif
3391 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3392 
3393 		/* Initialize the ring buffer's read and write pointers */
3394 		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3395 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3396 
3397 		/* set the wb address whether it's enabled or not */
3398 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3399 		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3400 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3401 		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3402 
3403 		if (rdev->wb.enabled)
3404 			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3405 
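		/* ring base is programmed in units of 256 bytes, split
		 * across the BASE/BASE_HI register pair
		 */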
3406 		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3407 		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3408 
3409 		ring->wptr = 0;
3410 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3411 
3412 		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3413 
3414 		/* enable DMA RB */
3415 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3416 
3417 		ib_cntl = SDMA_IB_ENABLE;
3418 #ifdef __BIG_ENDIAN
3419 		ib_cntl |= SDMA_IB_SWAP_ENABLE;
3420 #endif
3421 		/* enable DMA IBs */
3422 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3423 
3424 		ring->ready = true;
3425 
3426 		r = radeon_ring_test(rdev, ring->idx, ring);
3427 		if (r) {
3428 			ring->ready = false;
3429 			return r;
3430 		}
3431 	}
3432 
3433 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3434 
3435 	return 0;
3436 }
3437 
3438 /**
3439  * cik_sdma_rlc_resume - setup and start the async dma engines
3440  *
3441  * @rdev: radeon_device pointer
3442  *
3443  * Set up the compute DMA queues and enable them (CIK).
3444  * Returns 0 for success, error for failure.
3445  */
3446 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3447 {
3448 	/* XXX todo */
3449 	return 0;
3450 }
3451 
3452 /**
3453  * cik_sdma_load_microcode - load the sDMA ME ucode
3454  *
3455  * @rdev: radeon_device pointer
3456  *
3457  * Loads the sDMA0/1 ucode.
3458  * Returns 0 for success, -EINVAL if the ucode is not available.
3459  */
3460 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3461 {
3462 	const __be32 *fw_data;
3463 	int i;
3464 
3465 	if (!rdev->sdma_fw)
3466 		return -EINVAL;
3467 
3468 	/* stop the gfx rings and rlc compute queues */
3469 	cik_sdma_gfx_stop(rdev);
3470 	cik_sdma_rlc_stop(rdev);
3471 
3472 	/* halt the MEs */
3473 	cik_sdma_enable(rdev, false);
3474 
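	/* both SDMA engines are loaded with the same firmware image */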
3475 	/* sdma0 */
3476 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3477 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3478 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3479 		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3480 	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3481 
3482 	/* sdma1 */
3483 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3484 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3485 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3486 		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3487 	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3488 
3489 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3490 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3491 	return 0;
3492 }
3493 
3494 /**
3495  * cik_sdma_resume - setup and start the async dma engines
3496  *
3497  * @rdev: radeon_device pointer
3498  *
3499  * Set up the DMA engines and enable them (CIK).
3500  * Returns 0 for success, error for failure.
3501  */
3502 static int cik_sdma_resume(struct radeon_device *rdev)
3503 {
3504 	int r;
3505 
3506 	/* Reset dma */
3507 	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3508 	RREG32(SRBM_SOFT_RESET);
3509 	udelay(50);
3510 	WREG32(SRBM_SOFT_RESET, 0);
3511 	RREG32(SRBM_SOFT_RESET);
3512 
3513 	r = cik_sdma_load_microcode(rdev);
3514 	if (r)
3515 		return r;
3516 
3517 	/* unhalt the MEs */
3518 	cik_sdma_enable(rdev, true);
3519 
3520 	/* start the gfx rings and rlc compute queues */
3521 	r = cik_sdma_gfx_resume(rdev);
3522 	if (r)
3523 		return r;
3524 	r = cik_sdma_rlc_resume(rdev);
3525 	if (r)
3526 		return r;
3527 
3528 	return 0;
3529 }
3530 
3531 /**
3532  * cik_sdma_fini - tear down the async dma engines
3533  *
3534  * @rdev: radeon_device pointer
3535  *
3536  * Stop the async dma engines and free the rings (CIK).
3537  */
3538 static void cik_sdma_fini(struct radeon_device *rdev)
3539 {
3540 	/* stop the gfx rings and rlc compute queues */
3541 	cik_sdma_gfx_stop(rdev);
3542 	cik_sdma_rlc_stop(rdev);
3543 	/* halt the MEs */
3544 	cik_sdma_enable(rdev, false);
3545 	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3546 	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3547 	/* XXX - compute dma queue tear down */
3548 }
3549 
3550 /**
3551  * cik_copy_dma - copy pages using the DMA engine
3552  *
3553  * @rdev: radeon_device pointer
3554  * @src_offset: src GPU address
3555  * @dst_offset: dst GPU address
3556  * @num_gpu_pages: number of GPU pages to xfer
3557  * @fence: radeon fence object
3558  *
3559  * Copy GPU pages using the DMA engine (CIK).
3560  * Used by the radeon ttm implementation to move pages if
3561  * registered as the asic copy callback.
3562  */
3563 int cik_copy_dma(struct radeon_device *rdev,
3564 		 uint64_t src_offset, uint64_t dst_offset,
3565 		 unsigned num_gpu_pages,
3566 		 struct radeon_fence **fence)
3567 {
3568 	struct radeon_semaphore *sem = NULL;
3569 	int ring_index = rdev->asic->copy.dma_ring_index;
3570 	struct radeon_ring *ring = &rdev->ring[ring_index];
3571 	u32 size_in_bytes, cur_size_in_bytes;
3572 	int i, num_loops;
3573 	int r = 0;
3574 
3575 	r = radeon_semaphore_create(rdev, &sem);
3576 	if (r) {
3577 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3578 		return r;
3579 	}
3580 
3581 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
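	/* each SDMA linear copy packet (7 dwords) moves at most 0x1fffff bytes */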
3582 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3583 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3584 	if (r) {
3585 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3586 		radeon_semaphore_free(rdev, &sem, NULL);
3587 		return r;
3588 	}
3589 
3590 	if (radeon_fence_need_sync(*fence, ring->idx)) {
3591 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3592 					    ring->idx);
3593 		radeon_fence_note_sync(*fence, ring->idx);
3594 	} else {
3595 		radeon_semaphore_free(rdev, &sem, NULL);
3596 	}
3597 
3598 	for (i = 0; i < num_loops; i++) {
3599 		cur_size_in_bytes = size_in_bytes;
3600 		if (cur_size_in_bytes > 0x1fffff)
3601 			cur_size_in_bytes = 0x1fffff;
3602 		size_in_bytes -= cur_size_in_bytes;
3603 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3604 		radeon_ring_write(ring, cur_size_in_bytes);
3605 		radeon_ring_write(ring, 0); /* src/dst endian swap */
3606 		radeon_ring_write(ring, src_offset & 0xffffffff);
3607 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3608 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
3609 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3610 		src_offset += cur_size_in_bytes;
3611 		dst_offset += cur_size_in_bytes;
3612 	}
3613 
3614 	r = radeon_fence_emit(rdev, fence, ring->idx);
3615 	if (r) {
3616 		radeon_ring_unlock_undo(rdev, ring);
3617 		return r;
3618 	}
3619 
3620 	radeon_ring_unlock_commit(rdev, ring);
3621 	radeon_semaphore_free(rdev, &sem, *fence);
3622 
3623 	return r;
3624 }
3625 
3626 /**
3627  * cik_sdma_ring_test - simple async dma engine test
3628  *
3629  * @rdev: radeon_device pointer
3630  * @ring: radeon_ring structure holding ring information
3631  *
3632  * Test the DMA engine by using it to write a value
3633  * to memory (CIK).
3634  * Returns 0 for success, error for failure.
3635  */
3636 int cik_sdma_ring_test(struct radeon_device *rdev,
3637 		       struct radeon_ring *ring)
3638 {
3639 	unsigned i;
3640 	int r;
3641 	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3642 	u32 tmp;
3643 
3644 	if (!ptr) {
3645 		DRM_ERROR("invalid vram scratch pointer\n");
3646 		return -EINVAL;
3647 	}
3648 
3649 	tmp = 0xCAFEDEAD;
3650 	writel(tmp, ptr);
3651 
3652 	r = radeon_ring_lock(rdev, ring, 4);
3653 	if (r) {
3654 		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3655 		return r;
3656 	}
3657 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3658 	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3659 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3660 	radeon_ring_write(ring, 1); /* number of DWs to follow */
3661 	radeon_ring_write(ring, 0xDEADBEEF);
3662 	radeon_ring_unlock_commit(rdev, ring);
3663 
3664 	for (i = 0; i < rdev->usec_timeout; i++) {
3665 		tmp = readl(ptr);
3666 		if (tmp == 0xDEADBEEF)
3667 			break;
3668 		DRM_UDELAY(1);
3669 	}
3670 
3671 	if (i < rdev->usec_timeout) {
3672 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3673 	} else {
3674 		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3675 			  ring->idx, tmp);
3676 		r = -EINVAL;
3677 	}
3678 	return r;
3679 }
3680 
3681 /**
3682  * cik_sdma_ib_test - test an IB on the DMA engine
3683  *
3684  * @rdev: radeon_device pointer
3685  * @ring: radeon_ring structure holding ring information
3686  *
3687  * Test a simple IB in the DMA ring (CIK).
3688  * Returns 0 on success, error on failure.
3689  */
3690 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3691 {
3692 	struct radeon_ib ib;
3693 	unsigned i;
3694 	int r;
3695 	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3696 	u32 tmp = 0;
3697 
3698 	if (!ptr) {
3699 		DRM_ERROR("invalid vram scratch pointer\n");
3700 		return -EINVAL;
3701 	}
3702 
3703 	tmp = 0xCAFEDEAD;
3704 	writel(tmp, ptr);
3705 
3706 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3707 	if (r) {
3708 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3709 		return r;
3710 	}
3711 
3712 	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3713 	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3714 	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3715 	ib.ptr[3] = 1;
3716 	ib.ptr[4] = 0xDEADBEEF;
3717 	ib.length_dw = 5;
3718 
3719 	r = radeon_ib_schedule(rdev, &ib, NULL);
3720 	if (r) {
3721 		radeon_ib_free(rdev, &ib);
3722 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3723 		return r;
3724 	}
3725 	r = radeon_fence_wait(ib.fence, false);
3726 	if (r) {
3727 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3728 		return r;
3729 	}
3730 	for (i = 0; i < rdev->usec_timeout; i++) {
3731 		tmp = readl(ptr);
3732 		if (tmp == 0xDEADBEEF)
3733 			break;
3734 		DRM_UDELAY(1);
3735 	}
3736 	if (i < rdev->usec_timeout) {
3737 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3738 	} else {
3739 		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3740 		r = -EINVAL;
3741 	}
3742 	radeon_ib_free(rdev, &ib);
3743 	return r;
3744 }
3745 
3746 
3747 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3748 {
3749 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3750 		RREG32(GRBM_STATUS));
3751 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3752 		RREG32(GRBM_STATUS2));
3753 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3754 		RREG32(GRBM_STATUS_SE0));
3755 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3756 		RREG32(GRBM_STATUS_SE1));
3757 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3758 		RREG32(GRBM_STATUS_SE2));
3759 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3760 		RREG32(GRBM_STATUS_SE3));
3761 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3762 		RREG32(SRBM_STATUS));
3763 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3764 		RREG32(SRBM_STATUS2));
3765 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3766 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3767 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3768 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3769 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3770 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3771 		 RREG32(CP_STALLED_STAT1));
3772 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3773 		 RREG32(CP_STALLED_STAT2));
3774 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3775 		 RREG32(CP_STALLED_STAT3));
3776 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3777 		 RREG32(CP_CPF_BUSY_STAT));
3778 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3779 		 RREG32(CP_CPF_STALLED_STAT1));
3780 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3781 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3782 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3783 		 RREG32(CP_CPC_STALLED_STAT1));
3784 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3785 }
3786 
3787 /**
3788  * cik_gpu_check_soft_reset - check which blocks are busy
3789  *
3790  * @rdev: radeon_device pointer
3791  *
3792  * Check which blocks are busy and return the relevant reset
3793  * mask to be used by cik_gpu_soft_reset().
3794  * Returns a mask of the blocks to be reset.
3795  */
3796 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3797 {
3798 	u32 reset_mask = 0;
3799 	u32 tmp;
3800 
3801 	/* GRBM_STATUS */
3802 	tmp = RREG32(GRBM_STATUS);
3803 	if (tmp & (PA_BUSY | SC_BUSY |
3804 		   BCI_BUSY | SX_BUSY |
3805 		   TA_BUSY | VGT_BUSY |
3806 		   DB_BUSY | CB_BUSY |
3807 		   GDS_BUSY | SPI_BUSY |
3808 		   IA_BUSY | IA_BUSY_NO_DMA))
3809 		reset_mask |= RADEON_RESET_GFX;
3810 
3811 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3812 		reset_mask |= RADEON_RESET_CP;
3813 
3814 	/* GRBM_STATUS2 */
3815 	tmp = RREG32(GRBM_STATUS2);
3816 	if (tmp & RLC_BUSY)
3817 		reset_mask |= RADEON_RESET_RLC;
3818 
3819 	/* SDMA0_STATUS_REG */
3820 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3821 	if (!(tmp & SDMA_IDLE))
3822 		reset_mask |= RADEON_RESET_DMA;
3823 
3824 	/* SDMA1_STATUS_REG */
3825 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3826 	if (!(tmp & SDMA_IDLE))
3827 		reset_mask |= RADEON_RESET_DMA1;
3828 
3829 	/* SRBM_STATUS2 */
3830 	tmp = RREG32(SRBM_STATUS2);
3831 	if (tmp & SDMA_BUSY)
3832 		reset_mask |= RADEON_RESET_DMA;
3833 
3834 	if (tmp & SDMA1_BUSY)
3835 		reset_mask |= RADEON_RESET_DMA1;
3836 
3837 	/* SRBM_STATUS */
3838 	tmp = RREG32(SRBM_STATUS);
3839 
3840 	if (tmp & IH_BUSY)
3841 		reset_mask |= RADEON_RESET_IH;
3842 
3843 	if (tmp & SEM_BUSY)
3844 		reset_mask |= RADEON_RESET_SEM;
3845 
3846 	if (tmp & GRBM_RQ_PENDING)
3847 		reset_mask |= RADEON_RESET_GRBM;
3848 
3849 	if (tmp & VMC_BUSY)
3850 		reset_mask |= RADEON_RESET_VMC;
3851 
3852 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3853 		   MCC_BUSY | MCD_BUSY))
3854 		reset_mask |= RADEON_RESET_MC;
3855 
3856 	if (evergreen_is_display_hung(rdev))
3857 		reset_mask |= RADEON_RESET_DISPLAY;
3858 
3859 	/* Skip MC reset as it's most likely not hung, just busy */
3860 	if (reset_mask & RADEON_RESET_MC) {
3861 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3862 		reset_mask &= ~RADEON_RESET_MC;
3863 	}
3864 
3865 	return reset_mask;
3866 }
3867 
3868 /**
3869  * cik_gpu_soft_reset - soft reset GPU
3870  *
3871  * @rdev: radeon_device pointer
3872  * @reset_mask: mask of which blocks to reset
3873  *
3874  * Soft reset the blocks specified in @reset_mask.
3875  */
3876 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3877 {
3878 	struct evergreen_mc_save save;
3879 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3880 	u32 tmp;
3881 
3882 	if (reset_mask == 0)
3883 		return;
3884 
3885 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3886 
3887 	cik_print_gpu_status_regs(rdev);
3888 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3889 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3890 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3891 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3892 
3893 	/* stop the rlc */
3894 	cik_rlc_stop(rdev);
3895 
3896 	/* Disable GFX parsing/prefetching */
3897 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3898 
3899 	/* Disable MEC parsing/prefetching */
3900 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3901 
3902 	if (reset_mask & RADEON_RESET_DMA) {
3903 		/* sdma0 */
3904 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3905 		tmp |= SDMA_HALT;
3906 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3907 	}
3908 	if (reset_mask & RADEON_RESET_DMA1) {
3909 		/* sdma1 */
3910 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3911 		tmp |= SDMA_HALT;
3912 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3913 	}
3914 
3915 	evergreen_mc_stop(rdev, &save);
3916 	if (evergreen_mc_wait_for_idle(rdev)) {
3917 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3918 	}
3919 
3920 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3921 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3922 
3923 	if (reset_mask & RADEON_RESET_CP) {
3924 		grbm_soft_reset |= SOFT_RESET_CP;
3925 
3926 		srbm_soft_reset |= SOFT_RESET_GRBM;
3927 	}
3928 
3929 	if (reset_mask & RADEON_RESET_DMA)
3930 		srbm_soft_reset |= SOFT_RESET_SDMA;
3931 
3932 	if (reset_mask & RADEON_RESET_DMA1)
3933 		srbm_soft_reset |= SOFT_RESET_SDMA1;
3934 
3935 	if (reset_mask & RADEON_RESET_DISPLAY)
3936 		srbm_soft_reset |= SOFT_RESET_DC;
3937 
3938 	if (reset_mask & RADEON_RESET_RLC)
3939 		grbm_soft_reset |= SOFT_RESET_RLC;
3940 
3941 	if (reset_mask & RADEON_RESET_SEM)
3942 		srbm_soft_reset |= SOFT_RESET_SEM;
3943 
3944 	if (reset_mask & RADEON_RESET_IH)
3945 		srbm_soft_reset |= SOFT_RESET_IH;
3946 
3947 	if (reset_mask & RADEON_RESET_GRBM)
3948 		srbm_soft_reset |= SOFT_RESET_GRBM;
3949 
3950 	if (reset_mask & RADEON_RESET_VMC)
3951 		srbm_soft_reset |= SOFT_RESET_VMC;
3952 
3953 	if (!(rdev->flags & RADEON_IS_IGP)) {
3954 		if (reset_mask & RADEON_RESET_MC)
3955 			srbm_soft_reset |= SOFT_RESET_MC;
3956 	}
3957 
3958 	if (grbm_soft_reset) {
3959 		tmp = RREG32(GRBM_SOFT_RESET);
3960 		tmp |= grbm_soft_reset;
3961 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3962 		WREG32(GRBM_SOFT_RESET, tmp);
3963 		tmp = RREG32(GRBM_SOFT_RESET);
3964 
3965 		udelay(50);
3966 
3967 		tmp &= ~grbm_soft_reset;
3968 		WREG32(GRBM_SOFT_RESET, tmp);
3969 		tmp = RREG32(GRBM_SOFT_RESET);
3970 	}
3971 
3972 	if (srbm_soft_reset) {
3973 		tmp = RREG32(SRBM_SOFT_RESET);
3974 		tmp |= srbm_soft_reset;
3975 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3976 		WREG32(SRBM_SOFT_RESET, tmp);
3977 		tmp = RREG32(SRBM_SOFT_RESET);
3978 
3979 		udelay(50);
3980 
3981 		tmp &= ~srbm_soft_reset;
3982 		WREG32(SRBM_SOFT_RESET, tmp);
3983 		tmp = RREG32(SRBM_SOFT_RESET);
3984 	}
3985 
3986 	/* Wait a little for things to settle down */
3987 	udelay(50);
3988 
3989 	evergreen_mc_resume(rdev, &save);
3990 	udelay(50);
3991 
3992 	cik_print_gpu_status_regs(rdev);
3993 }
3994 
3995 /**
3996  * cik_asic_reset - soft reset GPU
3997  *
3998  * @rdev: radeon_device pointer
3999  *
4000  * Look up which blocks are hung and attempt
4001  * to reset them.
4002  * Returns 0 for success.
4003  */
4004 int cik_asic_reset(struct radeon_device *rdev)
4005 {
4006 	u32 reset_mask;
4007 
4008 	reset_mask = cik_gpu_check_soft_reset(rdev);
4009 
4010 	if (reset_mask)
4011 		r600_set_bios_scratch_engine_hung(rdev, true);
4012 
4013 	cik_gpu_soft_reset(rdev, reset_mask);
4014 
4015 	reset_mask = cik_gpu_check_soft_reset(rdev);
4016 
4017 	if (!reset_mask)
4018 		r600_set_bios_scratch_engine_hung(rdev, false);
4019 
4020 	return 0;
4021 }
4022 
4023 /**
4024  * cik_gfx_is_lockup - check if the 3D engine is locked up
4025  *
4026  * @rdev: radeon_device pointer
4027  * @ring: radeon_ring structure holding ring information
4028  *
4029  * Check if the 3D engine is locked up (CIK).
4030  * Returns true if the engine is locked, false if not.
4031  */
4032 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4033 {
4034 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4035 
4036 	if (!(reset_mask & (RADEON_RESET_GFX |
4037 			    RADEON_RESET_COMPUTE |
4038 			    RADEON_RESET_CP))) {
4039 		radeon_ring_lockup_update(ring);
4040 		return false;
4041 	}
4042 	/* force CP activities */
4043 	radeon_ring_force_activity(rdev, ring);
4044 	return radeon_ring_test_lockup(rdev, ring);
4045 }
4046 
4047 /**
4048  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4049  *
4050  * @rdev: radeon_device pointer
4051  * @ring: radeon_ring structure holding ring information
4052  *
4053  * Check if the async DMA engine is locked up (CIK).
4054  * Returns true if the engine appears to be locked up, false if not.
4055  */
4056 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4057 {
4058 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4059 	u32 mask;
4060 
4061 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4062 		mask = RADEON_RESET_DMA;
4063 	else
4064 		mask = RADEON_RESET_DMA1;
4065 
4066 	if (!(reset_mask & mask)) {
4067 		radeon_ring_lockup_update(ring);
4068 		return false;
4069 	}
4070 	/* force ring activities */
4071 	radeon_ring_force_activity(rdev, ring);
4072 	return radeon_ring_test_lockup(rdev, ring);
4073 }
4074 
4075 /* MC */
4076 /**
4077  * cik_mc_program - program the GPU memory controller
4078  *
4079  * @rdev: radeon_device pointer
4080  *
4081  * Set the location of vram, gart, and AGP in the GPU's
4082  * physical address space (CIK).
4083  */
4084 static void cik_mc_program(struct radeon_device *rdev)
4085 {
4086 	struct evergreen_mc_save save;
4087 	u32 tmp;
4088 	int i, j;
4089 
4090 	/* Initialize HDP */
4091 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4092 		WREG32((0x2c14 + j), 0x00000000);
4093 		WREG32((0x2c18 + j), 0x00000000);
4094 		WREG32((0x2c1c + j), 0x00000000);
4095 		WREG32((0x2c20 + j), 0x00000000);
4096 		WREG32((0x2c24 + j), 0x00000000);
4097 	}
4098 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4099 
4100 	evergreen_mc_stop(rdev, &save);
4101 	if (radeon_mc_wait_for_idle(rdev)) {
4102 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4103 	}
4104 	/* Lockout access through VGA aperture*/
4105 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4106 	/* Update configuration */
4107 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4108 	       rdev->mc.vram_start >> 12);
4109 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4110 	       rdev->mc.vram_end >> 12);
4111 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4112 	       rdev->vram_scratch.gpu_addr >> 12);
4113 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4114 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4115 	WREG32(MC_VM_FB_LOCATION, tmp);
4116 	/* XXX double check these! */
4117 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4118 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4119 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4120 	WREG32(MC_VM_AGP_BASE, 0);
4121 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4122 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4123 	if (radeon_mc_wait_for_idle(rdev)) {
4124 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4125 	}
4126 	evergreen_mc_resume(rdev, &save);
4127 	/* we need to own VRAM, so turn off the VGA renderer here
4128 	 * to stop it overwriting our objects */
4129 	rv515_vga_render_disable(rdev);
4130 }
4131 
4132 /**
4133  * cik_mc_init - initialize the memory controller driver params
4134  *
4135  * @rdev: radeon_device pointer
4136  *
4137  * Look up the amount of vram, vram width, and decide how to place
4138  * vram and gart within the GPU's physical address space (CIK).
4139  * Returns 0 for success.
4140  */
4141 static int cik_mc_init(struct radeon_device *rdev)
4142 {
4143 	u32 tmp;
4144 	int chansize, numchan;
4145 
4146 	/* Get VRAM information */
4147 	rdev->mc.vram_is_ddr = true;
4148 	tmp = RREG32(MC_ARB_RAMCFG);
4149 	if (tmp & CHANSIZE_MASK) {
4150 		chansize = 64;
4151 	} else {
4152 		chansize = 32;
4153 	}
4154 	tmp = RREG32(MC_SHARED_CHMAP);
4155 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4156 	case 0:
4157 	default:
4158 		numchan = 1;
4159 		break;
4160 	case 1:
4161 		numchan = 2;
4162 		break;
4163 	case 2:
4164 		numchan = 4;
4165 		break;
4166 	case 3:
4167 		numchan = 8;
4168 		break;
4169 	case 4:
4170 		numchan = 3;
4171 		break;
4172 	case 5:
4173 		numchan = 6;
4174 		break;
4175 	case 6:
4176 		numchan = 10;
4177 		break;
4178 	case 7:
4179 		numchan = 12;
4180 		break;
4181 	case 8:
4182 		numchan = 16;
4183 		break;
4184 	}
4185 	rdev->mc.vram_width = numchan * chansize;
4186 	/* Could aper size report 0? */
4187 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4188 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4189 	/* size in MB on cik */
4190 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4191 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4192 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4193 	si_vram_gtt_location(rdev, &rdev->mc);
4194 	radeon_update_bandwidth_info(rdev);
4195 
4196 	return 0;
4197 }
4198 
4199 /*
4200  * GART
4201  * VMID 0 is the physical GPU addresses as used by the kernel.
4202  * VMIDs 1-15 are used for userspace clients and are handled
4203  * by the radeon vm/hsa code.
4204  */
4205 /**
4206  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4207  *
4208  * @rdev: radeon_device pointer
4209  *
4210  * Flush the TLB for the VMID 0 page table (CIK).
4211  */
4212 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4213 {
4214 	/* flush hdp cache */
4215 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4216 
4217 	/* bits 0-15 are the VM contexts0-15 */
4218 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4219 }
4220 
4221 /**
4222  * cik_pcie_gart_enable - gart enable
4223  *
4224  * @rdev: radeon_device pointer
4225  *
4226  * This sets up the TLBs, programs the page tables for VMID0,
4227  * sets up the hw for VMIDs 1-15 which are allocated on
4228  * demand, and sets up the global locations for the LDS, GDS,
4229  * and GPUVM for FSA64 clients (CIK).
4230  * Returns 0 for success, errors for failure.
4231  */
4232 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4233 {
4234 	int r, i;
4235 
4236 	if (rdev->gart.robj == NULL) {
4237 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4238 		return -EINVAL;
4239 	}
4240 	r = radeon_gart_table_vram_pin(rdev);
4241 	if (r)
4242 		return r;
4243 	radeon_gart_restore(rdev);
4244 	/* Setup TLB control */
4245 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4246 	       (0xA << 7) |
4247 	       ENABLE_L1_TLB |
4248 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4249 	       ENABLE_ADVANCED_DRIVER_MODEL |
4250 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4251 	/* Setup L2 cache */
4252 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4253 	       ENABLE_L2_FRAGMENT_PROCESSING |
4254 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4255 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4256 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4257 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4258 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4259 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4260 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4261 	/* setup context0 */
4262 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4263 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4264 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4265 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4266 			(u32)(rdev->dummy_page.addr >> 12));
4267 	WREG32(VM_CONTEXT0_CNTL2, 0);
4268 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4269 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4270 
4271 	WREG32(0x15D4, 0);
4272 	WREG32(0x15D8, 0);
4273 	WREG32(0x15DC, 0);
4274 
4275 	/* empty context1-15 */
4276 	/* FIXME start with 4G, once using 2 level pt switch to full
4277 	 * vm size space
4278 	 */
4279 	/* set vm size, must be a multiple of 4 */
4280 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4281 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
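	/* page table base registers for VMIDs 1-7 follow VM_CONTEXT0_,
	 * those for VMIDs 8-15 follow VM_CONTEXT8_
	 */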
4282 	for (i = 1; i < 16; i++) {
4283 		if (i < 8)
4284 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4285 			       rdev->gart.table_addr >> 12);
4286 		else
4287 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4288 			       rdev->gart.table_addr >> 12);
4289 	}
4290 
4291 	/* enable context1-15 */
4292 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4293 	       (u32)(rdev->dummy_page.addr >> 12));
4294 	WREG32(VM_CONTEXT1_CNTL2, 4);
4295 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4296 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4298 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4300 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4301 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4302 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4303 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4304 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4305 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4306 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4307 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4308 
4309 	/* TC cache setup ??? */
4310 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4311 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4312 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4313 
4314 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4315 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4316 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4317 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4318 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4319 
4320 	WREG32(TC_CFG_L1_VOLATILE, 0);
4321 	WREG32(TC_CFG_L2_VOLATILE, 0);
4322 
4323 	if (rdev->family == CHIP_KAVERI) {
4324 		u32 tmp = RREG32(CHUB_CONTROL);
4325 		tmp &= ~BYPASS_VM;
4326 		WREG32(CHUB_CONTROL, tmp);
4327 	}
4328 
4329 	/* XXX SH_MEM regs */
4330 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4331 	mutex_lock(&rdev->srbm_mutex);
4332 	for (i = 0; i < 16; i++) {
4333 		cik_srbm_select(rdev, 0, 0, 0, i);
4334 		/* CP and shaders */
4335 		WREG32(SH_MEM_CONFIG, 0);
4336 		WREG32(SH_MEM_APE1_BASE, 1);
4337 		WREG32(SH_MEM_APE1_LIMIT, 0);
4338 		WREG32(SH_MEM_BASES, 0);
4339 		/* SDMA GFX */
4340 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4341 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4342 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4343 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4344 		/* XXX SDMA RLC - todo */
4345 	}
4346 	cik_srbm_select(rdev, 0, 0, 0, 0);
4347 	mutex_unlock(&rdev->srbm_mutex);
4348 
4349 	cik_pcie_gart_tlb_flush(rdev);
4350 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4351 		 (unsigned)(rdev->mc.gtt_size >> 20),
4352 		 (unsigned long long)rdev->gart.table_addr);
4353 	rdev->gart.ready = true;
4354 	return 0;
4355 }
4356 
4357 /**
4358  * cik_pcie_gart_disable - gart disable
4359  *
4360  * @rdev: radeon_device pointer
4361  *
4362  * This disables all VM page tables (CIK).
4363  */
4364 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4365 {
4366 	/* Disable all tables */
4367 	WREG32(VM_CONTEXT0_CNTL, 0);
4368 	WREG32(VM_CONTEXT1_CNTL, 0);
4369 	/* Setup TLB control */
4370 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4371 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4372 	/* Setup L2 cache */
4373 	WREG32(VM_L2_CNTL,
4374 	       ENABLE_L2_FRAGMENT_PROCESSING |
4375 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4376 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4377 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4378 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4379 	WREG32(VM_L2_CNTL2, 0);
4380 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4381 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4382 	radeon_gart_table_vram_unpin(rdev);
4383 }
4384 
4385 /**
4386  * cik_pcie_gart_fini - vm fini callback
4387  *
4388  * @rdev: radeon_device pointer
4389  *
4390  * Tears down the driver GART/VM setup (CIK).
4391  */
4392 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4393 {
4394 	cik_pcie_gart_disable(rdev);
4395 	radeon_gart_table_vram_free(rdev);
4396 	radeon_gart_fini(rdev);
4397 }
4398 
4399 /* vm parser */
4400 /**
4401  * cik_ib_parse - vm ib_parse callback
4402  *
4403  * @rdev: radeon_device pointer
4404  * @ib: indirect buffer pointer
4405  *
4406  * CIK uses hw IB checking so this is a nop (CIK).
4407  */
4408 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4409 {
4410 	return 0;
4411 }
4412 
4413 /*
4414  * vm
4415  * VMID 0 is the physical GPU addresses as used by the kernel.
4416  * VMIDs 1-15 are used for userspace clients and are handled
4417  * by the radeon vm/hsa code.
4418  */
4419 /**
4420  * cik_vm_init - cik vm init callback
4421  *
4422  * @rdev: radeon_device pointer
4423  *
4424  * Inits cik specific vm parameters (number of VMs, base of vram for
4425  * VMIDs 1-15) (CIK).
4426  * Returns 0 for success.
4427  */
4428 int cik_vm_init(struct radeon_device *rdev)
4429 {
4430 	/* number of VMs */
4431 	rdev->vm_manager.nvm = 16;
4432 	/* base offset of vram pages */
4433 	if (rdev->flags & RADEON_IS_IGP) {
4434 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4435 		tmp <<= 22;
4436 		rdev->vm_manager.vram_base_offset = tmp;
4437 	} else
4438 		rdev->vm_manager.vram_base_offset = 0;
4439 
4440 	return 0;
4441 }
4442 
4443 /**
4444  * cik_vm_fini - cik vm fini callback
4445  *
4446  * @rdev: radeon_device pointer
4447  *
4448  * Tear down any asic specific VM setup (CIK).
4449  */
4450 void cik_vm_fini(struct radeon_device *rdev)
4451 {
4452 }
4453 
4454 /**
4455  * cik_vm_decode_fault - print human readable fault info
4456  *
4457  * @rdev: radeon_device pointer
4458  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4459  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4460  *
4461  * Print human readable fault information (CIK).
4462  */
4463 static void cik_vm_decode_fault(struct radeon_device *rdev,
4464 				u32 status, u32 addr, u32 mc_client)
4465 {
4466 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4467 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4468 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4469 	char *block = (char *)&mc_client;
4470 
4471 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4472 	       protections, vmid, addr,
4473 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4474 	       block, mc_id);
4475 }
4476 
4477 /**
4478  * cik_vm_flush - cik vm flush using the CP
4479  *
4480  * @rdev: radeon_device pointer
4481  *
4482  * Update the page table base and flush the VM TLB
4483  * using the CP (CIK).
4484  */
4485 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4486 {
4487 	struct radeon_ring *ring = &rdev->ring[ridx];
4488 
4489 	if (vm == NULL)
4490 		return;
4491 
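	/* write the VM's page directory base (in 4k page units) into the
	 * per-VMID page table base register
	 */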
4492 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4493 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4494 				 WRITE_DATA_DST_SEL(0)));
4495 	if (vm->id < 8) {
4496 		radeon_ring_write(ring,
4497 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4498 	} else {
4499 		radeon_ring_write(ring,
4500 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4501 	}
4502 	radeon_ring_write(ring, 0);
4503 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4504 
4505 	/* update SH_MEM_* regs */
4506 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4507 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4508 				 WRITE_DATA_DST_SEL(0)));
4509 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4510 	radeon_ring_write(ring, 0);
4511 	radeon_ring_write(ring, VMID(vm->id));
4512 
4513 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4514 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4515 				 WRITE_DATA_DST_SEL(0)));
4516 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4517 	radeon_ring_write(ring, 0);
4518 
4519 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4520 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4521 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4522 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4523 
4524 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4525 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4526 				 WRITE_DATA_DST_SEL(0)));
4527 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4528 	radeon_ring_write(ring, 0);
4529 	radeon_ring_write(ring, VMID(0));
4530 
4531 	/* HDP flush */
4532 	/* We should be using the WAIT_REG_MEM packet here like in
4533 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4534 	 * context...
4535 	 */
4536 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4537 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4538 				 WRITE_DATA_DST_SEL(0)));
4539 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4540 	radeon_ring_write(ring, 0);
4541 	radeon_ring_write(ring, 0);
4542 
4543 	/* bits 0-15 are the VM contexts0-15 */
4544 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4545 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4546 				 WRITE_DATA_DST_SEL(0)));
4547 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4548 	radeon_ring_write(ring, 0);
4549 	radeon_ring_write(ring, 1 << vm->id);
4550 
4551 	/* compute doesn't have PFP */
4552 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4553 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4554 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4555 		radeon_ring_write(ring, 0x0);
4556 	}
4557 }
4558 
4559 /**
4560  * cik_vm_set_page - update the page tables using CP or sDMA
4561  *
4562  * @rdev: radeon_device pointer
4563  * @ib: indirect buffer to fill with commands
4564  * @pe: addr of the page entry
4565  * @addr: dst addr to write into pe
4566  * @count: number of page entries to update
4567  * @incr: increase next addr by incr bytes
4568  * @flags: access flags
4569  *
4570  * Update the page tables using CP or sDMA (CIK).
4571  */
4572 void cik_vm_set_page(struct radeon_device *rdev,
4573 		     struct radeon_ib *ib,
4574 		     uint64_t pe,
4575 		     uint64_t addr, unsigned count,
4576 		     uint32_t incr, uint32_t flags)
4577 {
4578 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4579 	uint64_t value;
4580 	unsigned ndw;
4581 
4582 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4583 		/* CP */
4584 		while (count) {
4585 			ndw = 2 + count * 2;
4586 			if (ndw > 0x3FFE)
4587 				ndw = 0x3FFE;
4588 
4589 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4590 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4591 						    WRITE_DATA_DST_SEL(1));
4592 			ib->ptr[ib->length_dw++] = pe;
4593 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4594 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4595 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4596 					value = radeon_vm_map_gart(rdev, addr);
4597 					value &= 0xFFFFFFFFFFFFF000ULL;
4598 				} else if (flags & RADEON_VM_PAGE_VALID) {
4599 					value = addr;
4600 				} else {
4601 					value = 0;
4602 				}
4603 				addr += incr;
4604 				value |= r600_flags;
4605 				ib->ptr[ib->length_dw++] = value;
4606 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4607 			}
4608 		}
4609 	} else {
4610 		/* DMA */
4611 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4612 			while (count) {
4613 				ndw = count * 2;
4614 				if (ndw > 0xFFFFE)
4615 					ndw = 0xFFFFE;
4616 
4617 				/* for non-physically contiguous pages (system) */
4618 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4619 				ib->ptr[ib->length_dw++] = pe;
4620 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4621 				ib->ptr[ib->length_dw++] = ndw;
4622 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4623 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4624 						value = radeon_vm_map_gart(rdev, addr);
4625 						value &= 0xFFFFFFFFFFFFF000ULL;
4626 					} else if (flags & RADEON_VM_PAGE_VALID) {
4627 						value = addr;
4628 					} else {
4629 						value = 0;
4630 					}
4631 					addr += incr;
4632 					value |= r600_flags;
4633 					ib->ptr[ib->length_dw++] = value;
4634 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4635 				}
4636 			}
4637 		} else {
4638 			while (count) {
4639 				ndw = count;
4640 				if (ndw > 0x7FFFF)
4641 					ndw = 0x7FFFF;
4642 
4643 				if (flags & RADEON_VM_PAGE_VALID)
4644 					value = addr;
4645 				else
4646 					value = 0;
4647 				/* for physically contiguous pages (vram) */
4648 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4649 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4650 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4651 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4652 				ib->ptr[ib->length_dw++] = 0;
4653 				ib->ptr[ib->length_dw++] = value; /* value */
4654 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4655 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4656 				ib->ptr[ib->length_dw++] = 0;
4657 				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4658 				pe += ndw * 8;
4659 				addr += ndw * incr;
4660 				count -= ndw;
4661 			}
4662 		}
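		/* pad the IB to a multiple of 8 dwords with SDMA NOPs */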
4663 		while (ib->length_dw & 0x7)
4664 			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4665 	}
4666 }
4667 
4668 /**
4669  * cik_dma_vm_flush - cik vm flush using sDMA
4670  *
4671  * @rdev: radeon_device pointer
4672  *
4673  * Update the page table base and flush the VM TLB
4674  * using sDMA (CIK).
4675  */
4676 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4677 {
4678 	struct radeon_ring *ring = &rdev->ring[ridx];
4679 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4680 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4681 	u32 ref_and_mask;
4682 
4683 	if (vm == NULL)
4684 		return;
4685 
4686 	if (ridx == R600_RING_TYPE_DMA_INDEX)
4687 		ref_and_mask = SDMA0;
4688 	else
4689 		ref_and_mask = SDMA1;
4690 
4691 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4692 	if (vm->id < 8) {
4693 		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4694 	} else {
4695 		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4696 	}
4697 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4698 
4699 	/* update SH_MEM_* regs */
4700 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4701 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4702 	radeon_ring_write(ring, VMID(vm->id));
4703 
4704 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4705 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4706 	radeon_ring_write(ring, 0);
4707 
4708 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4709 	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4710 	radeon_ring_write(ring, 0);
4711 
4712 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4713 	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4714 	radeon_ring_write(ring, 1);
4715 
4716 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4717 	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4718 	radeon_ring_write(ring, 0);
4719 
4720 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4721 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4722 	radeon_ring_write(ring, VMID(0));
4723 
4724 	/* flush HDP */
4725 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4726 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4727 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4728 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4729 	radeon_ring_write(ring, ref_and_mask); /* MASK */
4730 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4731 
4732 	/* flush TLB */
4733 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4734 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4735 	radeon_ring_write(ring, 1 << vm->id);
4736 }
4737 
4738 /*
4739  * RLC
4740  * The RLC is a multi-purpose microengine that handles a
4741  * variety of functions, the most important of which is
4742  * the interrupt controller.
4743  */
4744 /**
4745  * cik_rlc_stop - stop the RLC ME
4746  *
4747  * @rdev: radeon_device pointer
4748  *
4749  * Halt the RLC ME (MicroEngine) (CIK).
4750  */
4751 static void cik_rlc_stop(struct radeon_device *rdev)
4752 {
4753 	int i, j, k;
4754 	u32 mask, tmp;
4755 
4756 	tmp = RREG32(CP_INT_CNTL_RING0);
4757 	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4758 	WREG32(CP_INT_CNTL_RING0, tmp);
4759 
4760 	RREG32(CB_CGTT_SCLK_CTRL);
4761 	RREG32(CB_CGTT_SCLK_CTRL);
4762 	RREG32(CB_CGTT_SCLK_CTRL);
4763 	RREG32(CB_CGTT_SCLK_CTRL);
4764 
4765 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4766 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4767 
4768 	WREG32(RLC_CNTL, 0);
4769 
4770 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4771 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4772 			cik_select_se_sh(rdev, i, j);
4773 			for (k = 0; k < rdev->usec_timeout; k++) {
4774 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4775 					break;
4776 				udelay(1);
4777 			}
4778 		}
4779 	}
4780 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4781 
4782 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4783 	for (k = 0; k < rdev->usec_timeout; k++) {
4784 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4785 			break;
4786 		udelay(1);
4787 	}
4788 }
4789 
4790 /**
4791  * cik_rlc_start - start the RLC ME
4792  *
4793  * @rdev: radeon_device pointer
4794  *
4795  * Unhalt the RLC ME (MicroEngine) (CIK).
4796  */
4797 static void cik_rlc_start(struct radeon_device *rdev)
4798 {
4799 	u32 tmp;
4800 
4801 	WREG32(RLC_CNTL, RLC_ENABLE);
4802 
4803 	tmp = RREG32(CP_INT_CNTL_RING0);
4804 	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4805 	WREG32(CP_INT_CNTL_RING0, tmp);
4806 
4807 	udelay(50);
4808 }
4809 
4810 /**
4811  * cik_rlc_resume - setup the RLC hw
4812  *
4813  * @rdev: radeon_device pointer
4814  *
4815  * Initialize the RLC registers, load the ucode,
4816  * and start the RLC (CIK).
4817  * Returns 0 for success, -EINVAL if the ucode is not available.
4818  */
4819 static int cik_rlc_resume(struct radeon_device *rdev)
4820 {
4821 	u32 i, size;
4822 	u32 clear_state_info[3];
4823 	const __be32 *fw_data;
4824 
4825 	if (!rdev->rlc_fw)
4826 		return -EINVAL;
4827 
4828 	switch (rdev->family) {
4829 	case CHIP_BONAIRE:
4830 	default:
4831 		size = BONAIRE_RLC_UCODE_SIZE;
4832 		break;
4833 	case CHIP_KAVERI:
4834 		size = KV_RLC_UCODE_SIZE;
4835 		break;
4836 	case CHIP_KABINI:
4837 		size = KB_RLC_UCODE_SIZE;
4838 		break;
4839 	}
4840 
4841 	cik_rlc_stop(rdev);
4842 
4843 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4844 	RREG32(GRBM_SOFT_RESET);
4845 	udelay(50);
4846 	WREG32(GRBM_SOFT_RESET, 0);
4847 	RREG32(GRBM_SOFT_RESET);
4848 	udelay(50);
4849 
4850 	WREG32(RLC_LB_CNTR_INIT, 0);
4851 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4852 
4853 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4854 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4855 	WREG32(RLC_LB_PARAMS, 0x00600408);
4856 	WREG32(RLC_LB_CNTL, 0x80000004);
4857 
4858 	WREG32(RLC_MC_CNTL, 0);
4859 	WREG32(RLC_UCODE_CNTL, 0);
4860 
4861 	fw_data = (const __be32 *)rdev->rlc_fw->data;
4862 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4863 	for (i = 0; i < size; i++)
4864 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4865 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4866 
4867 	/* XXX */
4868 	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4869 	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4870 	clear_state_info[2] = 0;//cik_default_size;
4871 	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4872 	for (i = 0; i < 3; i++)
4873 		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4874 	WREG32(RLC_DRIVER_DMA_STATUS, 0);
4875 
4876 	cik_rlc_start(rdev);
4877 
4878 	return 0;
4879 }
4880 
4881 /*
4882  * Interrupts
4883  * Starting with r6xx, interrupts are handled via a ring buffer.
4884  * Ring buffers are areas of GPU accessible memory that the GPU
4885  * writes interrupt vectors into and the host reads vectors out of.
4886  * There is a rptr (read pointer) that determines where the
4887  * host is currently reading, and a wptr (write pointer)
4888  * which determines where the GPU has written.  When the
4889  * pointers are equal, the ring is idle.  When the GPU
4890  * writes vectors to the ring buffer, it increments the
4891  * wptr.  When there is an interrupt, the host then starts
4892  * fetching commands and processing them until the pointers are
4893  * equal again at which point it updates the rptr.
4894  */
4895 
4896 /**
4897  * cik_enable_interrupts - Enable the interrupt ring buffer
4898  *
4899  * @rdev: radeon_device pointer
4900  *
4901  * Enable the interrupt ring buffer (CIK).
4902  */
4903 static void cik_enable_interrupts(struct radeon_device *rdev)
4904 {
4905 	u32 ih_cntl = RREG32(IH_CNTL);
4906 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4907 
4908 	ih_cntl |= ENABLE_INTR;
4909 	ih_rb_cntl |= IH_RB_ENABLE;
4910 	WREG32(IH_CNTL, ih_cntl);
4911 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4912 	rdev->ih.enabled = true;
4913 }
4914 
4915 /**
4916  * cik_disable_interrupts - Disable the interrupt ring buffer
4917  *
4918  * @rdev: radeon_device pointer
4919  *
4920  * Disable the interrupt ring buffer (CIK).
4921  */
4922 static void cik_disable_interrupts(struct radeon_device *rdev)
4923 {
4924 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4925 	u32 ih_cntl = RREG32(IH_CNTL);
4926 
4927 	ih_rb_cntl &= ~IH_RB_ENABLE;
4928 	ih_cntl &= ~ENABLE_INTR;
4929 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4930 	WREG32(IH_CNTL, ih_cntl);
4931 	/* set rptr, wptr to 0 */
4932 	WREG32(IH_RB_RPTR, 0);
4933 	WREG32(IH_RB_WPTR, 0);
4934 	rdev->ih.enabled = false;
4935 	rdev->ih.rptr = 0;
4936 }
4937 
4938 /**
4939  * cik_disable_interrupt_state - Disable all interrupt sources
4940  *
4941  * @rdev: radeon_device pointer
4942  *
4943  * Clear all interrupt enable bits used by the driver (CIK).
4944  */
4945 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4946 {
4947 	u32 tmp;
4948 
4949 	/* gfx ring */
4950 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4951 	/* sdma */
4952 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4953 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4954 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4955 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4956 	/* compute queues */
4957 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4958 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4959 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4960 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4961 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4962 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4963 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4964 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4965 	/* grbm */
4966 	WREG32(GRBM_INT_CNTL, 0);
4967 	/* vline/vblank, etc. */
4968 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4969 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4970 	if (rdev->num_crtc >= 4) {
4971 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4972 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4973 	}
4974 	if (rdev->num_crtc >= 6) {
4975 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4976 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4977 	}
4978 
4979 	/* dac hotplug */
4980 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4981 
4982 	/* digital hotplug */
4983 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984 	WREG32(DC_HPD1_INT_CONTROL, tmp);
4985 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4986 	WREG32(DC_HPD2_INT_CONTROL, tmp);
4987 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4988 	WREG32(DC_HPD3_INT_CONTROL, tmp);
4989 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4990 	WREG32(DC_HPD4_INT_CONTROL, tmp);
4991 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4992 	WREG32(DC_HPD5_INT_CONTROL, tmp);
4993 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4994 	WREG32(DC_HPD6_INT_CONTROL, tmp);
4995 
4996 }
4997 
4998 /**
4999  * cik_irq_init - init and enable the interrupt ring
5000  *
5001  * @rdev: radeon_device pointer
5002  *
5003  * Allocate a ring buffer for the interrupt controller,
5004  * enable the RLC, disable interrupts, enable the IH
5005  * ring buffer and enable it (CIK).
5006  * Called at device load and resume.
5007  * Returns 0 for success, errors for failure.
5008  */
5009 static int cik_irq_init(struct radeon_device *rdev)
5010 {
5011 	int ret = 0;
5012 	int rb_bufsz;
5013 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5014 
5015 	/* allocate ring */
5016 	ret = r600_ih_ring_alloc(rdev);
5017 	if (ret)
5018 		return ret;
5019 
5020 	/* disable irqs */
5021 	cik_disable_interrupts(rdev);
5022 
5023 	/* init rlc */
5024 	ret = cik_rlc_resume(rdev);
5025 	if (ret) {
5026 		r600_ih_ring_fini(rdev);
5027 		return ret;
5028 	}
5029 
5030 	/* setup interrupt control */
5031 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
5032 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5033 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5034 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5035 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5036 	 */
5037 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5038 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5039 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5040 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5041 
5042 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5043 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5044 
5045 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5046 		      IH_WPTR_OVERFLOW_CLEAR |
5047 		      (rb_bufsz << 1));
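
	/*
	 * Editor's note (sketch): drm_order() returns the log2 of its
	 * argument and ring_size is in bytes, so rb_bufsz is the log2 of
	 * the ring size in dwords, which appears to be what the hardware
	 * expects; the << 1 above shifts it into the ring size field of
	 * IH_RB_CNTL, which starts at bit 1.  For example, the 64KB IH
	 * ring set up in cik_init() gives 16384 dwords and rb_bufsz = 14.
	 */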
5048 
5049 	if (rdev->wb.enabled)
5050 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5051 
5052 	/* set the writeback address whether it's enabled or not */
5053 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5054 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5055 
5056 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5057 
5058 	/* set rptr, wptr to 0 */
5059 	WREG32(IH_RB_RPTR, 0);
5060 	WREG32(IH_RB_WPTR, 0);
5061 
5062 	/* Default settings for IH_CNTL (disabled at first) */
5063 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5064 	/* RPTR_REARM only works if msi's are enabled */
5065 	if (rdev->msi_enabled)
5066 		ih_cntl |= RPTR_REARM;
5067 	WREG32(IH_CNTL, ih_cntl);
5068 
5069 	/* force the active interrupt state to all disabled */
5070 	cik_disable_interrupt_state(rdev);
5071 
5072 	pci_set_master(rdev->pdev);
5073 
5074 	/* enable irqs */
5075 	cik_enable_interrupts(rdev);
5076 
5077 	return ret;
5078 }
5079 
5080 /**
5081  * cik_irq_set - enable/disable interrupt sources
5082  *
5083  * @rdev: radeon_device pointer
5084  *
5085  * Enable interrupt sources on the GPU (vblanks, hpd,
5086  * etc.) (CIK).
5087  * Returns 0 for success, errors for failure.
5088  */
5089 int cik_irq_set(struct radeon_device *rdev)
5090 {
5091 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5092 		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5093 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5094 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5095 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5096 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5097 	u32 grbm_int_cntl = 0;
5098 	u32 dma_cntl, dma_cntl1;
5099 
5100 	if (!rdev->irq.installed) {
5101 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5102 		return -EINVAL;
5103 	}
5104 	/* don't enable anything if the ih is disabled */
5105 	if (!rdev->ih.enabled) {
5106 		cik_disable_interrupts(rdev);
5107 		/* force the active interrupt state to all disabled */
5108 		cik_disable_interrupt_state(rdev);
5109 		return 0;
5110 	}
5111 
5112 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5113 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5114 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5115 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5116 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5117 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5118 
5119 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5120 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5121 
5122 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5125 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5126 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5127 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5128 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5129 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5130 
5131 	/* enable CP interrupts on all rings */
5132 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5133 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
5134 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5135 	}
5136 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5137 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5138 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5139 		if (ring->me == 1) {
5140 			switch (ring->pipe) {
5141 			case 0:
5142 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5143 				break;
5144 			case 1:
5145 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5146 				break;
5147 			case 2:
5148 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5149 				break;
5150 			case 3:
5151 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5152 				break;
5153 			default:
5154 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5155 				break;
5156 			}
5157 		} else if (ring->me == 2) {
5158 			switch (ring->pipe) {
5159 			case 0:
5160 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5161 				break;
5162 			case 1:
5163 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5164 				break;
5165 			case 2:
5166 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5167 				break;
5168 			case 3:
5169 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5170 				break;
5171 			default:
5172 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5173 				break;
5174 			}
5175 		} else {
5176 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5177 		}
5178 	}
5179 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5180 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5181 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5182 		if (ring->me == 1) {
5183 			switch (ring->pipe) {
5184 			case 0:
5185 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5186 				break;
5187 			case 1:
5188 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5189 				break;
5190 			case 2:
5191 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5192 				break;
5193 			case 3:
5194 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5195 				break;
5196 			default:
5197 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5198 				break;
5199 			}
5200 		} else if (ring->me == 2) {
5201 			switch (ring->pipe) {
5202 			case 0:
5203 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5204 				break;
5205 			case 1:
5206 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5207 				break;
5208 			case 2:
5209 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5210 				break;
5211 			case 3:
5212 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5213 				break;
5214 			default:
5215 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5216 				break;
5217 			}
5218 		} else {
5219 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
5220 		}
5221 	}
5222 
5223 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5224 		DRM_DEBUG("cik_irq_set: sw int dma\n");
5225 		dma_cntl |= TRAP_ENABLE;
5226 	}
5227 
5228 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5229 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
5230 		dma_cntl1 |= TRAP_ENABLE;
5231 	}
5232 
5233 	if (rdev->irq.crtc_vblank_int[0] ||
5234 	    atomic_read(&rdev->irq.pflip[0])) {
5235 		DRM_DEBUG("cik_irq_set: vblank 0\n");
5236 		crtc1 |= VBLANK_INTERRUPT_MASK;
5237 	}
5238 	if (rdev->irq.crtc_vblank_int[1] ||
5239 	    atomic_read(&rdev->irq.pflip[1])) {
5240 		DRM_DEBUG("cik_irq_set: vblank 1\n");
5241 		crtc2 |= VBLANK_INTERRUPT_MASK;
5242 	}
5243 	if (rdev->irq.crtc_vblank_int[2] ||
5244 	    atomic_read(&rdev->irq.pflip[2])) {
5245 		DRM_DEBUG("cik_irq_set: vblank 2\n");
5246 		crtc3 |= VBLANK_INTERRUPT_MASK;
5247 	}
5248 	if (rdev->irq.crtc_vblank_int[3] ||
5249 	    atomic_read(&rdev->irq.pflip[3])) {
5250 		DRM_DEBUG("cik_irq_set: vblank 3\n");
5251 		crtc4 |= VBLANK_INTERRUPT_MASK;
5252 	}
5253 	if (rdev->irq.crtc_vblank_int[4] ||
5254 	    atomic_read(&rdev->irq.pflip[4])) {
5255 		DRM_DEBUG("cik_irq_set: vblank 4\n");
5256 		crtc5 |= VBLANK_INTERRUPT_MASK;
5257 	}
5258 	if (rdev->irq.crtc_vblank_int[5] ||
5259 	    atomic_read(&rdev->irq.pflip[5])) {
5260 		DRM_DEBUG("cik_irq_set: vblank 5\n");
5261 		crtc6 |= VBLANK_INTERRUPT_MASK;
5262 	}
5263 	if (rdev->irq.hpd[0]) {
5264 		DRM_DEBUG("cik_irq_set: hpd 1\n");
5265 		hpd1 |= DC_HPDx_INT_EN;
5266 	}
5267 	if (rdev->irq.hpd[1]) {
5268 		DRM_DEBUG("cik_irq_set: hpd 2\n");
5269 		hpd2 |= DC_HPDx_INT_EN;
5270 	}
5271 	if (rdev->irq.hpd[2]) {
5272 		DRM_DEBUG("cik_irq_set: hpd 3\n");
5273 		hpd3 |= DC_HPDx_INT_EN;
5274 	}
5275 	if (rdev->irq.hpd[3]) {
5276 		DRM_DEBUG("cik_irq_set: hpd 4\n");
5277 		hpd4 |= DC_HPDx_INT_EN;
5278 	}
5279 	if (rdev->irq.hpd[4]) {
5280 		DRM_DEBUG("cik_irq_set: hpd 5\n");
5281 		hpd5 |= DC_HPDx_INT_EN;
5282 	}
5283 	if (rdev->irq.hpd[5]) {
5284 		DRM_DEBUG("cik_irq_set: hpd 6\n");
5285 		hpd6 |= DC_HPDx_INT_EN;
5286 	}
5287 
5288 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5289 
5290 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5291 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5292 
5293 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5294 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5295 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5296 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5297 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5298 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5299 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5300 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5301 
5302 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5303 
5304 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5305 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5306 	if (rdev->num_crtc >= 4) {
5307 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5308 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5309 	}
5310 	if (rdev->num_crtc >= 6) {
5311 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5312 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5313 	}
5314 
5315 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
5316 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
5317 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
5318 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
5319 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
5320 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
5321 
5322 	return 0;
5323 }
5324 
5325 /**
5326  * cik_irq_ack - ack interrupt sources
5327  *
5328  * @rdev: radeon_device pointer
5329  *
5330  * Ack interrupt sources on the GPU (vblanks, hpd,
5331  * etc.) (CIK).  Certain interrupt sources are sw
5332  * generated and do not require an explicit ack.
5333  */
5334 static inline void cik_irq_ack(struct radeon_device *rdev)
5335 {
5336 	u32 tmp;
5337 
5338 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5339 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5340 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5341 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5342 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5343 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5344 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5345 
5346 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5347 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5348 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5349 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5350 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5351 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5352 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5353 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5354 
5355 	if (rdev->num_crtc >= 4) {
5356 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5357 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5358 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5359 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5360 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5361 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5362 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5363 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5364 	}
5365 
5366 	if (rdev->num_crtc >= 6) {
5367 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5368 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5369 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5370 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5371 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5372 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5373 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5374 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5375 	}
5376 
5377 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5378 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5379 		tmp |= DC_HPDx_INT_ACK;
5380 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5381 	}
5382 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5383 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5384 		tmp |= DC_HPDx_INT_ACK;
5385 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5386 	}
5387 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5388 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5389 		tmp |= DC_HPDx_INT_ACK;
5390 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5391 	}
5392 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5393 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5394 		tmp |= DC_HPDx_INT_ACK;
5395 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5396 	}
5397 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5398 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5399 		tmp |= DC_HPDx_INT_ACK;
5400 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5401 	}
5402 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5403 		tmp = RREG32(DC_HPD6_INT_CONTROL);
5404 		tmp |= DC_HPDx_INT_ACK;
5405 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5406 	}
5407 }
5408 
5409 /**
5410  * cik_irq_disable - disable interrupts
5411  *
5412  * @rdev: radeon_device pointer
5413  *
5414  * Disable interrupts on the hw (CIK).
5415  */
5416 static void cik_irq_disable(struct radeon_device *rdev)
5417 {
5418 	cik_disable_interrupts(rdev);
5419 	/* Wait and acknowledge irq */
5420 	mdelay(1);
5421 	cik_irq_ack(rdev);
5422 	cik_disable_interrupt_state(rdev);
5423 }
5424 
5425 /**
5426  * cik_irq_suspend - disable interrupts for suspend
5427  *
5428  * @rdev: radeon_device pointer
5429  *
5430  * Disable interrupts and stop the RLC (CIK).
5431  * Used for suspend.
5432  */
5433 static void cik_irq_suspend(struct radeon_device *rdev)
5434 {
5435 	cik_irq_disable(rdev);
5436 	cik_rlc_stop(rdev);
5437 }
5438 
5439 /**
5440  * cik_irq_fini - tear down interrupt support
5441  *
5442  * @rdev: radeon_device pointer
5443  *
5444  * Disable interrupts on the hw and free the IH ring
5445  * buffer (CIK).
5446  * Used for driver unload.
5447  */
5448 static void cik_irq_fini(struct radeon_device *rdev)
5449 {
5450 	cik_irq_suspend(rdev);
5451 	r600_ih_ring_fini(rdev);
5452 }
5453 
5454 /**
5455  * cik_get_ih_wptr - get the IH ring buffer wptr
5456  *
5457  * @rdev: radeon_device pointer
5458  *
5459  * Get the IH ring buffer wptr from either the register
5460  * or the writeback memory buffer (CIK).  Also check for
5461  * ring buffer overflow and deal with it.
5462  * Used by cik_irq_process().
5463  * Returns the value of the wptr.
5464  */
5465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5466 {
5467 	u32 wptr, tmp;
5468 
5469 	if (rdev->wb.enabled)
5470 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5471 	else
5472 		wptr = RREG32(IH_RB_WPTR);
5473 
5474 	if (wptr & RB_OVERFLOW) {
5475 		/* When a ring buffer overflow happens, start parsing interrupts
5476 		 * from the last vector that was not overwritten (wptr + 16).
5477 		 * Hopefully this will allow us to catch up.
5478 		 */
5479 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5480 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5481 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5482 		tmp = RREG32(IH_RB_CNTL);
5483 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5484 		WREG32(IH_RB_CNTL, tmp);
5485 	}
5486 	return (wptr & rdev->ih.ptr_mask);
5487 }
5488 
5489 /*        CIK IV Ring
5490  * Each IV ring entry is 128 bits:
5491  * [7:0]    - interrupt source id
5492  * [31:8]   - reserved
5493  * [59:32]  - interrupt source data
5494  * [63:60]  - reserved
5495  * [71:64]  - RINGID
5496  *            CP:
5497  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5498  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5499  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5500  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5501  *            PIPE_ID - ME0 0=3D
5502  *                    - ME1&2 compute dispatcher (4 pipes each)
5503  *            SDMA:
5504  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5505  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5506  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5507  * [79:72]  - VMID
5508  * [95:80]  - PASID
5509  * [127:96] - reserved
5510  */
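/*
 * Example (editor's sketch): for a compute EOP vector from MEC1, pipe 2,
 * queue 0, the handler below unpacks the four little-endian dwords of the
 * entry roughly as:
 *
 *	src_id   = dw0 & 0xff;			181 = CP EOP
 *	src_data = dw1 & 0xfffffff;
 *	ring_id  = dw2 & 0xff;			the RINGID byte
 *	me_id    = (ring_id >> 5) & 0x3;	-> 1
 *	pipe_id  = (ring_id >> 3) & 0x3;	-> 2
 *	queue_id = ring_id & 0x7;		-> 0
 */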
5511 /**
5512  * cik_irq_process - interrupt handler
5513  *
5514  * @rdev: radeon_device pointer
5515  *
5516  * Interrupt handler (CIK).  Walk the IH ring,
5517  * ack interrupts and schedule work to handle
5518  * interrupt events.
5519  * Returns irq process return code.
5520  */
5521 int cik_irq_process(struct radeon_device *rdev)
5522 {
5523 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5524 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5525 	u32 wptr;
5526 	u32 rptr;
5527 	u32 src_id, src_data, ring_id;
5528 	u8 me_id, pipe_id, queue_id;
5529 	u32 ring_index;
5530 	bool queue_hotplug = false;
5531 	bool queue_reset = false;
5532 	u32 addr, status, mc_client;
5533 
5534 	if (!rdev->ih.enabled || rdev->shutdown)
5535 		return IRQ_NONE;
5536 
5537 	wptr = cik_get_ih_wptr(rdev);
5538 
5539 restart_ih:
5540 	/* is somebody else already processing irqs? */
5541 	if (atomic_xchg(&rdev->ih.lock, 1))
5542 		return IRQ_NONE;
5543 
5544 	rptr = rdev->ih.rptr;
5545 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5546 
5547 	/* Order reading of wptr vs. reading of IH ring data */
5548 	rmb();
5549 
5550 	/* display interrupts */
5551 	cik_irq_ack(rdev);
5552 
5553 	while (rptr != wptr) {
5554 		/* wptr/rptr are in bytes! */
5555 		ring_index = rptr / 4;
5556 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5557 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5558 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5559 
5560 		switch (src_id) {
5561 		case 1: /* D1 vblank/vline */
5562 			switch (src_data) {
5563 			case 0: /* D1 vblank */
5564 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5565 					if (rdev->irq.crtc_vblank_int[0]) {
5566 						drm_handle_vblank(rdev->ddev, 0);
5567 						rdev->pm.vblank_sync = true;
5568 						wake_up(&rdev->irq.vblank_queue);
5569 					}
5570 					if (atomic_read(&rdev->irq.pflip[0]))
5571 						radeon_crtc_handle_flip(rdev, 0);
5572 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5573 					DRM_DEBUG("IH: D1 vblank\n");
5574 				}
5575 				break;
5576 			case 1: /* D1 vline */
5577 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5578 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5579 					DRM_DEBUG("IH: D1 vline\n");
5580 				}
5581 				break;
5582 			default:
5583 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5584 				break;
5585 			}
5586 			break;
5587 		case 2: /* D2 vblank/vline */
5588 			switch (src_data) {
5589 			case 0: /* D2 vblank */
5590 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5591 					if (rdev->irq.crtc_vblank_int[1]) {
5592 						drm_handle_vblank(rdev->ddev, 1);
5593 						rdev->pm.vblank_sync = true;
5594 						wake_up(&rdev->irq.vblank_queue);
5595 					}
5596 					if (atomic_read(&rdev->irq.pflip[1]))
5597 						radeon_crtc_handle_flip(rdev, 1);
5598 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5599 					DRM_DEBUG("IH: D2 vblank\n");
5600 				}
5601 				break;
5602 			case 1: /* D2 vline */
5603 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5604 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5605 					DRM_DEBUG("IH: D2 vline\n");
5606 				}
5607 				break;
5608 			default:
5609 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5610 				break;
5611 			}
5612 			break;
5613 		case 3: /* D3 vblank/vline */
5614 			switch (src_data) {
5615 			case 0: /* D3 vblank */
5616 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5617 					if (rdev->irq.crtc_vblank_int[2]) {
5618 						drm_handle_vblank(rdev->ddev, 2);
5619 						rdev->pm.vblank_sync = true;
5620 						wake_up(&rdev->irq.vblank_queue);
5621 					}
5622 					if (atomic_read(&rdev->irq.pflip[2]))
5623 						radeon_crtc_handle_flip(rdev, 2);
5624 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5625 					DRM_DEBUG("IH: D3 vblank\n");
5626 				}
5627 				break;
5628 			case 1: /* D3 vline */
5629 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5630 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5631 					DRM_DEBUG("IH: D3 vline\n");
5632 				}
5633 				break;
5634 			default:
5635 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5636 				break;
5637 			}
5638 			break;
5639 		case 4: /* D4 vblank/vline */
5640 			switch (src_data) {
5641 			case 0: /* D4 vblank */
5642 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5643 					if (rdev->irq.crtc_vblank_int[3]) {
5644 						drm_handle_vblank(rdev->ddev, 3);
5645 						rdev->pm.vblank_sync = true;
5646 						wake_up(&rdev->irq.vblank_queue);
5647 					}
5648 					if (atomic_read(&rdev->irq.pflip[3]))
5649 						radeon_crtc_handle_flip(rdev, 3);
5650 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5651 					DRM_DEBUG("IH: D4 vblank\n");
5652 				}
5653 				break;
5654 			case 1: /* D4 vline */
5655 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5656 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5657 					DRM_DEBUG("IH: D4 vline\n");
5658 				}
5659 				break;
5660 			default:
5661 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5662 				break;
5663 			}
5664 			break;
5665 		case 5: /* D5 vblank/vline */
5666 			switch (src_data) {
5667 			case 0: /* D5 vblank */
5668 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5669 					if (rdev->irq.crtc_vblank_int[4]) {
5670 						drm_handle_vblank(rdev->ddev, 4);
5671 						rdev->pm.vblank_sync = true;
5672 						wake_up(&rdev->irq.vblank_queue);
5673 					}
5674 					if (atomic_read(&rdev->irq.pflip[4]))
5675 						radeon_crtc_handle_flip(rdev, 4);
5676 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5677 					DRM_DEBUG("IH: D5 vblank\n");
5678 				}
5679 				break;
5680 			case 1: /* D5 vline */
5681 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5682 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5683 					DRM_DEBUG("IH: D5 vline\n");
5684 				}
5685 				break;
5686 			default:
5687 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5688 				break;
5689 			}
5690 			break;
5691 		case 6: /* D6 vblank/vline */
5692 			switch (src_data) {
5693 			case 0: /* D6 vblank */
5694 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5695 					if (rdev->irq.crtc_vblank_int[5]) {
5696 						drm_handle_vblank(rdev->ddev, 5);
5697 						rdev->pm.vblank_sync = true;
5698 						wake_up(&rdev->irq.vblank_queue);
5699 					}
5700 					if (atomic_read(&rdev->irq.pflip[5]))
5701 						radeon_crtc_handle_flip(rdev, 5);
5702 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5703 					DRM_DEBUG("IH: D6 vblank\n");
5704 				}
5705 				break;
5706 			case 1: /* D6 vline */
5707 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5708 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5709 					DRM_DEBUG("IH: D6 vline\n");
5710 				}
5711 				break;
5712 			default:
5713 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5714 				break;
5715 			}
5716 			break;
5717 		case 42: /* HPD hotplug */
5718 			switch (src_data) {
5719 			case 0:
5720 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5721 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5722 					queue_hotplug = true;
5723 					DRM_DEBUG("IH: HPD1\n");
5724 				}
5725 				break;
5726 			case 1:
5727 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5728 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5729 					queue_hotplug = true;
5730 					DRM_DEBUG("IH: HPD2\n");
5731 				}
5732 				break;
5733 			case 2:
5734 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5735 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5736 					queue_hotplug = true;
5737 					DRM_DEBUG("IH: HPD3\n");
5738 				}
5739 				break;
5740 			case 3:
5741 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5742 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5743 					queue_hotplug = true;
5744 					DRM_DEBUG("IH: HPD4\n");
5745 				}
5746 				break;
5747 			case 4:
5748 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5749 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5750 					queue_hotplug = true;
5751 					DRM_DEBUG("IH: HPD5\n");
5752 				}
5753 				break;
5754 			case 5:
5755 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5756 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5757 					queue_hotplug = true;
5758 					DRM_DEBUG("IH: HPD6\n");
5759 				}
5760 				break;
5761 			default:
5762 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5763 				break;
5764 			}
5765 			break;
5766 		case 146:
5767 		case 147:
5768 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5769 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5770 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5771 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5772 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5773 				addr);
5774 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5775 				status);
5776 			cik_vm_decode_fault(rdev, status, addr, mc_client);
5777 			/* reset addr and status */
5778 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5779 			break;
5780 		case 176: /* GFX RB CP_INT */
5781 		case 177: /* GFX IB CP_INT */
5782 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5783 			break;
5784 		case 181: /* CP EOP event */
5785 			DRM_DEBUG("IH: CP EOP\n");
5786 			/* XXX check the bitfield order! */
5787 			me_id = (ring_id & 0x60) >> 5;
5788 			pipe_id = (ring_id & 0x18) >> 3;
5789 			queue_id = (ring_id & 0x7) >> 0;
5790 			switch (me_id) {
5791 			case 0:
5792 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5793 				break;
5794 			case 1:
5795 			case 2:
5796 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5797 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5798 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5799 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5800 				break;
5801 			}
5802 			break;
5803 		case 184: /* CP Privileged reg access */
5804 			DRM_ERROR("Illegal register access in command stream\n");
5805 			/* XXX check the bitfield order! */
5806 			me_id = (ring_id & 0x60) >> 5;
5807 			pipe_id = (ring_id & 0x18) >> 3;
5808 			queue_id = (ring_id & 0x7) >> 0;
5809 			switch (me_id) {
5810 			case 0:
5811 				/* This results in a full GPU reset, but all we need to do is soft
5812 				 * reset the CP for gfx
5813 				 */
5814 				queue_reset = true;
5815 				break;
5816 			case 1:
5817 				/* XXX compute */
5818 				queue_reset = true;
5819 				break;
5820 			case 2:
5821 				/* XXX compute */
5822 				queue_reset = true;
5823 				break;
5824 			}
5825 			break;
5826 		case 185: /* CP Privileged inst */
5827 			DRM_ERROR("Illegal instruction in command stream\n");
5828 			/* XXX check the bitfield order! */
5829 			me_id = (ring_id & 0x60) >> 5;
5830 			pipe_id = (ring_id & 0x18) >> 3;
5831 			queue_id = (ring_id & 0x7) >> 0;
5832 			switch (me_id) {
5833 			case 0:
5834 				/* This results in a full GPU reset, but all we need to do is soft
5835 				 * reset the CP for gfx
5836 				 */
5837 				queue_reset = true;
5838 				break;
5839 			case 1:
5840 				/* XXX compute */
5841 				queue_reset = true;
5842 				break;
5843 			case 2:
5844 				/* XXX compute */
5845 				queue_reset = true;
5846 				break;
5847 			}
5848 			break;
5849 		case 224: /* SDMA trap event */
5850 			/* XXX check the bitfield order! */
5851 			me_id = (ring_id & 0x3) >> 0;
5852 			queue_id = (ring_id & 0xc) >> 2;
5853 			DRM_DEBUG("IH: SDMA trap\n");
5854 			switch (me_id) {
5855 			case 0:
5856 				switch (queue_id) {
5857 				case 0:
5858 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5859 					break;
5860 				case 1:
5861 					/* XXX compute */
5862 					break;
5863 				case 2:
5864 					/* XXX compute */
5865 					break;
5866 				}
5867 				break;
5868 			case 1:
5869 				switch (queue_id) {
5870 				case 0:
5871 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5872 					break;
5873 				case 1:
5874 					/* XXX compute */
5875 					break;
5876 				case 2:
5877 					/* XXX compute */
5878 					break;
5879 				}
5880 				break;
5881 			}
5882 			break;
5883 		case 241: /* SDMA Privileged inst */
5884 		case 247: /* SDMA Privileged inst */
5885 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
5886 			/* XXX check the bitfield order! */
5887 			me_id = (ring_id & 0x3) >> 0;
5888 			queue_id = (ring_id & 0xc) >> 2;
5889 			switch (me_id) {
5890 			case 0:
5891 				switch (queue_id) {
5892 				case 0:
5893 					queue_reset = true;
5894 					break;
5895 				case 1:
5896 					/* XXX compute */
5897 					queue_reset = true;
5898 					break;
5899 				case 2:
5900 					/* XXX compute */
5901 					queue_reset = true;
5902 					break;
5903 				}
5904 				break;
5905 			case 1:
5906 				switch (queue_id) {
5907 				case 0:
5908 					queue_reset = true;
5909 					break;
5910 				case 1:
5911 					/* XXX compute */
5912 					queue_reset = true;
5913 					break;
5914 				case 2:
5915 					/* XXX compute */
5916 					queue_reset = true;
5917 					break;
5918 				}
5919 				break;
5920 			}
5921 			break;
5922 		case 233: /* GUI IDLE */
5923 			DRM_DEBUG("IH: GUI idle\n");
5924 			break;
5925 		default:
5926 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5927 			break;
5928 		}
5929 
5930 		/* wptr/rptr are in bytes! */
5931 		rptr += 16;
5932 		rptr &= rdev->ih.ptr_mask;
5933 	}
5934 	if (queue_hotplug)
5935 		schedule_work(&rdev->hotplug_work);
5936 	if (queue_reset)
5937 		schedule_work(&rdev->reset_work);
5938 	rdev->ih.rptr = rptr;
5939 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
5940 	atomic_set(&rdev->ih.lock, 0);
5941 
5942 	/* make sure wptr hasn't changed while processing */
5943 	wptr = cik_get_ih_wptr(rdev);
5944 	if (wptr != rptr)
5945 		goto restart_ih;
5946 
5947 	return IRQ_HANDLED;
5948 }
5949 
5950 /*
5951  * startup/shutdown callbacks
5952  */
5953 /**
5954  * cik_startup - program the asic to a functional state
5955  *
5956  * @rdev: radeon_device pointer
5957  *
5958  * Programs the asic to a functional state (CIK).
5959  * Called by cik_init() and cik_resume().
5960  * Returns 0 for success, error for failure.
5961  */
5962 static int cik_startup(struct radeon_device *rdev)
5963 {
5964 	struct radeon_ring *ring;
5965 	int r;
5966 
5967 	cik_mc_program(rdev);
5968 
5969 	if (rdev->flags & RADEON_IS_IGP) {
5970 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5971 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5972 			r = cik_init_microcode(rdev);
5973 			if (r) {
5974 				DRM_ERROR("Failed to load firmware!\n");
5975 				return r;
5976 			}
5977 		}
5978 	} else {
5979 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5980 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5981 		    !rdev->mc_fw) {
5982 			r = cik_init_microcode(rdev);
5983 			if (r) {
5984 				DRM_ERROR("Failed to load firmware!\n");
5985 				return r;
5986 			}
5987 		}
5988 
5989 		r = ci_mc_load_microcode(rdev);
5990 		if (r) {
5991 			DRM_ERROR("Failed to load MC firmware!\n");
5992 			return r;
5993 		}
5994 	}
5995 
5996 	r = r600_vram_scratch_init(rdev);
5997 	if (r)
5998 		return r;
5999 
6000 	r = cik_pcie_gart_enable(rdev);
6001 	if (r)
6002 		return r;
6003 	cik_gpu_init(rdev);
6004 
6005 	/* allocate rlc buffers */
6006 	r = si_rlc_init(rdev);
6007 	if (r) {
6008 		DRM_ERROR("Failed to init rlc BOs!\n");
6009 		return r;
6010 	}
6011 
6012 	/* allocate wb buffer */
6013 	r = radeon_wb_init(rdev);
6014 	if (r)
6015 		return r;
6016 
6017 	/* allocate mec buffers */
6018 	r = cik_mec_init(rdev);
6019 	if (r) {
6020 		DRM_ERROR("Failed to init MEC BOs!\n");
6021 		return r;
6022 	}
6023 
6024 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6025 	if (r) {
6026 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6027 		return r;
6028 	}
6029 
6030 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6031 	if (r) {
6032 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6033 		return r;
6034 	}
6035 
6036 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6037 	if (r) {
6038 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6039 		return r;
6040 	}
6041 
6042 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6043 	if (r) {
6044 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6045 		return r;
6046 	}
6047 
6048 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6049 	if (r) {
6050 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6051 		return r;
6052 	}
6053 
6054 	r = cik_uvd_resume(rdev);
6055 	if (!r) {
6056 		r = radeon_fence_driver_start_ring(rdev,
6057 						   R600_RING_TYPE_UVD_INDEX);
6058 		if (r)
6059 			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6060 	}
6061 	if (r)
6062 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6063 
6064 	/* Enable IRQ */
6065 	if (!rdev->irq.installed) {
6066 		r = radeon_irq_kms_init(rdev);
6067 		if (r)
6068 			return r;
6069 	}
6070 
6071 	r = cik_irq_init(rdev);
6072 	if (r) {
6073 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6074 		radeon_irq_kms_fini(rdev);
6075 		return r;
6076 	}
6077 	cik_irq_set(rdev);
6078 
6079 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6080 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6081 			     CP_RB0_RPTR, CP_RB0_WPTR,
6082 			     0, 0xfffff, RADEON_CP_PACKET2);
6083 	if (r)
6084 		return r;
6085 
6086 	/* set up the compute queues */
6087 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6088 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6089 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6090 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6091 			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6092 	if (r)
6093 		return r;
6094 	ring->me = 1; /* first MEC */
6095 	ring->pipe = 0; /* first pipe */
6096 	ring->queue = 0; /* first queue */
6097 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6098 
6099 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6100 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6101 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6102 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6103 			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6104 	if (r)
6105 		return r;
6106 	/* dGPUs only have 1 MEC */
6107 	ring->me = 1; /* first MEC */
6108 	ring->pipe = 0; /* first pipe */
6109 	ring->queue = 1; /* second queue */
6110 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6111 
6112 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6113 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6114 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6115 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6116 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6117 	if (r)
6118 		return r;
6119 
6120 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6121 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6122 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6123 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6124 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6125 	if (r)
6126 		return r;
6127 
6128 	r = cik_cp_resume(rdev);
6129 	if (r)
6130 		return r;
6131 
6132 	r = cik_sdma_resume(rdev);
6133 	if (r)
6134 		return r;
6135 
6136 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6137 	if (ring->ring_size) {
6138 		r = radeon_ring_init(rdev, ring, ring->ring_size,
6139 				     R600_WB_UVD_RPTR_OFFSET,
6140 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6141 				     0, 0xfffff, RADEON_CP_PACKET2);
6142 		if (!r)
6143 			r = r600_uvd_init(rdev);
6144 		if (r)
6145 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6146 	}
6147 
6148 	r = radeon_ib_pool_init(rdev);
6149 	if (r) {
6150 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6151 		return r;
6152 	}
6153 
6154 	r = radeon_vm_manager_init(rdev);
6155 	if (r) {
6156 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6157 		return r;
6158 	}
6159 
6160 	return 0;
6161 }
6162 
6163 /**
6164  * cik_resume - resume the asic to a functional state
6165  *
6166  * @rdev: radeon_device pointer
6167  *
6168  * Programs the asic to a functional state (CIK).
6169  * Called at resume.
6170  * Returns 0 for success, error for failure.
6171  */
6172 int cik_resume(struct radeon_device *rdev)
6173 {
6174 	int r;
6175 
6176 	/* post card */
6177 	atom_asic_init(rdev->mode_info.atom_context);
6178 
6179 	/* init golden registers */
6180 	cik_init_golden_registers(rdev);
6181 
6182 	rdev->accel_working = true;
6183 	r = cik_startup(rdev);
6184 	if (r) {
6185 		DRM_ERROR("cik startup failed on resume\n");
6186 		rdev->accel_working = false;
6187 		return r;
6188 	}
6189 
6190 	return r;
6191 
6192 }
6193 
6194 /**
6195  * cik_suspend - suspend the asic
6196  *
6197  * @rdev: radeon_device pointer
6198  *
6199  * Bring the chip into a state suitable for suspend (CIK).
6200  * Called at suspend.
6201  * Returns 0 for success.
6202  */
6203 int cik_suspend(struct radeon_device *rdev)
6204 {
6205 	radeon_vm_manager_fini(rdev);
6206 	cik_cp_enable(rdev, false);
6207 	cik_sdma_enable(rdev, false);
6208 	r600_uvd_stop(rdev);
6209 	radeon_uvd_suspend(rdev);
6210 	cik_irq_suspend(rdev);
6211 	radeon_wb_disable(rdev);
6212 	cik_pcie_gart_disable(rdev);
6213 	return 0;
6214 }
6215 
6216 /* Plan is to move initialization into this function and use
6217  * helper functions so that radeon_device_init does pretty much
6218  * nothing more than calling asic specific functions. This
6219  * should also allow us to remove a bunch of callback functions
6220  * like vram_info.
6221  */
6222 /**
6223  * cik_init - asic specific driver and hw init
6224  *
6225  * @rdev: radeon_device pointer
6226  *
6227  * Setup asic specific driver variables and program the hw
6228  * to a functional state (CIK).
6229  * Called at driver startup.
6230  * Returns 0 for success, errors for failure.
6231  */
6232 int cik_init(struct radeon_device *rdev)
6233 {
6234 	struct radeon_ring *ring;
6235 	int r;
6236 
6237 	/* Read BIOS */
6238 	if (!radeon_get_bios(rdev)) {
6239 		if (ASIC_IS_AVIVO(rdev))
6240 			return -EINVAL;
6241 	}
6242 	/* Must be an ATOMBIOS */
6243 	if (!rdev->is_atom_bios) {
6244 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6245 		return -EINVAL;
6246 	}
6247 	r = radeon_atombios_init(rdev);
6248 	if (r)
6249 		return r;
6250 
6251 	/* Post card if necessary */
6252 	if (!radeon_card_posted(rdev)) {
6253 		if (!rdev->bios) {
6254 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6255 			return -EINVAL;
6256 		}
6257 		DRM_INFO("GPU not posted. posting now...\n");
6258 		atom_asic_init(rdev->mode_info.atom_context);
6259 	}
6260 	/* init golden registers */
6261 	cik_init_golden_registers(rdev);
6262 	/* Initialize scratch registers */
6263 	cik_scratch_init(rdev);
6264 	/* Initialize surface registers */
6265 	radeon_surface_init(rdev);
6266 	/* Initialize clocks */
6267 	radeon_get_clock_info(rdev->ddev);
6268 
6269 	/* Fence driver */
6270 	r = radeon_fence_driver_init(rdev);
6271 	if (r)
6272 		return r;
6273 
6274 	/* initialize memory controller */
6275 	r = cik_mc_init(rdev);
6276 	if (r)
6277 		return r;
6278 	/* Memory manager */
6279 	r = radeon_bo_init(rdev);
6280 	if (r)
6281 		return r;
6282 
6283 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6284 	ring->ring_obj = NULL;
6285 	r600_ring_init(rdev, ring, 1024 * 1024);
6286 
6287 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6288 	ring->ring_obj = NULL;
6289 	r600_ring_init(rdev, ring, 1024 * 1024);
6290 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6291 	if (r)
6292 		return r;
6293 
6294 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6295 	ring->ring_obj = NULL;
6296 	r600_ring_init(rdev, ring, 1024 * 1024);
6297 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6298 	if (r)
6299 		return r;
6300 
6301 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6302 	ring->ring_obj = NULL;
6303 	r600_ring_init(rdev, ring, 256 * 1024);
6304 
6305 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6306 	ring->ring_obj = NULL;
6307 	r600_ring_init(rdev, ring, 256 * 1024);
6308 
6309 	r = radeon_uvd_init(rdev);
6310 	if (!r) {
6311 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6312 		ring->ring_obj = NULL;
6313 		r600_ring_init(rdev, ring, 4096);
6314 	}
6315 
6316 	rdev->ih.ring_obj = NULL;
6317 	r600_ih_ring_init(rdev, 64 * 1024);
6318 
6319 	r = r600_pcie_gart_init(rdev);
6320 	if (r)
6321 		return r;
6322 
6323 	rdev->accel_working = true;
6324 	r = cik_startup(rdev);
6325 	if (r) {
6326 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6327 		cik_cp_fini(rdev);
6328 		cik_sdma_fini(rdev);
6329 		cik_irq_fini(rdev);
6330 		si_rlc_fini(rdev);
6331 		cik_mec_fini(rdev);
6332 		radeon_wb_fini(rdev);
6333 		radeon_ib_pool_fini(rdev);
6334 		radeon_vm_manager_fini(rdev);
6335 		radeon_irq_kms_fini(rdev);
6336 		cik_pcie_gart_fini(rdev);
6337 		rdev->accel_working = false;
6338 	}
6339 
6340 	/* Don't start up if the MC ucode is missing.
6341 	 * The default clocks and voltages before the MC ucode
6342 	 * is loaded are not sufficient for advanced operations.
6343 	 */
6344 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6345 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6346 		return -EINVAL;
6347 	}
6348 
6349 	return 0;
6350 }
6351 
6352 /**
6353  * cik_fini - asic specific driver and hw fini
6354  *
6355  * @rdev: radeon_device pointer
6356  *
6357  * Tear down the asic specific driver variables and program the hw
6358  * to an idle state (CIK).
6359  * Called at driver unload.
6360  */
6361 void cik_fini(struct radeon_device *rdev)
6362 {
6363 	cik_cp_fini(rdev);
6364 	cik_sdma_fini(rdev);
6365 	cik_irq_fini(rdev);
6366 	si_rlc_fini(rdev);
6367 	cik_mec_fini(rdev);
6368 	radeon_wb_fini(rdev);
6369 	radeon_vm_manager_fini(rdev);
6370 	radeon_ib_pool_fini(rdev);
6371 	radeon_irq_kms_fini(rdev);
6372 	r600_uvd_stop(rdev);
6373 	radeon_uvd_fini(rdev);
6374 	cik_pcie_gart_fini(rdev);
6375 	r600_vram_scratch_fini(rdev);
6376 	radeon_gem_fini(rdev);
6377 	radeon_fence_driver_fini(rdev);
6378 	radeon_bo_fini(rdev);
6379 	radeon_atombios_fini(rdev);
6380 	kfree(rdev->bios);
6381 	rdev->bios = NULL;
6382 }
6383 
6384 /* display watermark setup */
6385 /**
6386  * dce8_line_buffer_adjust - Set up the line buffer
6387  *
6388  * @rdev: radeon_device pointer
6389  * @radeon_crtc: the selected display controller
6390  * @mode: the current display mode on the selected display
6391  * controller
6392  *
6393  * Set up the line buffer allocation for
6394  * the selected display controller (CIK).
6395  * Returns the line buffer size in pixels.
6396  */
6397 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6398 				   struct radeon_crtc *radeon_crtc,
6399 				   struct drm_display_mode *mode)
6400 {
6401 	u32 tmp;
6402 
6403 	/*
6404 	 * Line Buffer Setup
6405 	 * There are 6 line buffers, one for each display controller.
6406 	 * There are 3 partitions per LB. Select the number of partitions
6407 	 * to enable based on the display width.  For display widths larger
6408 	 * than 4096, you need to use 2 display controllers and combine
6409 	 * them using the stereo blender.
6410 	 */
6411 	if (radeon_crtc->base.enabled && mode) {
6412 		if (mode->crtc_hdisplay < 1920)
6413 			tmp = 1;
6414 		else if (mode->crtc_hdisplay < 2560)
6415 			tmp = 2;
6416 		else if (mode->crtc_hdisplay < 4096)
6417 			tmp = 0;
6418 		else {
6419 			DRM_DEBUG_KMS("Mode too big for LB!\n");
6420 			tmp = 0;
6421 		}
6422 	} else
6423 		tmp = 1;
6424 
6425 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6426 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6427 
6428 	if (radeon_crtc->base.enabled && mode) {
6429 		switch (tmp) {
6430 		case 0:
6431 		default:
6432 			return 4096 * 2;
6433 		case 1:
6434 			return 1920 * 2;
6435 		case 2:
6436 			return 2560 * 2;
6437 		}
6438 	}
6439 
6440 	/* controller not enabled, so no lb used */
6441 	return 0;
6442 }
6443 
6444 /**
6445  * cik_get_number_of_dram_channels - get the number of dram channels
6446  *
6447  * @rdev: radeon_device pointer
6448  *
6449  * Look up the number of video ram channels (CIK).
6450  * Used for display watermark bandwidth calculations
6451  * Returns the number of dram channels
6452  */
6453 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6454 {
6455 	u32 tmp = RREG32(MC_SHARED_CHMAP);
6456 
6457 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6458 	case 0:
6459 	default:
6460 		return 1;
6461 	case 1:
6462 		return 2;
6463 	case 2:
6464 		return 4;
6465 	case 3:
6466 		return 8;
6467 	case 4:
6468 		return 3;
6469 	case 5:
6470 		return 6;
6471 	case 6:
6472 		return 10;
6473 	case 7:
6474 		return 12;
6475 	case 8:
6476 		return 16;
6477 	}
6478 }
6479 
6480 struct dce8_wm_params {
6481 	u32 dram_channels; /* number of dram channels */
6482 	u32 yclk;          /* bandwidth per dram data pin in kHz */
6483 	u32 sclk;          /* engine clock in kHz */
6484 	u32 disp_clk;      /* display clock in kHz */
6485 	u32 src_width;     /* viewport width */
6486 	u32 active_time;   /* active display time in ns */
6487 	u32 blank_time;    /* blank time in ns */
6488 	bool interlaced;    /* mode is interlaced */
6489 	fixed20_12 vsc;    /* vertical scale ratio */
6490 	u32 num_heads;     /* number of active crtcs */
6491 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6492 	u32 lb_size;       /* line buffer allocated to pipe */
6493 	u32 vtaps;         /* vertical scaler taps */
6494 };
6495 
6496 /**
6497  * dce8_dram_bandwidth - get the dram bandwidth
6498  *
6499  * @wm: watermark calculation data
6500  *
6501  * Calculate the raw dram bandwidth (CIK).
6502  * Used for display watermark bandwidth calculations
6503  * Returns the dram bandwidth in MBytes/s
6504  */
6505 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6506 {
6507 	/* Calculate raw DRAM Bandwidth */
6508 	fixed20_12 dram_efficiency; /* 0.7 */
6509 	fixed20_12 yclk, dram_channels, bandwidth;
6510 	fixed20_12 a;
6511 
6512 	a.full = dfixed_const(1000);
6513 	yclk.full = dfixed_const(wm->yclk);
6514 	yclk.full = dfixed_div(yclk, a);
6515 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6516 	a.full = dfixed_const(10);
6517 	dram_efficiency.full = dfixed_const(7);
6518 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
6519 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6520 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6521 
6522 	return dfixed_trunc(bandwidth);
6523 }
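
/*
 * Worked example (editor's note): fixed20_12 is drm's 20.12 fixed point
 * type (dfixed_const/dfixed_mul/dfixed_div/dfixed_trunc come from
 * drm_fixed.h).  With wm->yclk = 1000000 (a 1 GHz effective rate per pin,
 * in kHz) and wm->dram_channels = 2, the calculation above works out to
 * roughly 1000 * (2 * 4) * 0.7 = 5600 MBytes/s of raw dram bandwidth.
 */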
6524 
6525 /**
6526  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6527  *
6528  * @wm: watermark calculation data
6529  *
6530  * Calculate the dram bandwidth used for display (CIK).
6531  * Used for display watermark bandwidth calculations
6532  * Returns the dram bandwidth for display in MBytes/s
6533  */
6534 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6535 {
6536 	/* Calculate DRAM Bandwidth and the part allocated to display. */
6537 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6538 	fixed20_12 yclk, dram_channels, bandwidth;
6539 	fixed20_12 a;
6540 
6541 	a.full = dfixed_const(1000);
6542 	yclk.full = dfixed_const(wm->yclk);
6543 	yclk.full = dfixed_div(yclk, a);
6544 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6545 	a.full = dfixed_const(10);
6546 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6547 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6548 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6549 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6550 
6551 	return dfixed_trunc(bandwidth);
6552 }
6553 
6554 /**
6555  * dce8_data_return_bandwidth - get the data return bandwidth
6556  *
6557  * @wm: watermark calculation data
6558  *
6559  * Calculate the data return bandwidth used for display (CIK).
6560  * Used for display watermark bandwidth calculations
6561  * Returns the data return bandwidth in MBytes/s
6562  */
6563 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6564 {
6565 	/* Calculate the display Data return Bandwidth */
6566 	fixed20_12 return_efficiency; /* 0.8 */
6567 	fixed20_12 sclk, bandwidth;
6568 	fixed20_12 a;
6569 
6570 	a.full = dfixed_const(1000);
6571 	sclk.full = dfixed_const(wm->sclk);
6572 	sclk.full = dfixed_div(sclk, a);
6573 	a.full = dfixed_const(10);
6574 	return_efficiency.full = dfixed_const(8);
6575 	return_efficiency.full = dfixed_div(return_efficiency, a);
6576 	a.full = dfixed_const(32);
6577 	bandwidth.full = dfixed_mul(a, sclk);
6578 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6579 
6580 	return dfixed_trunc(bandwidth);
6581 }
6582 
6583 /**
6584  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6585  *
6586  * @wm: watermark calculation data
6587  *
6588  * Calculate the dmif bandwidth used for display (CIK).
6589  * Used for display watermark bandwidth calculations
6590  * Returns the dmif bandwidth in MBytes/s
6591  */
6592 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6593 {
6594 	/* Calculate the DMIF Request Bandwidth */
6595 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6596 	fixed20_12 disp_clk, bandwidth;
6597 	fixed20_12 a, b;
6598 
6599 	a.full = dfixed_const(1000);
6600 	disp_clk.full = dfixed_const(wm->disp_clk);
6601 	disp_clk.full = dfixed_div(disp_clk, a);
6602 	a.full = dfixed_const(32);
6603 	b.full = dfixed_mul(a, disp_clk);
6604 
6605 	a.full = dfixed_const(10);
6606 	disp_clk_request_efficiency.full = dfixed_const(8);
6607 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6608 
6609 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6610 
6611 	return dfixed_trunc(bandwidth);
6612 }
6613 
6614 /**
6615  * dce8_available_bandwidth - get the min available bandwidth
6616  *
6617  * @wm: watermark calculation data
6618  *
6619  * Calculate the min available bandwidth used for display (CIK).
6620  * Used for display watermark bandwidth calculations
6621  * Returns the min available bandwidth in MBytes/s
6622  */
6623 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6624 {
6625 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6626 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6627 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6628 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6629 
6630 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6631 }
6632 
6633 /**
6634  * dce8_average_bandwidth - get the average available bandwidth
6635  *
6636  * @wm: watermark calculation data
6637  *
6638  * Calculate the average available bandwidth used for display (CIK).
6639  * Used for display watermark bandwidth calculations
6640  * Returns the average available bandwidth in MBytes/s
6641  */
6642 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6643 {
6644 	/* Calculate the display mode Average Bandwidth
6645 	 * DisplayMode should contain the source and destination dimensions,
6646 	 * timing, etc.
6647 	 */
6648 	fixed20_12 bpp;
6649 	fixed20_12 line_time;
6650 	fixed20_12 src_width;
6651 	fixed20_12 bandwidth;
6652 	fixed20_12 a;
6653 
6654 	a.full = dfixed_const(1000);
6655 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6656 	line_time.full = dfixed_div(line_time, a);
6657 	bpp.full = dfixed_const(wm->bytes_per_pixel);
6658 	src_width.full = dfixed_const(wm->src_width);
6659 	bandwidth.full = dfixed_mul(src_width, bpp);
6660 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6661 	bandwidth.full = dfixed_div(bandwidth, line_time);
6662 
6663 	return dfixed_trunc(bandwidth);
6664 }
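
/*
 * Worked example (editor's note): for a 1920 pixel wide source at
 * 4 bytes per pixel, vsc = 1 and a total line time (active + blank) of
 * about 14800 ns, the calculation above gives roughly
 * 1920 * 4 / 14.8 ~= 519 MBytes/s of average bandwidth for that head.
 */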
6665 
6666 /**
6667  * dce8_latency_watermark - get the latency watermark
6668  *
6669  * @wm: watermark calculation data
6670  *
6671  * Calculate the latency watermark (CIK).
6672  * Used for display watermark bandwidth calculations
6673  * Returns the latency watermark in ns
6674  */
6675 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6676 {
6677 	/* First calculate the latency in ns */
6678 	u32 mc_latency = 2000; /* 2000 ns. */
6679 	u32 available_bandwidth = dce8_available_bandwidth(wm);
6680 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6681 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6682 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6683 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6684 		(wm->num_heads * cursor_line_pair_return_time);
6685 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
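	/*
	 * Total latency (ns) to hide: memory latency, plus the worst-case
	 * time spent returning chunk and cursor data for the other heads,
	 * plus the display controller pipe latency.
	 */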
6686 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6687 	u32 tmp, dmif_size = 12288;
6688 	fixed20_12 a, b, c;
6689 
6690 	if (wm->num_heads == 0)
6691 		return 0;
6692 
6693 	a.full = dfixed_const(2);
6694 	b.full = dfixed_const(1);
6695 	if ((wm->vsc.full > a.full) ||
6696 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6697 	    (wm->vtaps >= 5) ||
6698 	    ((wm->vsc.full >= a.full) && wm->interlaced))
6699 		max_src_lines_per_dst_line = 4;
6700 	else
6701 		max_src_lines_per_dst_line = 2;
6702 
6703 	a.full = dfixed_const(available_bandwidth);
6704 	b.full = dfixed_const(wm->num_heads);
6705 	a.full = dfixed_div(a, b);
6706 
6707 	b.full = dfixed_const(mc_latency + 512);
6708 	c.full = dfixed_const(wm->disp_clk);
6709 	b.full = dfixed_div(b, c);
6710 
6711 	c.full = dfixed_const(dmif_size);
6712 	b.full = dfixed_div(c, b);
6713 
6714 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6715 
6716 	b.full = dfixed_const(1000);
6717 	c.full = dfixed_const(wm->disp_clk);
6718 	b.full = dfixed_div(c, b);
6719 	c.full = dfixed_const(wm->bytes_per_pixel);
6720 	b.full = dfixed_mul(b, c);
6721 
6722 	lb_fill_bw = min(tmp, dfixed_trunc(b));
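	/*
	 * lb_fill_bw: rate at which the line buffer can be filled, limited by
	 * this head's share of the available bandwidth, by what appears to be
	 * the DMIF buffer drain rate, and by one pixel per display clock.
	 */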
6723 
6724 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6725 	b.full = dfixed_const(1000);
6726 	c.full = dfixed_const(lb_fill_bw);
6727 	b.full = dfixed_div(c, b);
6728 	a.full = dfixed_div(a, b);
6729 	line_fill_time = dfixed_trunc(a);
6730 
6731 	if (line_fill_time < wm->active_time)
6732 		return latency;
6733 	else
6734 		return latency + (line_fill_time - wm->active_time);
6735 
6736 }
6737 
6738 /**
6739  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6740  * average and available dram bandwidth
6741  *
6742  * @wm: watermark calculation data
6743  *
6744  * Check if the display average bandwidth fits in the display
6745  * dram bandwidth (CIK).
6746  * Used for display watermark bandwidth calculations
6747  * Returns true if the display fits, false if not.
6748  */
6749 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6750 {
6751 	if (dce8_average_bandwidth(wm) <=
6752 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6753 		return true;
6754 	else
6755 		return false;
6756 }
6757 
6758 /**
6759  * dce8_average_bandwidth_vs_available_bandwidth - check
6760  * average and available bandwidth
6761  *
6762  * @wm: watermark calculation data
6763  *
6764  * Check if the display average bandwidth fits in the display
6765  * available bandwidth (CIK).
6766  * Used for display watermark bandwidth calculations
6767  * Returns true if the display fits, false if not.
6768  */
6769 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6770 {
6771 	if (dce8_average_bandwidth(wm) <=
6772 	    (dce8_available_bandwidth(wm) / wm->num_heads))
6773 		return true;
6774 	else
6775 		return false;
6776 }
6777 
6778 /**
6779  * dce8_check_latency_hiding - check latency hiding
6780  *
6781  * @wm: watermark calculation data
6782  *
6783  * Check latency hiding (CIK).
6784  * Used for display watermark bandwidth calculations
6785  * Returns true if the display fits, false if not.
6786  */
6787 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6788 {
6789 	u32 lb_partitions = wm->lb_size / wm->src_width;
6790 	u32 line_time = wm->active_time + wm->blank_time;
6791 	u32 latency_tolerant_lines;
6792 	u32 latency_hiding;
6793 	fixed20_12 a;
6794 
6795 	a.full = dfixed_const(1);
6796 	if (wm->vsc.full > a.full)
6797 		latency_tolerant_lines = 1;
6798 	else {
6799 		if (lb_partitions <= (wm->vtaps + 1))
6800 			latency_tolerant_lines = 1;
6801 		else
6802 			latency_tolerant_lines = 2;
6803 	}
6804 
6805 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
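	/*
	 * latency_hiding is the time (ns) the display can tolerate before it
	 * must receive new data: the scan-out time of the buffered line(s)
	 * plus the blanking interval.
	 */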
6806 
6807 	if (dce8_latency_watermark(wm) <= latency_hiding)
6808 		return true;
6809 	else
6810 		return false;
6811 }
6812 
6813 /**
6814  * dce8_program_watermarks - program display watermarks
6815  *
6816  * @rdev: radeon_device pointer
6817  * @radeon_crtc: the selected display controller
6818  * @lb_size: line buffer size
6819  * @num_heads: number of display controllers in use
6820  *
6821  * Calculate and program the display watermarks for the
6822  * selected display controller (CIK).
6823  */
6824 static void dce8_program_watermarks(struct radeon_device *rdev,
6825 				    struct radeon_crtc *radeon_crtc,
6826 				    u32 lb_size, u32 num_heads)
6827 {
6828 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
6829 	struct dce8_wm_params wm;
6830 	u32 pixel_period;
6831 	u32 line_time = 0;
6832 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
6833 	u32 tmp, wm_mask;
6834 
6835 	if (radeon_crtc->base.enabled && num_heads && mode) {
6836 		pixel_period = 1000000 / (u32)mode->clock;
6837 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
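		/*
		 * pixel_period is in ns (mode->clock is in kHz); line_time is
		 * clamped to 65535, presumably to fit the 16-bit
		 * LATENCY_HIGH_WATERMARK register field.
		 */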
6838 
6839 		wm.yclk = rdev->pm.current_mclk * 10;
6840 		wm.sclk = rdev->pm.current_sclk * 10;
6841 		wm.disp_clk = mode->clock;
6842 		wm.src_width = mode->crtc_hdisplay;
6843 		wm.active_time = mode->crtc_hdisplay * pixel_period;
6844 		wm.blank_time = line_time - wm.active_time;
6845 		wm.interlaced = false;
6846 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6847 			wm.interlaced = true;
6848 		wm.vsc = radeon_crtc->vsc;
6849 		wm.vtaps = 1;
6850 		if (radeon_crtc->rmx_type != RMX_OFF)
6851 			wm.vtaps = 2;
6852 		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6853 		wm.lb_size = lb_size;
6854 		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6855 		wm.num_heads = num_heads;
6856 
6857 		/* set for high clocks */
6858 		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6859 		/* set for low clocks */
6860 		/* wm.yclk = low clk; wm.sclk = low clk */
6861 		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
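		/*
		 * Note: yclk/sclk are not actually switched to their low-clock
		 * values above, so watermark B is currently computed with the
		 * same parameters as watermark A.
		 */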
6862 
6863 		/* possibly force display priority to high */
6864 		/* should really do this at mode validation time... */
6865 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6866 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6867 		    !dce8_check_latency_hiding(&wm) ||
6868 		    (rdev->disp_priority == 2)) {
6869 			DRM_DEBUG_KMS("force priority to high\n");
6870 		}
6871 	}
6872 
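	/*
	 * The watermark mask selects which latency watermark set (1 = A,
	 * 2 = B) the DPG_PIPE_LATENCY_CONTROL writes below update; the
	 * original mask value is restored once both sets are programmed.
	 */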
6873 	/* select wm A */
6874 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6875 	tmp = wm_mask;
6876 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6877 	tmp |= LATENCY_WATERMARK_MASK(1);
6878 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6879 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6880 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6881 		LATENCY_HIGH_WATERMARK(line_time)));
6882 	/* select wm B */
6883 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6884 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6885 	tmp |= LATENCY_WATERMARK_MASK(2);
6886 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6887 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6888 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6889 		LATENCY_HIGH_WATERMARK(line_time)));
6890 	/* restore original selection */
6891 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6892 }
6893 
6894 /**
6895  * dce8_bandwidth_update - program display watermarks
6896  *
6897  * @rdev: radeon_device pointer
6898  *
6899  * Calculate and program the display watermarks and line
6900  * buffer allocation (CIK).
6901  */
6902 void dce8_bandwidth_update(struct radeon_device *rdev)
6903 {
6904 	struct drm_display_mode *mode = NULL;
6905 	u32 num_heads = 0, lb_size;
6906 	int i;
6907 
6908 	radeon_update_display_priority(rdev);
6909 
6910 	for (i = 0; i < rdev->num_crtc; i++) {
6911 		if (rdev->mode_info.crtcs[i]->base.enabled)
6912 			num_heads++;
6913 	}
6914 	for (i = 0; i < rdev->num_crtc; i++) {
6915 		mode = &rdev->mode_info.crtcs[i]->base.mode;
6916 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6917 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6918 	}
6919 }
6920 
6921 /**
6922  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6923  *
6924  * @rdev: radeon_device pointer
6925  *
6926  * Fetches a GPU clock counter snapshot (CIK).
6927  * Returns the 64 bit clock counter snapshot.
6928  */
6929 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6930 {
6931 	uint64_t clock;
6932 
6933 	mutex_lock(&rdev->gpu_clock_mutex);
6934 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
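	/*
	 * The write above latches the 64-bit GPU clock counter so that the
	 * LSB/MSB reads below return a consistent snapshot; the mutex keeps
	 * concurrent captures from racing.
	 */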
6935 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6936 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6937 	mutex_unlock(&rdev->gpu_clock_mutex);
6938 	return clock;
6939 }
6940 
6941 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6942                               u32 cntl_reg, u32 status_reg)
6943 {
6944 	int r, i;
6945 	struct atom_clock_dividers dividers;
6946 	uint32_t tmp;
6947 
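	/* look up the clock dividers for the requested frequency in the atom tables */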
6948 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6949 					   clock, false, &dividers);
6950 	if (r)
6951 		return r;
6952 
6953 	tmp = RREG32_SMC(cntl_reg);
6954 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6955 	tmp |= dividers.post_divider;
6956 	WREG32_SMC(cntl_reg, tmp);
6957 
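	/* wait up to ~1 second (100 * 10 ms) for the new divider to take effect */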
6958 	for (i = 0; i < 100; i++) {
6959 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
6960 			break;
6961 		mdelay(10);
6962 	}
6963 	if (i == 100)
6964 		return -ETIMEDOUT;
6965 
6966 	return 0;
6967 }
6968 
6969 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6970 {
6971 	int r = 0;
6972 
6973 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6974 	if (r)
6975 		return r;
6976 
6977 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6978 	return r;
6979 }
6980 
6981 int cik_uvd_resume(struct radeon_device *rdev)
6982 {
6983 	uint64_t addr;
6984 	uint32_t size;
6985 	int r;
6986 
6987 	r = radeon_uvd_resume(rdev);
6988 	if (r)
6989 		return r;
6990 
6991 	/* program the VCPU memory controller bits 0-27 */
6992 	addr = rdev->uvd.gpu_addr >> 3;
6993 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
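	/*
	 * The VCPU cache offsets and sizes appear to be specified in 8-byte
	 * units, hence the >> 3 shifts on the GPU address and the sizes.
	 */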
6994 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6995 	WREG32(UVD_VCPU_CACHE_SIZE0, size);
6996 
6997 	addr += size;
6998 	size = RADEON_UVD_STACK_SIZE >> 3;
6999 	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7000 	WREG32(UVD_VCPU_CACHE_SIZE1, size);
7001 
7002 	addr += size;
7003 	size = RADEON_UVD_HEAP_SIZE >> 3;
7004 	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7005 	WREG32(UVD_VCPU_CACHE_SIZE2, size);
7006 
7007 	/* bits 28-31 */
7008 	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7009 	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7010 
7011 	/* bits 32-39 */
7012 	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7013 	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7014 
7015 	return 0;
7016 }
7017