xref: /openbmc/linux/drivers/gpu/drm/radeon/ni.c (revision e23feb16)
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "nid.h"
32 #include "atom.h"
33 #include "ni_reg.h"
34 #include "cayman_blit_shaders.h"
35 #include "radeon_ucode.h"
36 #include "clearstate_cayman.h"
37 
/*
 * Register offsets (GFX block) that the RLC saves and restores around
 * power gating on Trinity (TN) APUs.  Consumed by the shared sumo RLC
 * setup code (sumo_rlc_init(), declared below).  Order matters to the
 * RLC microcode; do not reorder or prune entries.
 */
static const u32 tn_rlc_save_restore_register_list[] =
{
	0x98fc,
	0x98f0,
	0x9834,
	0x9838,
	0x9870,
	0x9874,
	0x8a14,
	0x8b24,
	0x8bcc,
	0x8b10,
	0x8c30,
	0x8d00,
	0x8d04,
	0x8c00,
	0x8c04,
	0x8c10,
	0x8c14,
	0x8d8c,
	0x8cf0,
	0x8e38,
	0x9508,
	0x9688,
	0x9608,
	0x960c,
	0x9610,
	0x9614,
	0x88c4,
	0x8978,
	0x88d4,
	0x900c,
	0x9100,
	0x913c,
	0x90e8,
	0x9354,
	0xa008,
	0x98f8,
	0x9148,
	0x914c,
	0x3f94,
	0x98f4,
	0x9b7c,
	0x3f8c,
	0x8950,
	0x8954,
	0x8a18,
	0x8b28,
	0x9144,
	0x3f90,
	0x915c,
	0x9160,
	0x9178,
	0x917c,
	0x9180,
	0x918c,
	0x9190,
	0x9194,
	0x9198,
	0x919c,
	0x91a8,
	0x91ac,
	0x91b0,
	0x91b4,
	0x91b8,
	0x91c4,
	0x91c8,
	0x91cc,
	0x91d0,
	0x91d4,
	0x91e0,
	0x91e4,
	0x91ec,
	0x91f0,
	0x91f4,
	0x9200,
	0x9204,
	0x929c,
	0x8030,
	0x9150,
	0x9a60,
	0x920c,
	0x9210,
	0x9228,
	0x922c,
	0x9244,
	0x9248,
	0x91e8,
	0x9294,
	0x9208,
	0x9224,
	0x9240,
	0x9220,
	0x923c,
	0x9258,
	0x9744,
	0xa200,
	0xa204,
	0xa208,
	0xa20c,
	0x8d58,
	0x9030,
	0x9034,
	0x9038,
	0x903c,
	0x9040,
	0x9654,
	0x897c,
	0xa210,
	0xa214,
	0x9868,
	0xa02c,
	0x9664,
	0x9698,
	0x949c,
	0x8e10,
	0x8e18,
	0x8c50,
	0x8c58,
	0x8c60,
	0x8c68,
	0x89b4,
	0x9830,
	0x802c,
};
163 
/*
 * Helpers shared with the evergreen/sumo/cayman-DMA code, implemented in
 * other files of this driver.  NI (Northern Islands) reuses most of the
 * evergreen MC/IRQ/PCIe plumbing.
 */
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
extern void evergreen_mc_program(struct radeon_device *rdev);
extern void evergreen_irq_suspend(struct radeon_device *rdev);
extern int evergreen_mc_init(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
extern void evergreen_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void cayman_dma_vm_set_page(struct radeon_device *rdev,
				   struct radeon_ib *ib,
				   uint64_t pe,
				   uint64_t addr, unsigned count,
				   uint32_t incr, uint32_t flags);
182 
/*
 * Firmware Names
 *
 * Advertise every ucode image this driver may request (see
 * ni_init_microcode()) so module tooling/initramfs generators can
 * bundle them.  BTC parts (BARTS/TURKS/CAICOS) share one RLC image;
 * the IGP ARUBA has no MC or SMC ucode.
 */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BARTS_smc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/TURKS_smc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
MODULE_FIRMWARE("radeon/ARUBA_me.bin");
MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
205 
206 
/*
 * Second Cayman "golden" register table, applied by
 * ni_init_golden_registers() via radeon_program_register_sequence().
 * Entries are {offset, mask, value} triples (presumably read-modify-write
 * under the mask — confirm against radeon_program_register_sequence()).
 */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
216 
/*
 * Primary Cayman "golden" register table ({offset, mask, value} triples),
 * applied by ni_init_golden_registers() via
 * radeon_program_register_sequence().  Values come from AMD's recommended
 * power-on register settings; do not edit individual entries.
 */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
257 
/*
 * Second DVST (Trinity/ARUBA) golden register table; also reused after
 * the Scrapper table in ni_init_golden_registers().  {offset, mask,
 * value} triples for radeon_program_register_sequence().
 */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
265 
/*
 * DVST golden register table ({offset, mask, value} triples), applied to
 * the ARUBA PCI device ids listed in ni_init_golden_registers() via
 * radeon_program_register_sequence().
 */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
323 
/*
 * Scrapper golden register table ({offset, mask, value} triples), applied
 * to the remaining ARUBA device ids in ni_init_golden_registers() via
 * radeon_program_register_sequence().  Many entries appear twice; this
 * duplication matches the vendor-supplied sequence and is kept as-is
 * (re-applying a masked write is harmless).
 */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
430 
431 static void ni_init_golden_registers(struct radeon_device *rdev)
432 {
433 	switch (rdev->family) {
434 	case CHIP_CAYMAN:
435 		radeon_program_register_sequence(rdev,
436 						 cayman_golden_registers,
437 						 (const u32)ARRAY_SIZE(cayman_golden_registers));
438 		radeon_program_register_sequence(rdev,
439 						 cayman_golden_registers2,
440 						 (const u32)ARRAY_SIZE(cayman_golden_registers2));
441 		break;
442 	case CHIP_ARUBA:
443 		if ((rdev->pdev->device == 0x9900) ||
444 		    (rdev->pdev->device == 0x9901) ||
445 		    (rdev->pdev->device == 0x9903) ||
446 		    (rdev->pdev->device == 0x9904) ||
447 		    (rdev->pdev->device == 0x9905) ||
448 		    (rdev->pdev->device == 0x9906) ||
449 		    (rdev->pdev->device == 0x9907) ||
450 		    (rdev->pdev->device == 0x9908) ||
451 		    (rdev->pdev->device == 0x9909) ||
452 		    (rdev->pdev->device == 0x990A) ||
453 		    (rdev->pdev->device == 0x990B) ||
454 		    (rdev->pdev->device == 0x990C) ||
455 		    (rdev->pdev->device == 0x990D) ||
456 		    (rdev->pdev->device == 0x990E) ||
457 		    (rdev->pdev->device == 0x990F) ||
458 		    (rdev->pdev->device == 0x9910) ||
459 		    (rdev->pdev->device == 0x9913) ||
460 		    (rdev->pdev->device == 0x9917) ||
461 		    (rdev->pdev->device == 0x9918)) {
462 			radeon_program_register_sequence(rdev,
463 							 dvst_golden_registers,
464 							 (const u32)ARRAY_SIZE(dvst_golden_registers));
465 			radeon_program_register_sequence(rdev,
466 							 dvst_golden_registers2,
467 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
468 		} else {
469 			radeon_program_register_sequence(rdev,
470 							 scrapper_golden_registers,
471 							 (const u32)ARRAY_SIZE(scrapper_golden_registers));
472 			radeon_program_register_sequence(rdev,
473 							 dvst_golden_registers2,
474 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
475 		}
476 		break;
477 	default:
478 		break;
479 	}
480 }
481 
/* Number of {index, data} pairs in each *_io_mc_regs table below. */
#define BTC_IO_MC_REGS_SIZE 29

/*
 * BARTS MC sequencer IO debug settings.  Each row is a
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair written before the
 * MC ucode is loaded (see ni_mc_load_microcode()).
 */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
515 
/*
 * TURKS MC sequencer IO debug {index, data} pairs, written before MC
 * ucode load (see ni_mc_load_microcode()).  Identical to the BARTS
 * table except for the final (0x9f) entry.
 */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
547 
/*
 * CAICOS MC sequencer IO debug {index, data} pairs, written before MC
 * ucode load (see ni_mc_load_microcode()).  Identical to the BARTS
 * table except for the final (0x9f) entry.
 */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
579 
/*
 * CAYMAN MC sequencer IO debug {index, data} pairs, written before MC
 * ucode load (see ni_mc_load_microcode()).  Identical to the BARTS
 * table except for the final (0x9f) entry.
 */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
611 
/**
 * ni_mc_load_microcode - load MC (memory controller) ucode into the MC engine
 * @rdev: radeon_device pointer
 *
 * Picks the per-family IO debug table and ucode size, then (for GDDR5
 * boards whose MC sequencer is not already running) resets the sequencer,
 * writes the IO debug {index, data} pairs, streams the big-endian ucode
 * words into MC_SEQ_SUP_PGM, restarts the engine and polls up to
 * rdev->usec_timeout microseconds for memory training to complete.
 *
 * Returns 0 on success (including the no-op paths), -EINVAL if no MC
 * firmware has been loaded by ni_init_microcode().
 */
int ni_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 mem_type, running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	/* ni_init_microcode() must have fetched the MC image first. */
	if (!rdev->mc_fw)
		return -EINVAL;

	/* Select the per-family IO debug table and ucode length. */
	switch (rdev->family) {
	case CHIP_BARTS:
		io_mc_regs = (u32 *)&barts_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_TURKS:
		io_mc_regs = (u32 *)&turks_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAICOS:
	default:
		io_mc_regs = (u32 *)&caicos_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAYMAN:
		io_mc_regs = (u32 *)&cayman_io_mc_regs;
		ucode_size = CAYMAN_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	}

	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* Only program the MC on GDDR5 boards whose sequencer is stopped. */
	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
		/*
		 * NOTE(review): dead code — the outer condition requires
		 * running == 0, so this blackout save (and the restore
		 * below) can never execute.  Kept for parity with the
		 * sibling asic loaders; confirm before removing.
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode (firmware image is big-endian) */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
				break;
			udelay(1);
		}

		/* see NOTE(review) above: unreachable while running == 0 */
		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
687 
688 int ni_init_microcode(struct radeon_device *rdev)
689 {
690 	const char *chip_name;
691 	const char *rlc_chip_name;
692 	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
693 	size_t smc_req_size = 0;
694 	char fw_name[30];
695 	int err;
696 
697 	DRM_DEBUG("\n");
698 
699 	switch (rdev->family) {
700 	case CHIP_BARTS:
701 		chip_name = "BARTS";
702 		rlc_chip_name = "BTC";
703 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
704 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
705 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
706 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
707 		smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
708 		break;
709 	case CHIP_TURKS:
710 		chip_name = "TURKS";
711 		rlc_chip_name = "BTC";
712 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
713 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
714 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
715 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
716 		smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
717 		break;
718 	case CHIP_CAICOS:
719 		chip_name = "CAICOS";
720 		rlc_chip_name = "BTC";
721 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
722 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
723 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
724 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
725 		smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
726 		break;
727 	case CHIP_CAYMAN:
728 		chip_name = "CAYMAN";
729 		rlc_chip_name = "CAYMAN";
730 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
731 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
732 		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
733 		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
734 		smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
735 		break;
736 	case CHIP_ARUBA:
737 		chip_name = "ARUBA";
738 		rlc_chip_name = "ARUBA";
739 		/* pfp/me same size as CAYMAN */
740 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
741 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
742 		rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
743 		mc_req_size = 0;
744 		break;
745 	default: BUG();
746 	}
747 
748 	DRM_INFO("Loading %s Microcode\n", chip_name);
749 
750 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
751 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
752 	if (err)
753 		goto out;
754 	if (rdev->pfp_fw->size != pfp_req_size) {
755 		printk(KERN_ERR
756 		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
757 		       rdev->pfp_fw->size, fw_name);
758 		err = -EINVAL;
759 		goto out;
760 	}
761 
762 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
763 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
764 	if (err)
765 		goto out;
766 	if (rdev->me_fw->size != me_req_size) {
767 		printk(KERN_ERR
768 		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
769 		       rdev->me_fw->size, fw_name);
770 		err = -EINVAL;
771 	}
772 
773 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
774 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
775 	if (err)
776 		goto out;
777 	if (rdev->rlc_fw->size != rlc_req_size) {
778 		printk(KERN_ERR
779 		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
780 		       rdev->rlc_fw->size, fw_name);
781 		err = -EINVAL;
782 	}
783 
784 	/* no MC ucode on TN */
785 	if (!(rdev->flags & RADEON_IS_IGP)) {
786 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
787 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
788 		if (err)
789 			goto out;
790 		if (rdev->mc_fw->size != mc_req_size) {
791 			printk(KERN_ERR
792 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
793 			       rdev->mc_fw->size, fw_name);
794 			err = -EINVAL;
795 		}
796 	}
797 
798 	if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
799 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
800 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
801 		if (err) {
802 			printk(KERN_ERR
803 			       "smc: error loading firmware \"%s\"\n",
804 			       fw_name);
805 			release_firmware(rdev->smc_fw);
806 			rdev->smc_fw = NULL;
807 		} else if (rdev->smc_fw->size != smc_req_size) {
808 			printk(KERN_ERR
809 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
810 			       rdev->mc_fw->size, fw_name);
811 			err = -EINVAL;
812 		}
813 	}
814 
815 out:
816 	if (err) {
817 		if (err != -EINVAL)
818 			printk(KERN_ERR
819 			       "ni_cp: Failed to load firmware \"%s\"\n",
820 			       fw_name);
821 		release_firmware(rdev->pfp_fw);
822 		rdev->pfp_fw = NULL;
823 		release_firmware(rdev->me_fw);
824 		rdev->me_fw = NULL;
825 		release_firmware(rdev->rlc_fw);
826 		rdev->rlc_fw = NULL;
827 		release_firmware(rdev->mc_fw);
828 		rdev->mc_fw = NULL;
829 	}
830 	return err;
831 }
832 
833 int tn_get_temp(struct radeon_device *rdev)
834 {
835 	u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
836 	int actual_temp = (temp / 8) - 49;
837 
838 	return actual_temp * 1000;
839 }
840 
841 /*
842  * Core functions
843  */
844 static void cayman_gpu_init(struct radeon_device *rdev)
845 {
846 	u32 gb_addr_config = 0;
847 	u32 mc_shared_chmap, mc_arb_ramcfg;
848 	u32 cgts_tcc_disable;
849 	u32 sx_debug_1;
850 	u32 smx_dc_ctl0;
851 	u32 cgts_sm_ctrl_reg;
852 	u32 hdp_host_path_cntl;
853 	u32 tmp;
854 	u32 disabled_rb_mask;
855 	int i, j;
856 
857 	switch (rdev->family) {
858 	case CHIP_CAYMAN:
859 		rdev->config.cayman.max_shader_engines = 2;
860 		rdev->config.cayman.max_pipes_per_simd = 4;
861 		rdev->config.cayman.max_tile_pipes = 8;
862 		rdev->config.cayman.max_simds_per_se = 12;
863 		rdev->config.cayman.max_backends_per_se = 4;
864 		rdev->config.cayman.max_texture_channel_caches = 8;
865 		rdev->config.cayman.max_gprs = 256;
866 		rdev->config.cayman.max_threads = 256;
867 		rdev->config.cayman.max_gs_threads = 32;
868 		rdev->config.cayman.max_stack_entries = 512;
869 		rdev->config.cayman.sx_num_of_sets = 8;
870 		rdev->config.cayman.sx_max_export_size = 256;
871 		rdev->config.cayman.sx_max_export_pos_size = 64;
872 		rdev->config.cayman.sx_max_export_smx_size = 192;
873 		rdev->config.cayman.max_hw_contexts = 8;
874 		rdev->config.cayman.sq_num_cf_insts = 2;
875 
876 		rdev->config.cayman.sc_prim_fifo_size = 0x100;
877 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
878 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
879 		gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
880 		break;
881 	case CHIP_ARUBA:
882 	default:
883 		rdev->config.cayman.max_shader_engines = 1;
884 		rdev->config.cayman.max_pipes_per_simd = 4;
885 		rdev->config.cayman.max_tile_pipes = 2;
886 		if ((rdev->pdev->device == 0x9900) ||
887 		    (rdev->pdev->device == 0x9901) ||
888 		    (rdev->pdev->device == 0x9905) ||
889 		    (rdev->pdev->device == 0x9906) ||
890 		    (rdev->pdev->device == 0x9907) ||
891 		    (rdev->pdev->device == 0x9908) ||
892 		    (rdev->pdev->device == 0x9909) ||
893 		    (rdev->pdev->device == 0x990B) ||
894 		    (rdev->pdev->device == 0x990C) ||
895 		    (rdev->pdev->device == 0x990F) ||
896 		    (rdev->pdev->device == 0x9910) ||
897 		    (rdev->pdev->device == 0x9917) ||
898 		    (rdev->pdev->device == 0x9999) ||
899 		    (rdev->pdev->device == 0x999C)) {
900 			rdev->config.cayman.max_simds_per_se = 6;
901 			rdev->config.cayman.max_backends_per_se = 2;
902 		} else if ((rdev->pdev->device == 0x9903) ||
903 			   (rdev->pdev->device == 0x9904) ||
904 			   (rdev->pdev->device == 0x990A) ||
905 			   (rdev->pdev->device == 0x990D) ||
906 			   (rdev->pdev->device == 0x990E) ||
907 			   (rdev->pdev->device == 0x9913) ||
908 			   (rdev->pdev->device == 0x9918) ||
909 			   (rdev->pdev->device == 0x999D)) {
910 			rdev->config.cayman.max_simds_per_se = 4;
911 			rdev->config.cayman.max_backends_per_se = 2;
912 		} else if ((rdev->pdev->device == 0x9919) ||
913 			   (rdev->pdev->device == 0x9990) ||
914 			   (rdev->pdev->device == 0x9991) ||
915 			   (rdev->pdev->device == 0x9994) ||
916 			   (rdev->pdev->device == 0x9995) ||
917 			   (rdev->pdev->device == 0x9996) ||
918 			   (rdev->pdev->device == 0x999A) ||
919 			   (rdev->pdev->device == 0x99A0)) {
920 			rdev->config.cayman.max_simds_per_se = 3;
921 			rdev->config.cayman.max_backends_per_se = 1;
922 		} else {
923 			rdev->config.cayman.max_simds_per_se = 2;
924 			rdev->config.cayman.max_backends_per_se = 1;
925 		}
926 		rdev->config.cayman.max_texture_channel_caches = 2;
927 		rdev->config.cayman.max_gprs = 256;
928 		rdev->config.cayman.max_threads = 256;
929 		rdev->config.cayman.max_gs_threads = 32;
930 		rdev->config.cayman.max_stack_entries = 512;
931 		rdev->config.cayman.sx_num_of_sets = 8;
932 		rdev->config.cayman.sx_max_export_size = 256;
933 		rdev->config.cayman.sx_max_export_pos_size = 64;
934 		rdev->config.cayman.sx_max_export_smx_size = 192;
935 		rdev->config.cayman.max_hw_contexts = 8;
936 		rdev->config.cayman.sq_num_cf_insts = 2;
937 
938 		rdev->config.cayman.sc_prim_fifo_size = 0x40;
939 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
940 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
941 		gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
942 		break;
943 	}
944 
945 	/* Initialize HDP */
946 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
947 		WREG32((0x2c14 + j), 0x00000000);
948 		WREG32((0x2c18 + j), 0x00000000);
949 		WREG32((0x2c1c + j), 0x00000000);
950 		WREG32((0x2c20 + j), 0x00000000);
951 		WREG32((0x2c24 + j), 0x00000000);
952 	}
953 
954 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
955 
956 	evergreen_fix_pci_max_read_req_size(rdev);
957 
958 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
959 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
960 
961 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
962 	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
963 	if (rdev->config.cayman.mem_row_size_in_kb > 4)
964 		rdev->config.cayman.mem_row_size_in_kb = 4;
965 	/* XXX use MC settings? */
966 	rdev->config.cayman.shader_engine_tile_size = 32;
967 	rdev->config.cayman.num_gpus = 1;
968 	rdev->config.cayman.multi_gpu_tile_size = 64;
969 
970 	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
971 	rdev->config.cayman.num_tile_pipes = (1 << tmp);
972 	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
973 	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
974 	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
975 	rdev->config.cayman.num_shader_engines = tmp + 1;
976 	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
977 	rdev->config.cayman.num_gpus = tmp + 1;
978 	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
979 	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
980 	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
981 	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
982 
983 
984 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
985 	 * not have bank info, so create a custom tiling dword.
986 	 * bits 3:0   num_pipes
987 	 * bits 7:4   num_banks
988 	 * bits 11:8  group_size
989 	 * bits 15:12 row_size
990 	 */
991 	rdev->config.cayman.tile_config = 0;
992 	switch (rdev->config.cayman.num_tile_pipes) {
993 	case 1:
994 	default:
995 		rdev->config.cayman.tile_config |= (0 << 0);
996 		break;
997 	case 2:
998 		rdev->config.cayman.tile_config |= (1 << 0);
999 		break;
1000 	case 4:
1001 		rdev->config.cayman.tile_config |= (2 << 0);
1002 		break;
1003 	case 8:
1004 		rdev->config.cayman.tile_config |= (3 << 0);
1005 		break;
1006 	}
1007 
1008 	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
1009 	if (rdev->flags & RADEON_IS_IGP)
1010 		rdev->config.cayman.tile_config |= 1 << 4;
1011 	else {
1012 		switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1013 		case 0: /* four banks */
1014 			rdev->config.cayman.tile_config |= 0 << 4;
1015 			break;
1016 		case 1: /* eight banks */
1017 			rdev->config.cayman.tile_config |= 1 << 4;
1018 			break;
1019 		case 2: /* sixteen banks */
1020 		default:
1021 			rdev->config.cayman.tile_config |= 2 << 4;
1022 			break;
1023 		}
1024 	}
1025 	rdev->config.cayman.tile_config |=
1026 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1027 	rdev->config.cayman.tile_config |=
1028 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1029 
1030 	tmp = 0;
1031 	for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
1032 		u32 rb_disable_bitmap;
1033 
1034 		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1035 		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1036 		rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
1037 		tmp <<= 4;
1038 		tmp |= rb_disable_bitmap;
1039 	}
1040 	/* enabled rb are just the one not disabled :) */
1041 	disabled_rb_mask = tmp;
1042 	tmp = 0;
1043 	for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1044 		tmp |= (1 << i);
1045 	/* if all the backends are disabled, fix it up here */
1046 	if ((disabled_rb_mask & tmp) == tmp) {
1047 		for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1048 			disabled_rb_mask &= ~(1 << i);
1049 	}
1050 
1051 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1052 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1053 
1054 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1055 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1056 	if (ASIC_IS_DCE6(rdev))
1057 		WREG32(DMIF_ADDR_CALC, gb_addr_config);
1058 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1059 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1060 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1061 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1062 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1063 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1064 
1065 	if ((rdev->config.cayman.max_backends_per_se == 1) &&
1066 	    (rdev->flags & RADEON_IS_IGP)) {
1067 		if ((disabled_rb_mask & 3) == 1) {
1068 			/* RB0 disabled, RB1 enabled */
1069 			tmp = 0x11111111;
1070 		} else {
1071 			/* RB1 disabled, RB0 enabled */
1072 			tmp = 0x00000000;
1073 		}
1074 	} else {
1075 		tmp = gb_addr_config & NUM_PIPES_MASK;
1076 		tmp = r6xx_remap_render_backend(rdev, tmp,
1077 						rdev->config.cayman.max_backends_per_se *
1078 						rdev->config.cayman.max_shader_engines,
1079 						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
1080 	}
1081 	WREG32(GB_BACKEND_MAP, tmp);
1082 
1083 	cgts_tcc_disable = 0xffff0000;
1084 	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
1085 		cgts_tcc_disable &= ~(1 << (16 + i));
1086 	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1087 	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
1088 	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
1089 	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1090 
1091 	/* reprogram the shader complex */
1092 	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
1093 	for (i = 0; i < 16; i++)
1094 		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
1095 	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
1096 
1097 	/* set HW defaults for 3D engine */
1098 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1099 
1100 	sx_debug_1 = RREG32(SX_DEBUG_1);
1101 	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
1102 	WREG32(SX_DEBUG_1, sx_debug_1);
1103 
1104 	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
1105 	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
1106 	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
1107 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
1108 
1109 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
1110 
1111 	/* need to be explicitly zero-ed */
1112 	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
1113 	WREG32(SQ_LSTMP_RING_BASE, 0);
1114 	WREG32(SQ_HSTMP_RING_BASE, 0);
1115 	WREG32(SQ_ESTMP_RING_BASE, 0);
1116 	WREG32(SQ_GSTMP_RING_BASE, 0);
1117 	WREG32(SQ_VSTMP_RING_BASE, 0);
1118 	WREG32(SQ_PSTMP_RING_BASE, 0);
1119 
1120 	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
1121 
1122 	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
1123 					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
1124 					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
1125 
1126 	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
1127 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
1128 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
1129 
1130 
1131 	WREG32(VGT_NUM_INSTANCES, 1);
1132 
1133 	WREG32(CP_PERFMON_CNTL, 0);
1134 
1135 	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1136 				  FETCH_FIFO_HIWATER(0x4) |
1137 				  DONE_FIFO_HIWATER(0xe0) |
1138 				  ALU_UPDATE_FIFO_HIWATER(0x8)));
1139 
1140 	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1141 	WREG32(SQ_CONFIG, (VC_ENABLE |
1142 			   EXPORT_SRC_C |
1143 			   GFX_PRIO(0) |
1144 			   CS1_PRIO(0) |
1145 			   CS2_PRIO(1)));
1146 	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1147 
1148 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1149 					  FORCE_EOV_MAX_REZ_CNT(255)));
1150 
1151 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1152 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1153 
1154 	WREG32(VGT_GS_VERTEX_REUSE, 16);
1155 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1156 
1157 	WREG32(CB_PERF_CTR0_SEL_0, 0);
1158 	WREG32(CB_PERF_CTR0_SEL_1, 0);
1159 	WREG32(CB_PERF_CTR1_SEL_0, 0);
1160 	WREG32(CB_PERF_CTR1_SEL_1, 0);
1161 	WREG32(CB_PERF_CTR2_SEL_0, 0);
1162 	WREG32(CB_PERF_CTR2_SEL_1, 0);
1163 	WREG32(CB_PERF_CTR3_SEL_0, 0);
1164 	WREG32(CB_PERF_CTR3_SEL_1, 0);
1165 
1166 	tmp = RREG32(HDP_MISC_CNTL);
1167 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1168 	WREG32(HDP_MISC_CNTL, tmp);
1169 
1170 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1171 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1172 
1173 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1174 
1175 	udelay(50);
1176 
1177 	/* set clockgating golden values on TN */
1178 	if (rdev->family == CHIP_ARUBA) {
1179 		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
1180 		tmp &= ~0x00380000;
1181 		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
1182                 tmp = RREG32_CG(CG_CGTT_LOCAL_1);
1183 		tmp &= ~0x0e000000;
1184 		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
1185 	}
1186 }
1187 
1188 /*
1189  * GART
1190  */
/**
 * cayman_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache to make sure host writes have landed, then
 * request a TLB invalidate for VM context 0 so subsequent GART
 * accesses pick up fresh page table entries.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1199 
/**
 * cayman_pcie_gart_enable - enable the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2
 * cache controls, maps VM context 0 over the GTT aperture and
 * points contexts 1-7 at the same page table as a valid placeholder
 * until the VM code in radeon_gart.c assigns real per-client tables.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: flat mapping over the GTT aperture, faults are
	 * redirected to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* NOTE(review): undocumented registers zeroed during GART init;
	 * their purpose is not visible here - confirm against register docs */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 with every protection fault type routed to
	 * both an interrupt and the dummy page default */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1280 
/**
 * cayman_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables VM contexts 0 and 1, clears the L1 TLB and L2 cache
 * enable bits (unmapped accesses pass through) and unpins the
 * GART table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: note ENABLE_L1_TLB is intentionally absent */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE intentionally absent */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1300 
/**
 * cayman_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the table's VRAM backing store
 * and releases the common GART driver state.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1307 
/**
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: CP ring index (only the low 2 bits are used)
 * @cp_int_cntl: value to write to CP_INT_CNTL
 *
 * The low two bits of SRBM_GFX_CNTL select which ring instance the
 * following CP_INT_CNTL write targets, so the selector must be set
 * before the value is written.
 */
void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
			      int ring, u32 cp_int_cntl)
{
	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
	WREG32(CP_INT_CNTL, cp_int_cntl);
}
1316 
1317 /*
1318  * CP.
1319  */
/**
 * cayman_fence_ring_emit - emit a fence on a CP ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a TC/SH cache flush over the GART followed by an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to
 * the fence address (DATA_SEL(1)) and raises an interrupt
 * (INT_SEL(2)).
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1343 
/**
 * cayman_ring_ib_execute - schedule an IB on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Forces DX10/11 mode, optionally records the predicted read pointer
 * in the ring's scratch register, emits the INDIRECT_BUFFER packet
 * for the IB (tagged with its VM id when present) and finally flushes
 * the TC/SH read caches for that vmid.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 dwords for this write, 4 for the INDIRECT_BUFFER packet,
		 * 8 for the cache flush emitted below */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
}
1380 
1381 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1382 {
1383 	if (enable)
1384 		WREG32(CP_ME_CNTL, 0);
1385 	else {
1386 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1387 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1388 		WREG32(SCRATCH_UMSK, 0);
1389 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1390 	}
1391 }
1392 
/**
 * cayman_cp_load_microcode - load the CP PFP and ME microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, then streams the big-endian PFP and ME firmware
 * images word by word into the respective ucode RAMs and resets
 * the RAM address registers back to 0.
 *
 * Returns 0 on success, -EINVAL if the firmware is not loaded.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
1419 
/**
 * cayman_cp_start - initialize the gfx CP ring
 *
 * @rdev: radeon_device pointer
 *
 * Emits an ME_INITIALIZE packet, enables the CP, then emits the
 * golden clear-state (cayman_default_state) plus a handful of init
 * register writes on the gfx ring.  The compute rings are not
 * initialized here (see the XXX below).
 *
 * Returns 0 on success, negative error code if a ring lock fails.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* 7 dwords: ME_INITIALIZE header + 6 payload dwords */
	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* clear state plus 19 dwords of surrounding packets */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /*  */

	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
1485 
/**
 * cayman_cp_fini - tear down the gfx CP
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, frees the gfx ring and releases the scratch
 * register used for read pointer save/restore.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1493 
/**
 * cayman_cp_resume - bring up all three CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP block, programs ring buffer sizes, read
 * pointer writeback addresses and base addresses for rings 0-2,
 * zeroes their read/write pointers, then starts and tests ring 0
 * (the gfx ring).  The compute rings CP1/CP2 are left not ready.
 *
 * Returns 0 on success, negative error code if the gfx ring test
 * fails.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register tables, indexed 0-2 for gfx/CP1/CP2 */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* NOTE(review): bit 27 semantics not visible here - confirm
	 * against the CP_DEBUG register description */
	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = order_base_2(ring->ring_size / 8);
		rb_cntl |= order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only test cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1602 
/**
 * cayman_gpu_check_soft_reset - determine which GPU blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Samples the GRBM, DMA, SRBM and VM L2 status registers and
 * translates any busy indications into a mask of RADEON_RESET_*
 * flags.  A busy MC is deliberately dropped from the mask: it is
 * most likely just busy rather than hung.
 *
 * Returns the reset mask (0 when everything appears idle).
 */
u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1680 
/**
 * cayman_gpu_soft_reset - soft-reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags selecting what to reset
 *
 * Dumps the VM fault state, halts CP and the DMA engines, stops the
 * MC, then pulses the corresponding GRBM and SRBM soft reset bits
 * (write-set, delay, write-clear with a read-back after each write)
 * and restores the MC.  On IGP parts the MC is never reset.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	/* translate the reset mask into SRBM soft reset bits */
	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1812 
/**
 * cayman_asic_reset - soft reset the ASIC
 *
 * @rdev: radeon_device pointer
 *
 * Checks which blocks are hung, flags the engine as hung in the
 * BIOS scratch registers, soft-resets the affected blocks, then
 * clears the hung flag again if the GPU subsequently reports idle.
 *
 * Always returns 0.
 */
int cayman_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cayman_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cayman_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cayman_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
1831 
1832 /**
1833  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1834  *
1835  * @rdev: radeon_device pointer
1836  * @ring: radeon_ring structure holding ring information
1837  *
1838  * Check if the GFX engine is locked up.
1839  * Returns true if the engine appears to be locked up, false if not.
1840  */
1841 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1842 {
1843 	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1844 
1845 	if (!(reset_mask & (RADEON_RESET_GFX |
1846 			    RADEON_RESET_COMPUTE |
1847 			    RADEON_RESET_CP))) {
1848 		radeon_ring_lockup_update(ring);
1849 		return false;
1850 	}
1851 	/* force CP activities */
1852 	radeon_ring_force_activity(rdev, ring);
1853 	return radeon_ring_test_lockup(rdev, ring);
1854 }
1855 
/* Bring the GPU up to a fully functional state: load firmware, program
 * the memory controller, enable the GART, allocate RLC/writeback
 * buffers, start fences and all rings (GFX, both compute CPs, both DMA
 * engines, UVD), then bring up IRQs, the IB pool, the VM manager and
 * audio.  Called from cayman_init() and cayman_resume().
 * Returns 0 on success or a negative error code; a UVD failure is not
 * fatal and merely disables the UVD ring.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);
	/* enable aspm */
	evergreen_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	evergreen_mc_program(rdev);

	/* IGPs (TN) have no MC firmware image; discrete parts additionally
	 * require mc_fw and must have it loaded into the memory controller.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
		rdev->rlc.cs_data = cayman_cs_data;
		r = sumo_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure below the UVD ring is simply
	 * disabled by zeroing its ring_size instead of failing startup.
	 */
	r = uvd_v2_2_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* allocate and start the ring buffers; each ring gets its own
	 * rptr writeback slot and rptr/wptr register pair
	 */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size is zero when radeon_uvd_init() or the resume above
	 * failed, in which case UVD stays disabled
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	if (ASIC_IS_DCE6(rdev)) {
		r = dce6_audio_init(rdev);
		if (r)
			return r;
	} else {
		r = r600_audio_init(rdev);
		if (r)
			return r;
	}

	return 0;
}
2043 
2044 int cayman_resume(struct radeon_device *rdev)
2045 {
2046 	int r;
2047 
2048 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2049 	 * posting will perform necessary task to bring back GPU into good
2050 	 * shape.
2051 	 */
2052 	/* post card */
2053 	atom_asic_init(rdev->mode_info.atom_context);
2054 
2055 	/* init golden registers */
2056 	ni_init_golden_registers(rdev);
2057 
2058 	rdev->accel_working = true;
2059 	r = cayman_startup(rdev);
2060 	if (r) {
2061 		DRM_ERROR("cayman startup failed on resume\n");
2062 		rdev->accel_working = false;
2063 		return r;
2064 	}
2065 	return r;
2066 }
2067 
2068 int cayman_suspend(struct radeon_device *rdev)
2069 {
2070 	if (ASIC_IS_DCE6(rdev))
2071 		dce6_audio_fini(rdev);
2072 	else
2073 		r600_audio_fini(rdev);
2074 	radeon_vm_manager_fini(rdev);
2075 	cayman_cp_enable(rdev, false);
2076 	cayman_dma_stop(rdev);
2077 	uvd_v1_0_fini(rdev);
2078 	radeon_uvd_suspend(rdev);
2079 	evergreen_irq_suspend(rdev);
2080 	radeon_wb_disable(rdev);
2081 	cayman_pcie_gart_disable(rdev);
2082 	return 0;
2083 }
2084 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call ASIC-specific functions.  This should also allow us
 * to remove a bunch of callback functions like vram_info.
 */
/* One-time ASIC initialization: read and validate the (ATOM) BIOS,
 * post the card if needed, set up scratch/surface registers, clocks,
 * fences, the memory controller, the BO manager, ring and IH software
 * state and the GART, then run the full startup sequence.
 * If cayman_startup() fails, acceleration is torn down and disabled
 * but 0 is still returned so modesetting keeps working; a missing MC
 * ucode on non-IGP parts is the one hard failure (-EINVAL).
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up software state for the GFX and DMA rings; the ring
	 * buffers themselves are allocated later in cayman_startup()
	 */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional: set up its ring only if radeon_uvd_init()
	 * succeeded
	 */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* startup failed: tear down acceleration state but do
		 * not fail init, so display still works
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2196 
/* Tear down everything cayman_init()/cayman_startup() set up, roughly
 * in the reverse order of initialization, and release the cached BIOS
 * image.
 */
void cayman_fini(struct radeon_device *rdev)
{
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	/* RLC save/restore buffers are only allocated on IGPs (TN) */
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the stale pointer to guard against use-after-free */
	rdev->bios = NULL;
}
2219 
2220 /*
2221  * vm
2222  */
2223 int cayman_vm_init(struct radeon_device *rdev)
2224 {
2225 	/* number of VMs */
2226 	rdev->vm_manager.nvm = 8;
2227 	/* base offset of vram pages */
2228 	if (rdev->flags & RADEON_IS_IGP) {
2229 		u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2230 		tmp <<= 22;
2231 		rdev->vm_manager.vram_base_offset = tmp;
2232 	} else
2233 		rdev->vm_manager.vram_base_offset = 0;
2234 	return 0;
2235 }
2236 
/* Counterpart to cayman_vm_init(); intentionally empty because
 * cayman_vm_init() allocates no resources.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2240 
2241 /**
2242  * cayman_vm_decode_fault - print human readable fault info
2243  *
2244  * @rdev: radeon_device pointer
2245  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2246  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2247  *
2248  * Print human readable fault information (cayman/TN).
2249  */
2250 void cayman_vm_decode_fault(struct radeon_device *rdev,
2251 			    u32 status, u32 addr)
2252 {
2253 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2254 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2255 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2256 	char *block;
2257 
2258 	switch (mc_id) {
2259 	case 32:
2260 	case 16:
2261 	case 96:
2262 	case 80:
2263 	case 160:
2264 	case 144:
2265 	case 224:
2266 	case 208:
2267 		block = "CB";
2268 		break;
2269 	case 33:
2270 	case 17:
2271 	case 97:
2272 	case 81:
2273 	case 161:
2274 	case 145:
2275 	case 225:
2276 	case 209:
2277 		block = "CB_FMASK";
2278 		break;
2279 	case 34:
2280 	case 18:
2281 	case 98:
2282 	case 82:
2283 	case 162:
2284 	case 146:
2285 	case 226:
2286 	case 210:
2287 		block = "CB_CMASK";
2288 		break;
2289 	case 35:
2290 	case 19:
2291 	case 99:
2292 	case 83:
2293 	case 163:
2294 	case 147:
2295 	case 227:
2296 	case 211:
2297 		block = "CB_IMMED";
2298 		break;
2299 	case 36:
2300 	case 20:
2301 	case 100:
2302 	case 84:
2303 	case 164:
2304 	case 148:
2305 	case 228:
2306 	case 212:
2307 		block = "DB";
2308 		break;
2309 	case 37:
2310 	case 21:
2311 	case 101:
2312 	case 85:
2313 	case 165:
2314 	case 149:
2315 	case 229:
2316 	case 213:
2317 		block = "DB_HTILE";
2318 		break;
2319 	case 38:
2320 	case 22:
2321 	case 102:
2322 	case 86:
2323 	case 166:
2324 	case 150:
2325 	case 230:
2326 	case 214:
2327 		block = "SX";
2328 		break;
2329 	case 39:
2330 	case 23:
2331 	case 103:
2332 	case 87:
2333 	case 167:
2334 	case 151:
2335 	case 231:
2336 	case 215:
2337 		block = "DB_STEN";
2338 		break;
2339 	case 40:
2340 	case 24:
2341 	case 104:
2342 	case 88:
2343 	case 232:
2344 	case 216:
2345 	case 168:
2346 	case 152:
2347 		block = "TC_TFETCH";
2348 		break;
2349 	case 41:
2350 	case 25:
2351 	case 105:
2352 	case 89:
2353 	case 233:
2354 	case 217:
2355 	case 169:
2356 	case 153:
2357 		block = "TC_VFETCH";
2358 		break;
2359 	case 42:
2360 	case 26:
2361 	case 106:
2362 	case 90:
2363 	case 234:
2364 	case 218:
2365 	case 170:
2366 	case 154:
2367 		block = "VC";
2368 		break;
2369 	case 112:
2370 		block = "CP";
2371 		break;
2372 	case 113:
2373 	case 114:
2374 		block = "SH";
2375 		break;
2376 	case 115:
2377 		block = "VGT";
2378 		break;
2379 	case 178:
2380 		block = "IH";
2381 		break;
2382 	case 51:
2383 		block = "RLC";
2384 		break;
2385 	case 55:
2386 		block = "DMA";
2387 		break;
2388 	case 56:
2389 		block = "HDP";
2390 		break;
2391 	default:
2392 		block = "unknown";
2393 		break;
2394 	}
2395 
2396 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2397 	       protections, vmid, addr,
2398 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2399 	       block, mc_id);
2400 }
2401 
2402 #define R600_ENTRY_VALID   (1 << 0)
2403 #define R600_PTE_SYSTEM    (1 << 1)
2404 #define R600_PTE_SNOOPED   (1 << 2)
2405 #define R600_PTE_READABLE  (1 << 5)
2406 #define R600_PTE_WRITEABLE (1 << 6)
2407 
2408 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2409 {
2410 	uint32_t r600_flags = 0;
2411 	r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2412 	r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2413 	r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2414 	if (flags & RADEON_VM_PAGE_SYSTEM) {
2415 		r600_flags |= R600_PTE_SYSTEM;
2416 		r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2417 	}
2418 	return r600_flags;
2419 }
2420 
2421 /**
2422  * cayman_vm_set_page - update the page tables using the CP
2423  *
2424  * @rdev: radeon_device pointer
2425  * @ib: indirect buffer to fill with commands
2426  * @pe: addr of the page entry
2427  * @addr: dst addr to write into pe
2428  * @count: number of page entries to update
2429  * @incr: increase next addr by incr bytes
2430  * @flags: access flags
2431  *
2432  * Update the page tables using the CP (cayman/TN).
2433  */
2434 void cayman_vm_set_page(struct radeon_device *rdev,
2435 			struct radeon_ib *ib,
2436 			uint64_t pe,
2437 			uint64_t addr, unsigned count,
2438 			uint32_t incr, uint32_t flags)
2439 {
2440 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2441 	uint64_t value;
2442 	unsigned ndw;
2443 
2444 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2445 		while (count) {
2446 			ndw = 1 + count * 2;
2447 			if (ndw > 0x3FFF)
2448 				ndw = 0x3FFF;
2449 
2450 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
2451 			ib->ptr[ib->length_dw++] = pe;
2452 			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2453 			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
2454 				if (flags & RADEON_VM_PAGE_SYSTEM) {
2455 					value = radeon_vm_map_gart(rdev, addr);
2456 					value &= 0xFFFFFFFFFFFFF000ULL;
2457 				} else if (flags & RADEON_VM_PAGE_VALID) {
2458 					value = addr;
2459 				} else {
2460 					value = 0;
2461 				}
2462 				addr += incr;
2463 				value |= r600_flags;
2464 				ib->ptr[ib->length_dw++] = value;
2465 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
2466 			}
2467 		}
2468 	} else {
2469 		cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
2470 	}
2471 }
2472 
2473 /**
2474  * cayman_vm_flush - vm flush using the CP
2475  *
2476  * @rdev: radeon_device pointer
2477  *
2478  * Update the page table base and flush the VM TLB
2479  * using the CP (cayman-si).
2480  */
2481 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2482 {
2483 	struct radeon_ring *ring = &rdev->ring[ridx];
2484 
2485 	if (vm == NULL)
2486 		return;
2487 
2488 	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
2489 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2490 
2491 	/* flush hdp cache */
2492 	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
2493 	radeon_ring_write(ring, 0x1);
2494 
2495 	/* bits 0-7 are the VM contexts0-7 */
2496 	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
2497 	radeon_ring_write(ring, 1 << vm->id);
2498 
2499 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
2500 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2501 	radeon_ring_write(ring, 0x0);
2502 }
2503