xref: /openbmc/linux/drivers/gpu/drm/radeon/ni.c (revision f7777dcc)
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "nid.h"
32 #include "atom.h"
33 #include "ni_reg.h"
34 #include "cayman_blit_shaders.h"
35 #include "radeon_ucode.h"
36 #include "clearstate_cayman.h"
37 
/*
 * Register offsets the RLC saves/restores around power gating on
 * Trinity (TN/ARUBA).
 * NOTE(review): presumably consumed by the RLC ucode in this exact
 * order -- do not sort or reorder these entries; confirm against
 * sumo_rlc_init() usage.
 */
static const u32 tn_rlc_save_restore_register_list[] =
{
	0x98fc,
	0x98f0,
	0x9834,
	0x9838,
	0x9870,
	0x9874,
	0x8a14,
	0x8b24,
	0x8bcc,
	0x8b10,
	0x8c30,
	0x8d00,
	0x8d04,
	0x8c00,
	0x8c04,
	0x8c10,
	0x8c14,
	0x8d8c,
	0x8cf0,
	0x8e38,
	0x9508,
	0x9688,
	0x9608,
	0x960c,
	0x9610,
	0x9614,
	0x88c4,
	0x8978,
	0x88d4,
	0x900c,
	0x9100,
	0x913c,
	0x90e8,
	0x9354,
	0xa008,
	0x98f8,
	0x9148,
	0x914c,
	0x3f94,
	0x98f4,
	0x9b7c,
	0x3f8c,
	0x8950,
	0x8954,
	0x8a18,
	0x8b28,
	0x9144,
	0x3f90,
	0x915c,
	0x9160,
	0x9178,
	0x917c,
	0x9180,
	0x918c,
	0x9190,
	0x9194,
	0x9198,
	0x919c,
	0x91a8,
	0x91ac,
	0x91b0,
	0x91b4,
	0x91b8,
	0x91c4,
	0x91c8,
	0x91cc,
	0x91d0,
	0x91d4,
	0x91e0,
	0x91e4,
	0x91ec,
	0x91f0,
	0x91f4,
	0x9200,
	0x9204,
	0x929c,
	0x8030,
	0x9150,
	0x9a60,
	0x920c,
	0x9210,
	0x9228,
	0x922c,
	0x9244,
	0x9248,
	0x91e8,
	0x9294,
	0x9208,
	0x9224,
	0x9240,
	0x9220,
	0x923c,
	0x9258,
	0x9744,
	0xa200,
	0xa204,
	0xa208,
	0xa20c,
	0x8d58,
	0x9030,
	0x9034,
	0x9038,
	0x903c,
	0x9040,
	0x9654,
	0x897c,
	0xa210,
	0xa214,
	0x9868,
	0xa02c,
	0x9664,
	0x9698,
	0x949c,
	0x8e10,
	0x8e18,
	0x8c50,
	0x8c58,
	0x8c60,
	0x8c68,
	0x89b4,
	0x9830,
	0x802c,
};
163 
/*
 * Helpers shared with the other Northern Islands / Evergreen asic files,
 * declared extern here instead of a shared header.
 * NOTE(review): presumably defined in evergreen.c, sumo_dpm/rlc code and
 * the cayman DMA code -- confirm against the rest of the driver.
 */
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
extern void evergreen_mc_program(struct radeon_device *rdev);
extern void evergreen_irq_suspend(struct radeon_device *rdev);
extern int evergreen_mc_init(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
extern void evergreen_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void cayman_dma_vm_set_page(struct radeon_device *rdev,
				   struct radeon_ib *ib,
				   uint64_t pe,
				   uint64_t addr, unsigned count,
				   uint32_t incr, uint32_t flags);
182 
/*
 * Firmware names: external ucode images fetched at runtime with
 * request_firmware() (see ni_init_microcode()).  Listing them here lets
 * userspace tooling (e.g. initramfs generators) know what to ship.
 */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BARTS_smc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/TURKS_smc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
MODULE_FIRMWARE("radeon/ARUBA_me.bin");
MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
205 
206 
/*
 * Second golden-register table for Cayman, applied via
 * radeon_program_register_sequence() in ni_init_golden_registers().
 * Entries are {register offset, mask, value} triples.
 * NOTE(review): with a 0xffffffff mask the value is presumably written
 * verbatim -- confirm against radeon_program_register_sequence().
 */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
216 
/*
 * Primary golden-register table for Cayman ({offset, mask, value}
 * triples), applied via radeon_program_register_sequence() in
 * ni_init_golden_registers().
 */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
257 
/*
 * Second golden-register table for the "dvst" ARUBA (Trinity) variants
 * ({offset, mask, value} triples); selected by PCI device id in
 * ni_init_golden_registers().
 */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
265 
/*
 * Primary golden-register table for the "dvst" ARUBA (Trinity) variants
 * ({offset, mask, value} triples), applied via
 * radeon_program_register_sequence().
 */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
323 
/*
 * Golden-register table for the "scrapper" ARUBA variants ({offset,
 * mask, value} triples), used for devices not in the dvst id list in
 * ni_init_golden_registers().
 * NOTE(review): most entries appear twice in a row -- presumably an
 * intentional double-write required by the hardware; do not dedupe
 * without confirming.
 */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
430 
431 static void ni_init_golden_registers(struct radeon_device *rdev)
432 {
433 	switch (rdev->family) {
434 	case CHIP_CAYMAN:
435 		radeon_program_register_sequence(rdev,
436 						 cayman_golden_registers,
437 						 (const u32)ARRAY_SIZE(cayman_golden_registers));
438 		radeon_program_register_sequence(rdev,
439 						 cayman_golden_registers2,
440 						 (const u32)ARRAY_SIZE(cayman_golden_registers2));
441 		break;
442 	case CHIP_ARUBA:
443 		if ((rdev->pdev->device == 0x9900) ||
444 		    (rdev->pdev->device == 0x9901) ||
445 		    (rdev->pdev->device == 0x9903) ||
446 		    (rdev->pdev->device == 0x9904) ||
447 		    (rdev->pdev->device == 0x9905) ||
448 		    (rdev->pdev->device == 0x9906) ||
449 		    (rdev->pdev->device == 0x9907) ||
450 		    (rdev->pdev->device == 0x9908) ||
451 		    (rdev->pdev->device == 0x9909) ||
452 		    (rdev->pdev->device == 0x990A) ||
453 		    (rdev->pdev->device == 0x990B) ||
454 		    (rdev->pdev->device == 0x990C) ||
455 		    (rdev->pdev->device == 0x990D) ||
456 		    (rdev->pdev->device == 0x990E) ||
457 		    (rdev->pdev->device == 0x990F) ||
458 		    (rdev->pdev->device == 0x9910) ||
459 		    (rdev->pdev->device == 0x9913) ||
460 		    (rdev->pdev->device == 0x9917) ||
461 		    (rdev->pdev->device == 0x9918)) {
462 			radeon_program_register_sequence(rdev,
463 							 dvst_golden_registers,
464 							 (const u32)ARRAY_SIZE(dvst_golden_registers));
465 			radeon_program_register_sequence(rdev,
466 							 dvst_golden_registers2,
467 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
468 		} else {
469 			radeon_program_register_sequence(rdev,
470 							 scrapper_golden_registers,
471 							 (const u32)ARRAY_SIZE(scrapper_golden_registers));
472 			radeon_program_register_sequence(rdev,
473 							 dvst_golden_registers2,
474 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
475 		}
476 		break;
477 	default:
478 		break;
479 	}
480 }
481 
/* number of {index, data} pairs in each *_io_mc_regs table below */
#define BTC_IO_MC_REGS_SIZE 29

/*
 * Barts MC io-debug register {index, data} pairs, written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ni_mc_load_microcode() before the MC
 * ucode is loaded.
 */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
515 
/*
 * Turks MC io-debug register {index, data} pairs (same layout as the
 * Barts table; only the last entry differs).
 */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
547 
/*
 * Caicos MC io-debug register {index, data} pairs (same layout as the
 * Barts table; only the last entry differs).
 */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
579 
/*
 * Cayman MC io-debug register {index, data} pairs (same layout as the
 * Barts table; only the last entry differs).
 */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
611 
612 int ni_mc_load_microcode(struct radeon_device *rdev)
613 {
614 	const __be32 *fw_data;
615 	u32 mem_type, running, blackout = 0;
616 	u32 *io_mc_regs;
617 	int i, ucode_size, regs_size;
618 
619 	if (!rdev->mc_fw)
620 		return -EINVAL;
621 
622 	switch (rdev->family) {
623 	case CHIP_BARTS:
624 		io_mc_regs = (u32 *)&barts_io_mc_regs;
625 		ucode_size = BTC_MC_UCODE_SIZE;
626 		regs_size = BTC_IO_MC_REGS_SIZE;
627 		break;
628 	case CHIP_TURKS:
629 		io_mc_regs = (u32 *)&turks_io_mc_regs;
630 		ucode_size = BTC_MC_UCODE_SIZE;
631 		regs_size = BTC_IO_MC_REGS_SIZE;
632 		break;
633 	case CHIP_CAICOS:
634 	default:
635 		io_mc_regs = (u32 *)&caicos_io_mc_regs;
636 		ucode_size = BTC_MC_UCODE_SIZE;
637 		regs_size = BTC_IO_MC_REGS_SIZE;
638 		break;
639 	case CHIP_CAYMAN:
640 		io_mc_regs = (u32 *)&cayman_io_mc_regs;
641 		ucode_size = CAYMAN_MC_UCODE_SIZE;
642 		regs_size = BTC_IO_MC_REGS_SIZE;
643 		break;
644 	}
645 
646 	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
647 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
648 
649 	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
650 		if (running) {
651 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
652 			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
653 		}
654 
655 		/* reset the engine and set to writable */
656 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
657 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
658 
659 		/* load mc io regs */
660 		for (i = 0; i < regs_size; i++) {
661 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
662 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
663 		}
664 		/* load the MC ucode */
665 		fw_data = (const __be32 *)rdev->mc_fw->data;
666 		for (i = 0; i < ucode_size; i++)
667 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
668 
669 		/* put the engine back into the active state */
670 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
671 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
672 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
673 
674 		/* wait for training to complete */
675 		for (i = 0; i < rdev->usec_timeout; i++) {
676 			if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
677 				break;
678 			udelay(1);
679 		}
680 
681 		if (running)
682 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
683 	}
684 
685 	return 0;
686 }
687 
688 int ni_init_microcode(struct radeon_device *rdev)
689 {
690 	const char *chip_name;
691 	const char *rlc_chip_name;
692 	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
693 	size_t smc_req_size = 0;
694 	char fw_name[30];
695 	int err;
696 
697 	DRM_DEBUG("\n");
698 
699 	switch (rdev->family) {
700 	case CHIP_BARTS:
701 		chip_name = "BARTS";
702 		rlc_chip_name = "BTC";
703 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
704 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
705 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
706 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
707 		smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
708 		break;
709 	case CHIP_TURKS:
710 		chip_name = "TURKS";
711 		rlc_chip_name = "BTC";
712 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
713 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
714 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
715 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
716 		smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
717 		break;
718 	case CHIP_CAICOS:
719 		chip_name = "CAICOS";
720 		rlc_chip_name = "BTC";
721 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
722 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
723 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
724 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
725 		smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
726 		break;
727 	case CHIP_CAYMAN:
728 		chip_name = "CAYMAN";
729 		rlc_chip_name = "CAYMAN";
730 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
731 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
732 		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
733 		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
734 		smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
735 		break;
736 	case CHIP_ARUBA:
737 		chip_name = "ARUBA";
738 		rlc_chip_name = "ARUBA";
739 		/* pfp/me same size as CAYMAN */
740 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
741 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
742 		rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
743 		mc_req_size = 0;
744 		break;
745 	default: BUG();
746 	}
747 
748 	DRM_INFO("Loading %s Microcode\n", chip_name);
749 
750 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
751 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
752 	if (err)
753 		goto out;
754 	if (rdev->pfp_fw->size != pfp_req_size) {
755 		printk(KERN_ERR
756 		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
757 		       rdev->pfp_fw->size, fw_name);
758 		err = -EINVAL;
759 		goto out;
760 	}
761 
762 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
763 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
764 	if (err)
765 		goto out;
766 	if (rdev->me_fw->size != me_req_size) {
767 		printk(KERN_ERR
768 		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
769 		       rdev->me_fw->size, fw_name);
770 		err = -EINVAL;
771 	}
772 
773 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
774 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
775 	if (err)
776 		goto out;
777 	if (rdev->rlc_fw->size != rlc_req_size) {
778 		printk(KERN_ERR
779 		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
780 		       rdev->rlc_fw->size, fw_name);
781 		err = -EINVAL;
782 	}
783 
784 	/* no MC ucode on TN */
785 	if (!(rdev->flags & RADEON_IS_IGP)) {
786 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
787 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
788 		if (err)
789 			goto out;
790 		if (rdev->mc_fw->size != mc_req_size) {
791 			printk(KERN_ERR
792 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
793 			       rdev->mc_fw->size, fw_name);
794 			err = -EINVAL;
795 		}
796 	}
797 
798 	if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
799 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
800 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
801 		if (err) {
802 			printk(KERN_ERR
803 			       "smc: error loading firmware \"%s\"\n",
804 			       fw_name);
805 			release_firmware(rdev->smc_fw);
806 			rdev->smc_fw = NULL;
807 			err = 0;
808 		} else if (rdev->smc_fw->size != smc_req_size) {
809 			printk(KERN_ERR
810 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
811 			       rdev->mc_fw->size, fw_name);
812 			err = -EINVAL;
813 		}
814 	}
815 
816 out:
817 	if (err) {
818 		if (err != -EINVAL)
819 			printk(KERN_ERR
820 			       "ni_cp: Failed to load firmware \"%s\"\n",
821 			       fw_name);
822 		release_firmware(rdev->pfp_fw);
823 		rdev->pfp_fw = NULL;
824 		release_firmware(rdev->me_fw);
825 		rdev->me_fw = NULL;
826 		release_firmware(rdev->rlc_fw);
827 		rdev->rlc_fw = NULL;
828 		release_firmware(rdev->mc_fw);
829 		rdev->mc_fw = NULL;
830 	}
831 	return err;
832 }
833 
/**
 * tn_get_temp - read the current GPU temperature on Trinity (TN)
 * @rdev: radeon_device pointer
 *
 * Reads the 11-bit raw temperature from TN_CURRENT_GNB_TEMP through the
 * SMC register space and converts it (raw / 8 - 49 degrees C).
 *
 * Returns the temperature in millidegrees Celsius.
 */
int tn_get_temp(struct radeon_device *rdev)
{
	u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
	int actual_temp = (temp / 8) - 49;

	return actual_temp * 1000;
}
841 
842 /*
843  * Core functions
844  */
845 static void cayman_gpu_init(struct radeon_device *rdev)
846 {
847 	u32 gb_addr_config = 0;
848 	u32 mc_shared_chmap, mc_arb_ramcfg;
849 	u32 cgts_tcc_disable;
850 	u32 sx_debug_1;
851 	u32 smx_dc_ctl0;
852 	u32 cgts_sm_ctrl_reg;
853 	u32 hdp_host_path_cntl;
854 	u32 tmp;
855 	u32 disabled_rb_mask;
856 	int i, j;
857 
858 	switch (rdev->family) {
859 	case CHIP_CAYMAN:
860 		rdev->config.cayman.max_shader_engines = 2;
861 		rdev->config.cayman.max_pipes_per_simd = 4;
862 		rdev->config.cayman.max_tile_pipes = 8;
863 		rdev->config.cayman.max_simds_per_se = 12;
864 		rdev->config.cayman.max_backends_per_se = 4;
865 		rdev->config.cayman.max_texture_channel_caches = 8;
866 		rdev->config.cayman.max_gprs = 256;
867 		rdev->config.cayman.max_threads = 256;
868 		rdev->config.cayman.max_gs_threads = 32;
869 		rdev->config.cayman.max_stack_entries = 512;
870 		rdev->config.cayman.sx_num_of_sets = 8;
871 		rdev->config.cayman.sx_max_export_size = 256;
872 		rdev->config.cayman.sx_max_export_pos_size = 64;
873 		rdev->config.cayman.sx_max_export_smx_size = 192;
874 		rdev->config.cayman.max_hw_contexts = 8;
875 		rdev->config.cayman.sq_num_cf_insts = 2;
876 
877 		rdev->config.cayman.sc_prim_fifo_size = 0x100;
878 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
879 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
880 		gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
881 		break;
882 	case CHIP_ARUBA:
883 	default:
884 		rdev->config.cayman.max_shader_engines = 1;
885 		rdev->config.cayman.max_pipes_per_simd = 4;
886 		rdev->config.cayman.max_tile_pipes = 2;
887 		if ((rdev->pdev->device == 0x9900) ||
888 		    (rdev->pdev->device == 0x9901) ||
889 		    (rdev->pdev->device == 0x9905) ||
890 		    (rdev->pdev->device == 0x9906) ||
891 		    (rdev->pdev->device == 0x9907) ||
892 		    (rdev->pdev->device == 0x9908) ||
893 		    (rdev->pdev->device == 0x9909) ||
894 		    (rdev->pdev->device == 0x990B) ||
895 		    (rdev->pdev->device == 0x990C) ||
896 		    (rdev->pdev->device == 0x990F) ||
897 		    (rdev->pdev->device == 0x9910) ||
898 		    (rdev->pdev->device == 0x9917) ||
899 		    (rdev->pdev->device == 0x9999) ||
900 		    (rdev->pdev->device == 0x999C)) {
901 			rdev->config.cayman.max_simds_per_se = 6;
902 			rdev->config.cayman.max_backends_per_se = 2;
903 		} else if ((rdev->pdev->device == 0x9903) ||
904 			   (rdev->pdev->device == 0x9904) ||
905 			   (rdev->pdev->device == 0x990A) ||
906 			   (rdev->pdev->device == 0x990D) ||
907 			   (rdev->pdev->device == 0x990E) ||
908 			   (rdev->pdev->device == 0x9913) ||
909 			   (rdev->pdev->device == 0x9918) ||
910 			   (rdev->pdev->device == 0x999D)) {
911 			rdev->config.cayman.max_simds_per_se = 4;
912 			rdev->config.cayman.max_backends_per_se = 2;
913 		} else if ((rdev->pdev->device == 0x9919) ||
914 			   (rdev->pdev->device == 0x9990) ||
915 			   (rdev->pdev->device == 0x9991) ||
916 			   (rdev->pdev->device == 0x9994) ||
917 			   (rdev->pdev->device == 0x9995) ||
918 			   (rdev->pdev->device == 0x9996) ||
919 			   (rdev->pdev->device == 0x999A) ||
920 			   (rdev->pdev->device == 0x99A0)) {
921 			rdev->config.cayman.max_simds_per_se = 3;
922 			rdev->config.cayman.max_backends_per_se = 1;
923 		} else {
924 			rdev->config.cayman.max_simds_per_se = 2;
925 			rdev->config.cayman.max_backends_per_se = 1;
926 		}
927 		rdev->config.cayman.max_texture_channel_caches = 2;
928 		rdev->config.cayman.max_gprs = 256;
929 		rdev->config.cayman.max_threads = 256;
930 		rdev->config.cayman.max_gs_threads = 32;
931 		rdev->config.cayman.max_stack_entries = 512;
932 		rdev->config.cayman.sx_num_of_sets = 8;
933 		rdev->config.cayman.sx_max_export_size = 256;
934 		rdev->config.cayman.sx_max_export_pos_size = 64;
935 		rdev->config.cayman.sx_max_export_smx_size = 192;
936 		rdev->config.cayman.max_hw_contexts = 8;
937 		rdev->config.cayman.sq_num_cf_insts = 2;
938 
939 		rdev->config.cayman.sc_prim_fifo_size = 0x40;
940 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
941 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
942 		gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
943 		break;
944 	}
945 
946 	/* Initialize HDP */
947 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
948 		WREG32((0x2c14 + j), 0x00000000);
949 		WREG32((0x2c18 + j), 0x00000000);
950 		WREG32((0x2c1c + j), 0x00000000);
951 		WREG32((0x2c20 + j), 0x00000000);
952 		WREG32((0x2c24 + j), 0x00000000);
953 	}
954 
955 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
956 
957 	evergreen_fix_pci_max_read_req_size(rdev);
958 
959 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
960 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
961 
962 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
963 	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
964 	if (rdev->config.cayman.mem_row_size_in_kb > 4)
965 		rdev->config.cayman.mem_row_size_in_kb = 4;
966 	/* XXX use MC settings? */
967 	rdev->config.cayman.shader_engine_tile_size = 32;
968 	rdev->config.cayman.num_gpus = 1;
969 	rdev->config.cayman.multi_gpu_tile_size = 64;
970 
971 	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
972 	rdev->config.cayman.num_tile_pipes = (1 << tmp);
973 	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
974 	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
975 	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
976 	rdev->config.cayman.num_shader_engines = tmp + 1;
977 	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
978 	rdev->config.cayman.num_gpus = tmp + 1;
979 	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
980 	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
981 	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
982 	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
983 
984 
985 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
986 	 * not have bank info, so create a custom tiling dword.
987 	 * bits 3:0   num_pipes
988 	 * bits 7:4   num_banks
989 	 * bits 11:8  group_size
990 	 * bits 15:12 row_size
991 	 */
992 	rdev->config.cayman.tile_config = 0;
993 	switch (rdev->config.cayman.num_tile_pipes) {
994 	case 1:
995 	default:
996 		rdev->config.cayman.tile_config |= (0 << 0);
997 		break;
998 	case 2:
999 		rdev->config.cayman.tile_config |= (1 << 0);
1000 		break;
1001 	case 4:
1002 		rdev->config.cayman.tile_config |= (2 << 0);
1003 		break;
1004 	case 8:
1005 		rdev->config.cayman.tile_config |= (3 << 0);
1006 		break;
1007 	}
1008 
1009 	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
1010 	if (rdev->flags & RADEON_IS_IGP)
1011 		rdev->config.cayman.tile_config |= 1 << 4;
1012 	else {
1013 		switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1014 		case 0: /* four banks */
1015 			rdev->config.cayman.tile_config |= 0 << 4;
1016 			break;
1017 		case 1: /* eight banks */
1018 			rdev->config.cayman.tile_config |= 1 << 4;
1019 			break;
1020 		case 2: /* sixteen banks */
1021 		default:
1022 			rdev->config.cayman.tile_config |= 2 << 4;
1023 			break;
1024 		}
1025 	}
1026 	rdev->config.cayman.tile_config |=
1027 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1028 	rdev->config.cayman.tile_config |=
1029 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1030 
1031 	tmp = 0;
1032 	for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
1033 		u32 rb_disable_bitmap;
1034 
1035 		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1036 		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1037 		rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
1038 		tmp <<= 4;
1039 		tmp |= rb_disable_bitmap;
1040 	}
1041 	/* enabled rb are just the one not disabled :) */
1042 	disabled_rb_mask = tmp;
1043 	tmp = 0;
1044 	for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1045 		tmp |= (1 << i);
1046 	/* if all the backends are disabled, fix it up here */
1047 	if ((disabled_rb_mask & tmp) == tmp) {
1048 		for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1049 			disabled_rb_mask &= ~(1 << i);
1050 	}
1051 
1052 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1053 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1054 
1055 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1056 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1057 	if (ASIC_IS_DCE6(rdev))
1058 		WREG32(DMIF_ADDR_CALC, gb_addr_config);
1059 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1060 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1061 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1062 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1063 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1064 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1065 
1066 	if ((rdev->config.cayman.max_backends_per_se == 1) &&
1067 	    (rdev->flags & RADEON_IS_IGP)) {
1068 		if ((disabled_rb_mask & 3) == 1) {
1069 			/* RB0 disabled, RB1 enabled */
1070 			tmp = 0x11111111;
1071 		} else {
1072 			/* RB1 disabled, RB0 enabled */
1073 			tmp = 0x00000000;
1074 		}
1075 	} else {
1076 		tmp = gb_addr_config & NUM_PIPES_MASK;
1077 		tmp = r6xx_remap_render_backend(rdev, tmp,
1078 						rdev->config.cayman.max_backends_per_se *
1079 						rdev->config.cayman.max_shader_engines,
1080 						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
1081 	}
1082 	WREG32(GB_BACKEND_MAP, tmp);
1083 
1084 	cgts_tcc_disable = 0xffff0000;
1085 	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
1086 		cgts_tcc_disable &= ~(1 << (16 + i));
1087 	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1088 	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
1089 	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
1090 	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1091 
1092 	/* reprogram the shader complex */
1093 	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
1094 	for (i = 0; i < 16; i++)
1095 		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
1096 	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
1097 
1098 	/* set HW defaults for 3D engine */
1099 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1100 
1101 	sx_debug_1 = RREG32(SX_DEBUG_1);
1102 	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
1103 	WREG32(SX_DEBUG_1, sx_debug_1);
1104 
1105 	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
1106 	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
1107 	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
1108 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
1109 
1110 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
1111 
1112 	/* need to be explicitly zero-ed */
1113 	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
1114 	WREG32(SQ_LSTMP_RING_BASE, 0);
1115 	WREG32(SQ_HSTMP_RING_BASE, 0);
1116 	WREG32(SQ_ESTMP_RING_BASE, 0);
1117 	WREG32(SQ_GSTMP_RING_BASE, 0);
1118 	WREG32(SQ_VSTMP_RING_BASE, 0);
1119 	WREG32(SQ_PSTMP_RING_BASE, 0);
1120 
1121 	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
1122 
1123 	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
1124 					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
1125 					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
1126 
1127 	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
1128 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
1129 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
1130 
1131 
1132 	WREG32(VGT_NUM_INSTANCES, 1);
1133 
1134 	WREG32(CP_PERFMON_CNTL, 0);
1135 
1136 	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1137 				  FETCH_FIFO_HIWATER(0x4) |
1138 				  DONE_FIFO_HIWATER(0xe0) |
1139 				  ALU_UPDATE_FIFO_HIWATER(0x8)));
1140 
1141 	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1142 	WREG32(SQ_CONFIG, (VC_ENABLE |
1143 			   EXPORT_SRC_C |
1144 			   GFX_PRIO(0) |
1145 			   CS1_PRIO(0) |
1146 			   CS2_PRIO(1)));
1147 	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1148 
1149 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1150 					  FORCE_EOV_MAX_REZ_CNT(255)));
1151 
1152 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1153 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1154 
1155 	WREG32(VGT_GS_VERTEX_REUSE, 16);
1156 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1157 
1158 	WREG32(CB_PERF_CTR0_SEL_0, 0);
1159 	WREG32(CB_PERF_CTR0_SEL_1, 0);
1160 	WREG32(CB_PERF_CTR1_SEL_0, 0);
1161 	WREG32(CB_PERF_CTR1_SEL_1, 0);
1162 	WREG32(CB_PERF_CTR2_SEL_0, 0);
1163 	WREG32(CB_PERF_CTR2_SEL_1, 0);
1164 	WREG32(CB_PERF_CTR3_SEL_0, 0);
1165 	WREG32(CB_PERF_CTR3_SEL_1, 0);
1166 
1167 	tmp = RREG32(HDP_MISC_CNTL);
1168 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1169 	WREG32(HDP_MISC_CNTL, tmp);
1170 
1171 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1172 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1173 
1174 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1175 
1176 	udelay(50);
1177 
1178 	/* set clockgating golden values on TN */
1179 	if (rdev->family == CHIP_ARUBA) {
1180 		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
1181 		tmp &= ~0x00380000;
1182 		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
1183                 tmp = RREG32_CG(CG_CGTT_LOCAL_1);
1184 		tmp &= ~0x0e000000;
1185 		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
1186 	}
1187 }
1188 
1189 /*
1190  * GART
1191  */
/**
 * cayman_pcie_gart_tlb_flush - flush the GART TLB for the VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then requests a TLB invalidate via
 * VM_INVALIDATE_REQUEST.  Writing 1 invalidates context 0 only
 * (bits 0-7 of the register select contexts 0-7).
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1200 
/**
 * cayman_pcie_gart_enable - set up the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and L2 cache,
 * programs VM context 0 to span the GTT aperture backed by the GART
 * table, points contexts 1-7 at the same table for now (real page
 * tables are assigned on the fly by the VM code in radeon_gart.c),
 * enables all eight contexts and flushes the TLB.
 *
 * Returns 0 on success, negative error code on failure (no GART
 * table object, or the table could not be pinned).
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: covers the GTT range, faults fall back to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* NOTE(review): undocumented registers zeroed as part of GART
	 * bring-up -- purpose not visible here, TODO confirm */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		/* the per-context registers are contiguous, so index off
		 * the context0 register with a dword stride */
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1281 
/**
 * cayman_pcie_gart_disable - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, turns off the L1 TLB and the L2 cache
 * (keeping the pass-through/unmapped-access settings), then unpins
 * the GART table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1301 
/**
 * cayman_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the GART table BO and releases
 * the common GART state.  Order matters: hardware must stop using
 * the table before it is freed.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1308 
1309 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
1310 			      int ring, u32 cp_int_cntl)
1311 {
1312 	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
1313 
1314 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
1315 	WREG32(CP_INT_CNTL, cp_int_cntl);
1316 }
1317 
1318 /*
1319  * CP.
1320  */
/**
 * cayman_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the TC/SH read caches over the GART for this VMID (vmid 0
 * here: CP_COHER_CNTL2 is written with 0), then emits an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to the
 * fence driver's GPU address and raises an interrupt (DATA_SEL(1),
 * INT_SEL(2)).  The packet layout is fixed; do not reorder.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* full coherency size */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1344 
/**
 * cayman_ring_ib_execute - schedule an indirect buffer on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Switches the CP to DX10/11 mode, optionally records the post-IB
 * read pointer in the ring's scratch register, emits the
 * INDIRECT_BUFFER packet tagged with the IB's VM id, and finally
 * flushes the TC/SH read caches over the GART for that VM id.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* rptr after this packet (3 dw), the IB packet (4 dw)
		 * and the cache flush below (8 dw) */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
}
1381 
1382 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1383 {
1384 	if (enable)
1385 		WREG32(CP_ME_CNTL, 0);
1386 	else {
1387 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1388 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1389 		WREG32(SCRATCH_UMSK, 0);
1390 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1391 	}
1392 }
1393 
/**
 * cayman_cp_load_microcode - load the PFP and ME firmware into the CP
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, streams the big-endian PFP and ME firmware images
 * into the CP ucode RAMs one dword at a time, then resets the RAM
 * address pointers.
 *
 * Returns 0 on success, -EINVAL if the firmware has not been
 * requested/loaded yet.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	cayman_cp_enable(rdev, false);

	/* PFP ucode */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* ME ucode */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	/* reset the address pointers */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
1420 
/**
 * cayman_cp_start - initialize the CP and emit the initial GPU state
 *
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE on the gfx ring, enables the CP, then emits
 * the golden register state (cayman_default_state) bracketed by
 * clear-state preamble packets, plus a handful of initial register
 * writes.  Only the gfx ring (cp0) is initialized here.
 *
 * Returns 0 on success, or the error from radeon_ring_lock().
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* 7 dwords: ME_INITIALIZE header + 6 payload dwords below */
	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* +19 covers the preamble/clear-state/const packets below */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* next context reg, presumably
					      * VGT_OUT_DEALLOC_CNTL -- TODO
					      * confirm */

	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
1486 
/**
 * cayman_cp_fini - tear down the gfx CP ring
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, frees the gfx ring and releases the scratch register
 * used for rptr save.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1494 
/**
 * cayman_cp_resume - reset and restart the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP (and the gfx blocks that must be reset with it),
 * programs the ring buffer control, rptr write-back addresses and
 * base addresses for rings 0-2, zeroes all read/write pointers, then
 * starts the rings via cayman_cp_start() and ring-tests cp0.
 *
 * Returns 0 on success, or the error from the cp0 ring test.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = order_base_2(ring->ring_size / 8);
		rb_cntl |= order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		/* NOTE(review): all three rings point at the same rptr
		 * write-back slot -- TODO confirm this is intentional */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only tests cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1603 
/**
 * cayman_gpu_check_soft_reset - determine which blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, DMA, SRBM and VM L2 status registers and
 * translates their busy/pending bits into a mask of RADEON_RESET_*
 * flags.  A busy MC is treated as merely busy, not hung, and is
 * dropped from the mask.
 *
 * Returns the reset mask (0 if nothing appears hung).
 */
u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1681 
/**
 * cayman_gpu_soft_reset - soft-reset the blocks named in the mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags indicating which blocks to reset
 *
 * Dumps the fault status registers, halts the CP and the DMA ring
 * buffers, stops the MC, then translates @reset_mask into
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET bits and pulses them.  Finally
 * restores the MC and prints the status registers again.  On IGPs the
 * MC is never soft-reset.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the reset mask onto GRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	/* map the reset mask onto SRBM soft reset bits */
	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, settle, clear */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits: set, settle, clear */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1813 
/**
 * cayman_asic_reset - soft reset the GPU
 *
 * @rdev: radeon_device pointer
 *
 * Checks which blocks appear hung, flags the engine as hung in the
 * BIOS scratch register, performs the soft reset, then re-checks and
 * clears the hung flag if everything came back idle.
 *
 * Always returns 0 (the reset itself is not verified here).
 */
int cayman_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cayman_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cayman_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cayman_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
1832 
1833 /**
1834  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1835  *
1836  * @rdev: radeon_device pointer
1837  * @ring: radeon_ring structure holding ring information
1838  *
1839  * Check if the GFX engine is locked up.
1840  * Returns true if the engine appears to be locked up, false if not.
1841  */
1842 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1843 {
1844 	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1845 
1846 	if (!(reset_mask & (RADEON_RESET_GFX |
1847 			    RADEON_RESET_COMPUTE |
1848 			    RADEON_RESET_CP))) {
1849 		radeon_ring_lockup_update(ring);
1850 		return false;
1851 	}
1852 	/* force CP activities */
1853 	radeon_ring_force_activity(rdev, ring);
1854 	return radeon_ring_test_lockup(rdev, ring);
1855 }
1856 
/*
 * cayman_startup - bring the cayman/TN hardware up to an operational state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the chip and starts all rings: firmware load, MC/GART setup,
 * RLC/writeback buffers, fence drivers, IRQs, CP/DMA/UVD rings, IB pool
 * and the VM manager.  Called from cayman_init() and cayman_resume();
 * the step ordering below is hardware-mandated — do not reorder casually.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);
	/* enable aspm */
	evergreen_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	evergreen_mc_program(rdev);

	/* Load microcode if it is not already cached.  IGP (TN) parts have
	 * no MC firmware; discrete parts additionally require mc_fw. */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* allocate rlc buffers (IGP/TN only — see matching sumo_rlc_fini()
	 * calls in the teardown paths) */
	if (rdev->flags & RADEON_IS_IGP) {
		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
		rdev->rlc.cs_data = cayman_cs_data;
		r = sumo_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure the UVD ring is simply disabled
	 * (ring_size = 0) rather than failing the whole startup */
	r = uvd_v2_2_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* bring up the GFX ring, then both DMA engines */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size is 0 here if UVD resume failed above or if
	 * radeon_uvd_init() failed in cayman_init() */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	/* DCE6 parts use the newer audio block */
	if (ASIC_IS_DCE6(rdev)) {
		r = dce6_audio_init(rdev);
		if (r)
			return r;
	} else {
		r = r600_audio_init(rdev);
		if (r)
			return r;
	}

	return 0;
}
2044 
2045 int cayman_resume(struct radeon_device *rdev)
2046 {
2047 	int r;
2048 
2049 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2050 	 * posting will perform necessary task to bring back GPU into good
2051 	 * shape.
2052 	 */
2053 	/* post card */
2054 	atom_asic_init(rdev->mode_info.atom_context);
2055 
2056 	/* init golden registers */
2057 	ni_init_golden_registers(rdev);
2058 
2059 	rdev->accel_working = true;
2060 	r = cayman_startup(rdev);
2061 	if (r) {
2062 		DRM_ERROR("cayman startup failed on resume\n");
2063 		rdev->accel_working = false;
2064 		return r;
2065 	}
2066 	return r;
2067 }
2068 
/*
 * cayman_suspend - quiesce the asic before suspend
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the runtime state roughly in reverse order of
 * cayman_startup(): audio, VM manager, command processors, UVD,
 * interrupts, writeback and finally the GART.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	/* DCE6 parts use the newer audio block */
	if (ASIC_IS_DCE6(rdev))
		dce6_audio_fini(rdev);
	else
		r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and DMA engines before touching the rest */
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2085 
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call the asic specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
/*
 * cayman_init - one-time driver initialization for cayman/TN parts
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed, sets up
 * clocks, fences, the memory controller and buffer manager, sizes all
 * rings, and finally runs cayman_startup().  A startup failure disables
 * acceleration but does not fail init; a missing MC ucode on discrete
 * parts does fail init.  Returns 0 on success, negative error code on
 * failure.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* size the rings; the buffers themselves are allocated later in
	 * radeon_ring_init() during cayman_startup() */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional; on failure its ring simply stays unsized */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* acceleration is disabled but modesetting can still work */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2197 
/*
 * cayman_fini - final driver teardown, mirror of cayman_init()
 *
 * @rdev: radeon_device pointer
 *
 * Releases everything cayman_init()/cayman_startup() set up, in reverse
 * dependency order, and frees the cached BIOS image.
 */
void cayman_fini(struct radeon_device *rdev)
{
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	/* only IGP (TN) parts allocate RLC buffers in cayman_startup() */
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2220 
2221 /*
2222  * vm
2223  */
2224 int cayman_vm_init(struct radeon_device *rdev)
2225 {
2226 	/* number of VMs */
2227 	rdev->vm_manager.nvm = 8;
2228 	/* base offset of vram pages */
2229 	if (rdev->flags & RADEON_IS_IGP) {
2230 		u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2231 		tmp <<= 22;
2232 		rdev->vm_manager.vram_base_offset = tmp;
2233 	} else
2234 		rdev->vm_manager.vram_base_offset = 0;
2235 	return 0;
2236 }
2237 
/*
 * cayman_vm_fini - tear down VM manager state
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman_vm_init() allocates no resources,
 * so there is nothing to release here.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2241 
2242 /**
2243  * cayman_vm_decode_fault - print human readable fault info
2244  *
2245  * @rdev: radeon_device pointer
2246  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2247  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2248  *
2249  * Print human readable fault information (cayman/TN).
2250  */
2251 void cayman_vm_decode_fault(struct radeon_device *rdev,
2252 			    u32 status, u32 addr)
2253 {
2254 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2255 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2256 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2257 	char *block;
2258 
2259 	switch (mc_id) {
2260 	case 32:
2261 	case 16:
2262 	case 96:
2263 	case 80:
2264 	case 160:
2265 	case 144:
2266 	case 224:
2267 	case 208:
2268 		block = "CB";
2269 		break;
2270 	case 33:
2271 	case 17:
2272 	case 97:
2273 	case 81:
2274 	case 161:
2275 	case 145:
2276 	case 225:
2277 	case 209:
2278 		block = "CB_FMASK";
2279 		break;
2280 	case 34:
2281 	case 18:
2282 	case 98:
2283 	case 82:
2284 	case 162:
2285 	case 146:
2286 	case 226:
2287 	case 210:
2288 		block = "CB_CMASK";
2289 		break;
2290 	case 35:
2291 	case 19:
2292 	case 99:
2293 	case 83:
2294 	case 163:
2295 	case 147:
2296 	case 227:
2297 	case 211:
2298 		block = "CB_IMMED";
2299 		break;
2300 	case 36:
2301 	case 20:
2302 	case 100:
2303 	case 84:
2304 	case 164:
2305 	case 148:
2306 	case 228:
2307 	case 212:
2308 		block = "DB";
2309 		break;
2310 	case 37:
2311 	case 21:
2312 	case 101:
2313 	case 85:
2314 	case 165:
2315 	case 149:
2316 	case 229:
2317 	case 213:
2318 		block = "DB_HTILE";
2319 		break;
2320 	case 38:
2321 	case 22:
2322 	case 102:
2323 	case 86:
2324 	case 166:
2325 	case 150:
2326 	case 230:
2327 	case 214:
2328 		block = "SX";
2329 		break;
2330 	case 39:
2331 	case 23:
2332 	case 103:
2333 	case 87:
2334 	case 167:
2335 	case 151:
2336 	case 231:
2337 	case 215:
2338 		block = "DB_STEN";
2339 		break;
2340 	case 40:
2341 	case 24:
2342 	case 104:
2343 	case 88:
2344 	case 232:
2345 	case 216:
2346 	case 168:
2347 	case 152:
2348 		block = "TC_TFETCH";
2349 		break;
2350 	case 41:
2351 	case 25:
2352 	case 105:
2353 	case 89:
2354 	case 233:
2355 	case 217:
2356 	case 169:
2357 	case 153:
2358 		block = "TC_VFETCH";
2359 		break;
2360 	case 42:
2361 	case 26:
2362 	case 106:
2363 	case 90:
2364 	case 234:
2365 	case 218:
2366 	case 170:
2367 	case 154:
2368 		block = "VC";
2369 		break;
2370 	case 112:
2371 		block = "CP";
2372 		break;
2373 	case 113:
2374 	case 114:
2375 		block = "SH";
2376 		break;
2377 	case 115:
2378 		block = "VGT";
2379 		break;
2380 	case 178:
2381 		block = "IH";
2382 		break;
2383 	case 51:
2384 		block = "RLC";
2385 		break;
2386 	case 55:
2387 		block = "DMA";
2388 		break;
2389 	case 56:
2390 		block = "HDP";
2391 		break;
2392 	default:
2393 		block = "unknown";
2394 		break;
2395 	}
2396 
2397 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2398 	       protections, vmid, addr,
2399 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2400 	       block, mc_id);
2401 }
2402 
2403 #define R600_ENTRY_VALID   (1 << 0)
2404 #define R600_PTE_SYSTEM    (1 << 1)
2405 #define R600_PTE_SNOOPED   (1 << 2)
2406 #define R600_PTE_READABLE  (1 << 5)
2407 #define R600_PTE_WRITEABLE (1 << 6)
2408 
2409 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2410 {
2411 	uint32_t r600_flags = 0;
2412 	r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2413 	r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2414 	r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2415 	if (flags & RADEON_VM_PAGE_SYSTEM) {
2416 		r600_flags |= R600_PTE_SYSTEM;
2417 		r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2418 	}
2419 	return r600_flags;
2420 }
2421 
2422 /**
2423  * cayman_vm_set_page - update the page tables using the CP
2424  *
2425  * @rdev: radeon_device pointer
2426  * @ib: indirect buffer to fill with commands
2427  * @pe: addr of the page entry
2428  * @addr: dst addr to write into pe
2429  * @count: number of page entries to update
2430  * @incr: increase next addr by incr bytes
2431  * @flags: access flags
2432  *
2433  * Update the page tables using the CP (cayman/TN).
2434  */
void cayman_vm_set_page(struct radeon_device *rdev,
			struct radeon_ib *ib,
			uint64_t pe,
			uint64_t addr, unsigned count,
			uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* emit ME_WRITE packets on the CP; each 64-bit PTE takes
		 * two dwords, so large updates are split into packets of
		 * at most 0x3FFF dwords each */
		while (count) {
			ndw = 1 + count * 2;
			if (ndw > 0x3FFF)
				ndw = 0x3FFF;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			/* ndw, count and pe advance together: two payload
			 * dwords written per page table entry */
			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages are translated through
					 * the GART; low 12 bits are cleared to
					 * leave room for the flag bits */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					/* invalid entry */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* page table updates are routed to the DMA ring instead */
		cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
2473 
2474 /**
2475  * cayman_vm_flush - vm flush using the CP
2476  *
2477  * @rdev: radeon_device pointer
2478  *
2479  * Update the page table base and flush the VM TLB
2480  * using the CP (cayman-si).
2481  */
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* program the page directory base for this VM context; the
	 * per-context registers are 4 bytes apart, hence id << 2 */
	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
	radeon_ring_write(ring, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
2504