/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include "drmP.h"
#include "radeon_drm.h"
#include "radeon.h"
#include "radeon_mode.h"
#include "r600d.h"
#include "atom.h"
#include "avivod.h"

#define PFP_UCODE_SIZE 576
#define PM4_UCODE_SIZE 1792
#define RLC_UCODE_SIZE 768
#define R700_PFP_UCODE_SIZE 848
#define R700_PM4_UCODE_SIZE 1360
#define R700_RLC_UCODE_SIZE 1024

/* Firmware Names */
MODULE_FIRMWARE("radeon/R600_pfp.bin");
MODULE_FIRMWARE("radeon/R600_me.bin");
MODULE_FIRMWARE("radeon/RV610_pfp.bin");
MODULE_FIRMWARE("radeon/RV610_me.bin");
MODULE_FIRMWARE("radeon/RV630_pfp.bin");
MODULE_FIRMWARE("radeon/RV630_me.bin");
MODULE_FIRMWARE("radeon/RV620_pfp.bin");
MODULE_FIRMWARE("radeon/RV620_me.bin");
MODULE_FIRMWARE("radeon/RV635_pfp.bin");
MODULE_FIRMWARE("radeon/RV635_me.bin");
MODULE_FIRMWARE("radeon/RV670_pfp.bin");
MODULE_FIRMWARE("radeon/RV670_me.bin");
MODULE_FIRMWARE("radeon/RS780_pfp.bin");
MODULE_FIRMWARE("radeon/RS780_me.bin");
MODULE_FIRMWARE("radeon/RV770_pfp.bin");
MODULE_FIRMWARE("radeon/RV770_me.bin");
MODULE_FIRMWARE("radeon/RV730_pfp.bin");
MODULE_FIRMWARE("radeon/RV730_me.bin");
MODULE_FIRMWARE("radeon/RV710_pfp.bin");
MODULE_FIRMWARE("radeon/RV710_me.bin");
MODULE_FIRMWARE("radeon/R600_rlc.bin");
MODULE_FIRMWARE("radeon/R700_rlc.bin");

int r600_debugfs_mc_info_init(struct radeon_device *rdev);

/* r600,rv610,rv630,rv620,rv635,rv670 */
int r600_mc_wait_for_idle(struct radeon_device *rdev);
void r600_gpu_init(struct radeon_device *rdev);
void r600_fini(struct radeon_device *rdev);

/*
 * R600 PCIE GART
 */
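/*
 * Clear a single GART page table entry. Each PTE is 8 bytes wide (the
 * table is sized as num_gpu_pages * 8 below), so entry @i lives at byte
 * offset i * 8 in the table mapped in VRAM.
 */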
int r600_gart_clear_page(struct radeon_device *rdev, int i)
{
	void __iomem *ptr = (void __iomem *)rdev->gart.table.vram.ptr;
	u64 pte;

	if (i < 0 || i >= rdev->gart.num_gpu_pages)
		return -EINVAL;
	pte = 0;
	writeq(pte, ptr + (i * 8));
	return 0;
}

void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12);
	WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* poll the invalidation response */
		tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE);
		tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT;
		if (tmp == 2) {
			printk(KERN_WARNING "[drm] r600 flush TLB failed\n");
			return;
		}
		if (tmp) {
			return;
		}
		udelay(1);
	}
}

int r600_pcie_gart_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.table.vram.robj) {
		WARN(1, "R600 PCIE GART already initialized.\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r)
		return r;
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
	return radeon_gart_table_vram_alloc(rdev);
}

int r600_pcie_gart_enable(struct radeon_device *rdev)
{
	u32 tmp;
	int r, i;

	if (rdev->gart.table.vram.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;

	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
				EFFECTIVE_L2_QUEUE_SIZE(7));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
	/* Setup TLB control */
	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
		ENABLE_WAIT_L2_QUERY;
	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING);
	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	for (i = 1; i < 7; i++)
		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);

	r600_pcie_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void r600_pcie_gart_disable(struct radeon_device *rdev)
{
	u32 tmp;
	int i;

	/* Disable all tables */
	for (i = 0; i < 7; i++)
		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);

	/* Disable L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
				EFFECTIVE_L2_QUEUE_SIZE(7));
	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
	/* Setup L1 TLB control */
	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
		ENABLE_WAIT_L2_QUERY;
	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
	if (rdev->gart.table.vram.robj) {
		radeon_object_kunmap(rdev->gart.table.vram.robj);
		radeon_object_unpin(rdev->gart.table.vram.robj);
	}
}

void r600_pcie_gart_fini(struct radeon_device *rdev)
{
	r600_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

void r600_agp_enable(struct radeon_device *rdev)
{
	u32 tmp;
	int i;

	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
				EFFECTIVE_L2_QUEUE_SIZE(7));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
	/* Setup TLB control */
	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
		ENABLE_WAIT_L2_QUERY;
	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING);
	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
	for (i = 0; i < 7; i++)
		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
}

int r600_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* poll the MC busy bits in SRBM_STATUS */
		tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00;
		if (!tmp)
			return 0;
		udelay(1);
	}
	return -1;
}
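
/*
 * Illustrative sketch only (not part of the original driver): the busy-wait
 * idiom used by r600_mc_wait_for_idle() and r600_pcie_gart_tlb_flush() could
 * be factored into a small helper along these lines.
 */
static inline int r600_poll_reg_clear(struct radeon_device *rdev,
				      u32 reg, u32 mask)
{
	unsigned i;

	for (i = 0; i < rdev->usec_timeout; i++) {
		if (!(RREG32(reg) & mask))
			return 0;	/* bits cleared before the timeout */
		udelay(1);
	}
	return -1;			/* timed out */
}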

static void r600_mc_program(struct radeon_device *rdev)
{
	struct rv515_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	rv515_mc_stop(rdev, &save);
	if (r600_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Lockout access through VGA aperture (doesn't exist before R600) */
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	if (rdev->flags & RADEON_IS_AGP) {
		if (rdev->mc.vram_start < rdev->mc.gtt_start) {
			/* VRAM before AGP */
			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
				rdev->mc.vram_start >> 12);
			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
				rdev->mc.gtt_end >> 12);
		} else {
			/* VRAM after AGP */
			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
				rdev->mc.gtt_start >> 12);
			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
				rdev->mc.vram_end >> 12);
		}
	} else {
		WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
		WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, rdev->mc.vram_end >> 12);
	}
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
	WREG32(HDP_NONSURFACE_SIZE, rdev->mc.mc_vram_size | 0x3FF);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 22);
		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 22);
		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
	} else {
		WREG32(MC_VM_AGP_BASE, 0);
		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	}
	if (r600_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	rv515_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it from overwriting our objects */
	rv515_vga_render_disable(rdev);
}

int r600_mc_init(struct radeon_device *rdev)
{
	fixed20_12 a;
	u32 tmp;
	int chansize, numchan;
	int r;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could the aperture size report 0? */
	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
	/* Setup GPU memory space */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);

	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
		rdev->mc.mc_vram_size = rdev->mc.aper_size;

	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
		rdev->mc.real_vram_size = rdev->mc.aper_size;

	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r)
			return r;
		/* gtt_size is set up by radeon_agp_init */
		rdev->mc.gtt_location = rdev->mc.agp_base;
		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
		/* Try to put VRAM before or after AGP because we want
		 * the SYSTEM_APERTURE to cover both VRAM and AGP so
		 * that the GPU can catch out-of-range VRAM/AGP accesses
		 */
		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
			/* Enough room before */
			rdev->mc.vram_location = rdev->mc.gtt_location -
							rdev->mc.mc_vram_size;
		} else if (tmp > rdev->mc.mc_vram_size) {
			/* Enough room after */
			rdev->mc.vram_location = rdev->mc.gtt_location +
							rdev->mc.gtt_size;
		} else {
			/* Fall back to VRAM first, then AGP; this might
			 * not work on some cards
			 */
			rdev->mc.vram_location = 0x00000000UL;
			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
		}
	} else {
		rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) &
							0xFFFF) << 24;
		tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
		if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
			/* Enough room after VRAM */
			rdev->mc.gtt_location = tmp;
		} else if (rdev->mc.vram_location >= rdev->mc.gtt_size) {
			/* Enough room before VRAM */
			rdev->mc.gtt_location = 0;
		} else {
			/* Not enough room before or after VRAM;
			 * shrink the GART size
			 */
			if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) {
				rdev->mc.gtt_location = 0;
				rdev->mc.gtt_size = rdev->mc.vram_location;
			} else {
				rdev->mc.gtt_location = tmp;
				rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp;
			}
		}
	}
	rdev->mc.vram_start = rdev->mc.vram_location;
	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
	rdev->mc.gtt_start = rdev->mc.gtt_location;
	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
	/* FIXME: we should enforce default clock in case GPU is not in
	 * default setup
	 */
	a.full = rfixed_const(100);
	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
	return 0;
}

/* We don't check whether the GPU really needs a reset; we simply do the
 * reset, and it's up to the caller to determine if the GPU needs one. We
 * might add a helper function to check that.
 */
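
/*
 * A possible helper (sketch, not in the original source): let callers ask
 * whether the GPU still looks busy before deciding on a soft reset. The
 * GUI_ACTIVE bit is used purely as an example; a real check would likely
 * combine the same GRBM busy masks built in r600_gpu_soft_reset() below.
 */
static inline bool r600_gpu_is_busy(struct radeon_device *rdev)
{
	return (RREG32(R_008010_GRBM_STATUS) & S_008010_GUI_ACTIVE(1)) != 0;
}
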
int r600_gpu_soft_reset(struct radeon_device *rdev)
{
	struct rv515_mc_save save;
	u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
				S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
				S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
				S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) |
				S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) |
				S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) |
				S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) |
				S_008010_GUI_ACTIVE(1);
	u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) |
			S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) |
			S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) |
			S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) |
			S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) |
			S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) |
			S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) |
			S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1);
	u32 srbm_reset = 0;
	u32 tmp;

	dev_info(rdev->dev, "GPU soft reset\n");
	dev_info(rdev->dev, "  R_008010_GRBM_STATUS=0x%08X\n",
		RREG32(R_008010_GRBM_STATUS));
	dev_info(rdev->dev, "  R_008014_GRBM_STATUS2=0x%08X\n",
		RREG32(R_008014_GRBM_STATUS2));
	dev_info(rdev->dev, "  R_000E50_SRBM_STATUS=0x%08X\n",
		RREG32(R_000E50_SRBM_STATUS));
	rv515_mc_stop(rdev, &save);
	if (r600_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Disable CP parsing/prefetching */
	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff));
	/* Check if any of the rendering blocks is busy and reset it */
	if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
	    (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
		tmp = S_008020_SOFT_RESET_CR(1) |
			S_008020_SOFT_RESET_DB(1) |
			S_008020_SOFT_RESET_CB(1) |
			S_008020_SOFT_RESET_PA(1) |
			S_008020_SOFT_RESET_SC(1) |
			S_008020_SOFT_RESET_SMX(1) |
			S_008020_SOFT_RESET_SPI(1) |
			S_008020_SOFT_RESET_SX(1) |
			S_008020_SOFT_RESET_SH(1) |
			S_008020_SOFT_RESET_TC(1) |
			S_008020_SOFT_RESET_TA(1) |
			S_008020_SOFT_RESET_VC(1) |
			S_008020_SOFT_RESET_VGT(1);
		dev_info(rdev->dev, "  R_008020_GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(R_008020_GRBM_SOFT_RESET, tmp);
		(void)RREG32(R_008020_GRBM_SOFT_RESET);
		udelay(50);
		WREG32(R_008020_GRBM_SOFT_RESET, 0);
		(void)RREG32(R_008020_GRBM_SOFT_RESET);
	}
	/* Reset CP (we always reset CP) */
	tmp = S_008020_SOFT_RESET_CP(1);
	dev_info(rdev->dev, "R_008020_GRBM_SOFT_RESET=0x%08X\n", tmp);
	WREG32(R_008020_GRBM_SOFT_RESET, tmp);
	(void)RREG32(R_008020_GRBM_SOFT_RESET);
	udelay(50);
	WREG32(R_008020_GRBM_SOFT_RESET, 0);
	(void)RREG32(R_008020_GRBM_SOFT_RESET);
	/* Reset other GPU blocks if necessary */
	if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
	if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_GRBM(1);
	if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_IH(1);
	if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_VMC(1);
	if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
	if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
	if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
	if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
	if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
	if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_SEM(1);
	if (G_000E50_BIF_BUSY(RREG32(R_000E50_SRBM_STATUS)))
		srbm_reset |= S_000E60_SOFT_RESET_BIF(1);
	dev_info(rdev->dev, "  R_000E60_SRBM_SOFT_RESET=0x%08X\n", srbm_reset);
	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
	udelay(50);
	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
	udelay(50);
	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
	dev_info(rdev->dev, "  R_008010_GRBM_STATUS=0x%08X\n",
		RREG32(R_008010_GRBM_STATUS));
	dev_info(rdev->dev, "  R_008014_GRBM_STATUS2=0x%08X\n",
		RREG32(R_008014_GRBM_STATUS2));
	dev_info(rdev->dev, "  R_000E50_SRBM_STATUS=0x%08X\n",
		RREG32(R_000E50_SRBM_STATUS));
	/* After reset we need to reinit the asic as the GPU often ends up
	 * in an incoherent state.
	 */
	atom_asic_init(rdev->mode_info.atom_context);
	rv515_mc_resume(rdev, &save);
	return 0;
}

int r600_gpu_reset(struct radeon_device *rdev)
{
	return r600_gpu_soft_reset(rdev);
}

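/*
 * Build a tile-pipe -> render-backend map: the backends left enabled by
 * @backend_disable_mask are assigned round-robin to the swizzled pipe
 * order, two bits per pipe in the returned value.
 */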
static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
					     u32 num_backends,
					     u32 backend_disable_mask)
{
	u32 backend_map = 0;
	u32 enabled_backends_mask;
	u32 enabled_backends_count;
	u32 cur_pipe;
	u32 swizzle_pipe[R6XX_MAX_PIPES];
	u32 cur_backend;
	u32 i;

	if (num_tile_pipes > R6XX_MAX_PIPES)
		num_tile_pipes = R6XX_MAX_PIPES;
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_backends > R6XX_MAX_BACKENDS)
		num_backends = R6XX_MAX_BACKENDS;
	if (num_backends < 1)
		num_backends = 1;

	enabled_backends_mask = 0;
	enabled_backends_count = 0;
	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
		if (((backend_disable_mask >> i) & 1) == 0) {
			enabled_backends_mask |= (1 << i);
			++enabled_backends_count;
		}
		if (enabled_backends_count == num_backends)
			break;
	}

	if (enabled_backends_count == 0) {
		enabled_backends_mask = 1;
		enabled_backends_count = 1;
	}

	if (enabled_backends_count != num_backends)
		num_backends = enabled_backends_count;

	memset(swizzle_pipe, 0, sizeof(swizzle_pipe));
	switch (num_tile_pipes) {
	case 1:
		swizzle_pipe[0] = 0;
		break;
	case 2:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		break;
	case 3:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		swizzle_pipe[2] = 2;
		break;
	case 4:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		swizzle_pipe[2] = 2;
		swizzle_pipe[3] = 3;
		break;
	case 5:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		swizzle_pipe[2] = 2;
		swizzle_pipe[3] = 3;
		swizzle_pipe[4] = 4;
		break;
	case 6:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 2;
		swizzle_pipe[2] = 4;
		swizzle_pipe[3] = 5;
		swizzle_pipe[4] = 1;
		swizzle_pipe[5] = 3;
		break;
	case 7:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 2;
		swizzle_pipe[2] = 4;
		swizzle_pipe[3] = 6;
		swizzle_pipe[4] = 1;
		swizzle_pipe[5] = 3;
		swizzle_pipe[6] = 5;
		break;
	case 8:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 2;
		swizzle_pipe[2] = 4;
		swizzle_pipe[3] = 6;
		swizzle_pipe[4] = 1;
		swizzle_pipe[5] = 3;
		swizzle_pipe[6] = 5;
		swizzle_pipe[7] = 7;
		break;
	}

	cur_backend = 0;
	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		while (((1 << cur_backend) & enabled_backends_mask) == 0)
			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;

		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));

		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
	}

	return backend_map;
}

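/*
 * Plain population count; the generic hweight32() from <linux/bitops.h>
 * computes the same thing and could be used instead.
 */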
int r600_count_pipe_bits(uint32_t val)
{
	int i, ret = 0;

	for (i = 0; i < 32; i++) {
		ret += val & 1;
		val >>= 1;
	}
	return ret;
}

void r600_gpu_init(struct radeon_device *rdev)
{
	u32 tiling_config;
	u32 ramcfg;
	u32 tmp;
	int i, j;
	u32 sq_config;
	u32 sq_gpr_resource_mgmt_1 = 0;
	u32 sq_gpr_resource_mgmt_2 = 0;
	u32 sq_thread_resource_mgmt = 0;
	u32 sq_stack_resource_mgmt_1 = 0;
	u32 sq_stack_resource_mgmt_2 = 0;

	/* FIXME: implement */
	switch (rdev->family) {
	case CHIP_R600:
		rdev->config.r600.max_pipes = 4;
		rdev->config.r600.max_tile_pipes = 8;
		rdev->config.r600.max_simds = 4;
		rdev->config.r600.max_backends = 4;
		rdev->config.r600.max_gprs = 256;
		rdev->config.r600.max_threads = 192;
		rdev->config.r600.max_stack_entries = 256;
		rdev->config.r600.max_hw_contexts = 8;
		rdev->config.r600.max_gs_threads = 16;
		rdev->config.r600.sx_max_export_size = 128;
		rdev->config.r600.sx_max_export_pos_size = 16;
		rdev->config.r600.sx_max_export_smx_size = 128;
		rdev->config.r600.sq_num_cf_insts = 2;
		break;
	case CHIP_RV630:
	case CHIP_RV635:
		rdev->config.r600.max_pipes = 2;
		rdev->config.r600.max_tile_pipes = 2;
		rdev->config.r600.max_simds = 3;
		rdev->config.r600.max_backends = 1;
		rdev->config.r600.max_gprs = 128;
		rdev->config.r600.max_threads = 192;
		rdev->config.r600.max_stack_entries = 128;
		rdev->config.r600.max_hw_contexts = 8;
		rdev->config.r600.max_gs_threads = 4;
		rdev->config.r600.sx_max_export_size = 128;
		rdev->config.r600.sx_max_export_pos_size = 16;
		rdev->config.r600.sx_max_export_smx_size = 128;
		rdev->config.r600.sq_num_cf_insts = 2;
		break;
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
		rdev->config.r600.max_pipes = 1;
		rdev->config.r600.max_tile_pipes = 1;
		rdev->config.r600.max_simds = 2;
		rdev->config.r600.max_backends = 1;
		rdev->config.r600.max_gprs = 128;
		rdev->config.r600.max_threads = 192;
		rdev->config.r600.max_stack_entries = 128;
		rdev->config.r600.max_hw_contexts = 4;
		rdev->config.r600.max_gs_threads = 4;
		rdev->config.r600.sx_max_export_size = 128;
		rdev->config.r600.sx_max_export_pos_size = 16;
		rdev->config.r600.sx_max_export_smx_size = 128;
		rdev->config.r600.sq_num_cf_insts = 1;
		break;
	case CHIP_RV670:
		rdev->config.r600.max_pipes = 4;
		rdev->config.r600.max_tile_pipes = 4;
		rdev->config.r600.max_simds = 4;
		rdev->config.r600.max_backends = 4;
		rdev->config.r600.max_gprs = 192;
		rdev->config.r600.max_threads = 192;
		rdev->config.r600.max_stack_entries = 256;
		rdev->config.r600.max_hw_contexts = 8;
		rdev->config.r600.max_gs_threads = 16;
		rdev->config.r600.sx_max_export_size = 128;
		rdev->config.r600.sx_max_export_pos_size = 16;
		rdev->config.r600.sx_max_export_smx_size = 128;
		rdev->config.r600.sq_num_cf_insts = 2;
		break;
	default:
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* Setup tiling */
	tiling_config = 0;
	ramcfg = RREG32(RAMCFG);
	switch (rdev->config.r600.max_tile_pipes) {
	case 1:
		tiling_config |= PIPE_TILING(0);
		break;
	case 2:
		tiling_config |= PIPE_TILING(1);
		break;
	case 4:
		tiling_config |= PIPE_TILING(2);
		break;
	case 8:
		tiling_config |= PIPE_TILING(3);
		break;
	default:
		break;
	}
	tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
	tiling_config |= GROUP_SIZE(0);
	tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
	if (tmp > 3) {
		tiling_config |= ROW_TILING(3);
		tiling_config |= SAMPLE_SPLIT(3);
	} else {
		tiling_config |= ROW_TILING(tmp);
		tiling_config |= SAMPLE_SPLIT(tmp);
	}
	tiling_config |= BANK_SWAPS(1);
	tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
						rdev->config.r600.max_backends,
						(0xff << rdev->config.r600.max_backends) & 0xff);
	tiling_config |= BACKEND_MAP(tmp);
	WREG32(GB_TILING_CONFIG, tiling_config);
	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
	WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);

	tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
	WREG32(CC_RB_BACKEND_DISABLE, tmp);

	/* Setup pipes */
	tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
	tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
	WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp);
	WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp);

	tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK);
	WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);

	/* Setup some CP states */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40)));

	WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT |
			     SYNC_WALKER | SYNC_ALIGNER));
	/* Setup various GPU states */
	if (rdev->family == CHIP_RV670)
		WREG32(ARB_GDEC_RD_CNTL, 0x00000021);

	tmp = RREG32(SX_DEBUG_1);
	tmp |= SMX_EVENT_RELEASE;
	if (rdev->family > CHIP_R600)
		tmp |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, tmp);

	if (((rdev->family) == CHIP_R600) ||
	    ((rdev->family) == CHIP_RV630) ||
	    ((rdev->family) == CHIP_RV610) ||
	    ((rdev->family) == CHIP_RV620) ||
	    ((rdev->family) == CHIP_RS780) ||
	    ((rdev->family) == CHIP_RS880)) {
		WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
	} else {
		WREG32(DB_DEBUG, 0);
	}
	WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) |
			       DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4)));

	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
	WREG32(VGT_NUM_INSTANCES, 0);

	WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0));

	tmp = RREG32(SQ_MS_FIFO_SIZES);
	if (((rdev->family) == CHIP_RV610) ||
	    ((rdev->family) == CHIP_RV620) ||
	    ((rdev->family) == CHIP_RS780) ||
	    ((rdev->family) == CHIP_RS880)) {
		tmp = (CACHE_FIFO_SIZE(0xa) |
		       FETCH_FIFO_HIWATER(0xa) |
		       DONE_FIFO_HIWATER(0xe0) |
		       ALU_UPDATE_FIFO_HIWATER(0x8));
	} else if (((rdev->family) == CHIP_R600) ||
		   ((rdev->family) == CHIP_RV630)) {
		tmp &= ~DONE_FIFO_HIWATER(0xff);
		tmp |= DONE_FIFO_HIWATER(0x4);
	}
	WREG32(SQ_MS_FIFO_SIZES, tmp);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
	 */
	sq_config = RREG32(SQ_CONFIG);
	sq_config &= ~(PS_PRIO(3) |
		       VS_PRIO(3) |
		       GS_PRIO(3) |
		       ES_PRIO(3));
	sq_config |= (DX9_CONSTS |
		      VC_ENABLE |
		      PS_PRIO(0) |
		      VS_PRIO(1) |
		      GS_PRIO(2) |
		      ES_PRIO(3));

	if ((rdev->family) == CHIP_R600) {
		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) |
					  NUM_VS_GPRS(124) |
					  NUM_CLAUSE_TEMP_GPRS(4));
		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) |
					  NUM_ES_GPRS(0));
		sq_thread_resource_mgmt = (NUM_PS_THREADS(136) |
					   NUM_VS_THREADS(48) |
					   NUM_GS_THREADS(4) |
					   NUM_ES_THREADS(4));
		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) |
					    NUM_VS_STACK_ENTRIES(128));
		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) |
					    NUM_ES_STACK_ENTRIES(0));
	} else if (((rdev->family) == CHIP_RV610) ||
		   ((rdev->family) == CHIP_RV620) ||
		   ((rdev->family) == CHIP_RS780) ||
		   ((rdev->family) == CHIP_RS880)) {
		/* no vertex cache */
		sq_config &= ~VC_ENABLE;

		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
					  NUM_VS_GPRS(44) |
					  NUM_CLAUSE_TEMP_GPRS(2));
		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
					  NUM_ES_GPRS(17));
		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
					   NUM_VS_THREADS(78) |
					   NUM_GS_THREADS(4) |
					   NUM_ES_THREADS(31));
		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
					    NUM_VS_STACK_ENTRIES(40));
		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
					    NUM_ES_STACK_ENTRIES(16));
	} else if (((rdev->family) == CHIP_RV630) ||
		   ((rdev->family) == CHIP_RV635)) {
		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
					  NUM_VS_GPRS(44) |
					  NUM_CLAUSE_TEMP_GPRS(2));
		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) |
					  NUM_ES_GPRS(18));
		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
					   NUM_VS_THREADS(78) |
					   NUM_GS_THREADS(4) |
					   NUM_ES_THREADS(31));
		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
					    NUM_VS_STACK_ENTRIES(40));
		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
					    NUM_ES_STACK_ENTRIES(16));
	} else if ((rdev->family) == CHIP_RV670) {
		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
					  NUM_VS_GPRS(44) |
					  NUM_CLAUSE_TEMP_GPRS(2));
		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
					  NUM_ES_GPRS(17));
		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
					   NUM_VS_THREADS(78) |
					   NUM_GS_THREADS(4) |
					   NUM_ES_THREADS(31));
		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) |
					    NUM_VS_STACK_ENTRIES(64));
		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) |
					    NUM_ES_STACK_ENTRIES(64));
	}

	WREG32(SQ_CONFIG, sq_config);
	WREG32(SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
	WREG32(SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
	WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
	WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);

	if (((rdev->family) == CHIP_RV610) ||
	    ((rdev->family) == CHIP_RV620) ||
	    ((rdev->family) == CHIP_RS780) ||
	    ((rdev->family) == CHIP_RS880)) {
		WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY));
	} else {
		WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC));
	}

	/* More default values. 2D/3D driver should adjust as needed */
	WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) |
					 S1_X(0x4) | S1_Y(0xc)));
	WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) |
					 S1_X(0x2) | S1_Y(0x2) |
					 S2_X(0xa) | S2_Y(0x6) |
					 S3_X(0x6) | S3_Y(0xa)));
	WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) |
					     S1_X(0x4) | S1_Y(0xc) |
					     S2_X(0x1) | S2_Y(0x6) |
					     S3_X(0xa) | S3_Y(0xe)));
	WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) |
					     S5_X(0x0) | S5_Y(0x0) |
					     S6_X(0xb) | S6_Y(0x4) |
					     S7_X(0x7) | S7_Y(0x8)));

	WREG32(VGT_STRMOUT_EN, 0);
	tmp = rdev->config.r600.max_pipes * 16;
	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
		tmp += 32;
		break;
	case CHIP_RV670:
		tmp += 128;
		break;
	default:
		break;
	}
	if (tmp > 256) {
		tmp = 256;
	}
	WREG32(VGT_ES_PER_GS, 128);
	WREG32(VGT_GS_PER_ES, tmp);
	WREG32(VGT_GS_PER_VS, 2);
	WREG32(VGT_GS_VERTEX_REUSE, 16);

	/* more default values. 2D/3D driver should adjust as needed */
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
	WREG32(VGT_STRMOUT_EN, 0);
	WREG32(SX_MISC, 0);
	WREG32(PA_SC_MODE_CNTL, 0);
	WREG32(PA_SC_AA_CONFIG, 0);
	WREG32(PA_SC_LINE_STIPPLE, 0);
	WREG32(SPI_INPUT_Z, 0);
	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
	WREG32(CB_COLOR7_FRAG, 0);

	/* Clear render buffer base addresses */
	WREG32(CB_COLOR0_BASE, 0);
	WREG32(CB_COLOR1_BASE, 0);
	WREG32(CB_COLOR2_BASE, 0);
	WREG32(CB_COLOR3_BASE, 0);
	WREG32(CB_COLOR4_BASE, 0);
	WREG32(CB_COLOR5_BASE, 0);
	WREG32(CB_COLOR6_BASE, 0);
	WREG32(CB_COLOR7_BASE, 0);
	WREG32(CB_COLOR7_FRAG, 0);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
		tmp = TC_L2_SIZE(8);
		break;
	case CHIP_RV630:
	case CHIP_RV635:
		tmp = TC_L2_SIZE(4);
		break;
	case CHIP_R600:
		tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT;
		break;
	default:
		tmp = TC_L2_SIZE(0);
		break;
	}
	WREG32(TC_CNTL, tmp);

	tmp = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, tmp);

	tmp = RREG32(ARB_POP);
	tmp |= ENABLE_TC128;
	WREG32(ARB_POP, tmp);

	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
			       NUM_CLIP_SEQ(3)));
	WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095));
}


/*
 * Indirect register accessors
 */
u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
	(void)RREG32(PCIE_PORT_INDEX);
	r = RREG32(PCIE_PORT_DATA);
	return r;
}

void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
	(void)RREG32(PCIE_PORT_INDEX);
	WREG32(PCIE_PORT_DATA, (v));
	(void)RREG32(PCIE_PORT_DATA);
}

void r600_hdp_flush(struct radeon_device *rdev)
{
	WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
}

/*
 * CP & Ring
 */
void r600_cp_stop(struct radeon_device *rdev)
{
	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
}

int r600_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, rlc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_R600:
		chip_name = "R600";
		rlc_chip_name = "R600";
		break;
	case CHIP_RV610:
		chip_name = "RV610";
		rlc_chip_name = "R600";
		break;
	case CHIP_RV630:
		chip_name = "RV630";
		rlc_chip_name = "R600";
		break;
	case CHIP_RV620:
		chip_name = "RV620";
		rlc_chip_name = "R600";
		break;
	case CHIP_RV635:
		chip_name = "RV635";
		rlc_chip_name = "R600";
		break;
	case CHIP_RV670:
		chip_name = "RV670";
		rlc_chip_name = "R600";
		break;
	case CHIP_RS780:
	case CHIP_RS880:
		chip_name = "RS780";
		rlc_chip_name = "R600";
		break;
	case CHIP_RV770:
		chip_name = "RV770";
		rlc_chip_name = "R700";
		break;
	case CHIP_RV730:
	case CHIP_RV740:
		chip_name = "RV730";
		rlc_chip_name = "R700";
		break;
	case CHIP_RV710:
		chip_name = "RV710";
		rlc_chip_name = "R700";
		break;
	default: BUG();
	}

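	/*
	 * Sizes are in bytes: the *_UCODE_SIZE constants count 32-bit dwords
	 * (hence the * 4); the pre-R700 ME image holds three dwords per
	 * entry, so it is * 3 dwords = * 12 bytes, matching the load loop
	 * in r600_cp_load_microcode().
	 */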
	if (rdev->family >= CHIP_RV770) {
		pfp_req_size = R700_PFP_UCODE_SIZE * 4;
		me_req_size = R700_PM4_UCODE_SIZE * 4;
		rlc_req_size = R700_RLC_UCODE_SIZE * 4;
	} else {
		pfp_req_size = PFP_UCODE_SIZE * 4;
		me_req_size = PM4_UCODE_SIZE * 12;
		rlc_req_size = RLC_UCODE_SIZE * 4;
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "r600_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "r600_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
	}
	return err;
}

static int r600_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	r600_cp_stop(rdev);

	WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));

	/* Reset cp */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);

	WREG32(CP_ME_RAM_WADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < PM4_UCODE_SIZE * 3; i++)
		WREG32(CP_ME_RAM_DATA,
		       be32_to_cpup(fw_data++));

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA,
		       be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

int r600_cp_start(struct radeon_device *rdev)
{
	int r;
	uint32_t cp_me;

	r = radeon_ring_lock(rdev, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(rdev, 0x1);
	if (rdev->family < CHIP_RV770) {
		radeon_ring_write(rdev, 0x3);
		radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1);
	} else {
		radeon_ring_write(rdev, 0x0);
		radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1);
	}
	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, 0);
	radeon_ring_unlock_commit(rdev);

	cp_me = 0xff;
	WREG32(R_0086D8_CP_ME_CNTL, cp_me);
	return 0;
}

int r600_cp_resume(struct radeon_device *rdev)
{
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);

	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
	tmp = RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB_CNTL, tmp);
	WREG32(CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB_RPTR_WR, 0);
	WREG32(CP_RB_WPTR, 0);
	WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF);
	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr));
	mdelay(1);
	WREG32(CP_RB_CNTL, tmp);

	WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));

	rdev->cp.rptr = RREG32(CP_RB_RPTR);
	rdev->cp.wptr = RREG32(CP_RB_WPTR);

	r600_cp_start(rdev);
	rdev->cp.ready = true;
	r = radeon_ring_test(rdev);
	if (r) {
		rdev->cp.ready = false;
		return r;
	}
	return 0;
}

void r600_cp_commit(struct radeon_device *rdev)
{
	WREG32(CP_RB_WPTR, rdev->cp.wptr);
	(void)RREG32(CP_RB_WPTR);
}

void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
{
	u32 rb_bufsz;

	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	rdev->cp.ring_size = ring_size;
	rdev->cp.align_mask = 16 - 1;
}
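
/*
 * Worked example of the sizing above: for ring_size = 1 MiB,
 * drm_order(1 MiB / 8) = 17, so ring_size becomes (1 << 18) * 4 = 1 MiB
 * again; non-power-of-two requests are rounded up by drm_order().
 */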


/*
 * GPU scratch register helper functions.
 */
void r600_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4);
	}
}

int r600_ring_test(struct radeon_device *rdev)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
	radeon_ring_write(rdev, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r600_wb_disable(struct radeon_device *rdev)
{
	WREG32(SCRATCH_UMSK, 0);
	if (rdev->wb.wb_obj) {
		radeon_object_kunmap(rdev->wb.wb_obj);
		radeon_object_unpin(rdev->wb.wb_obj);
	}
}

void r600_wb_fini(struct radeon_device *rdev)
{
	r600_wb_disable(rdev);
	if (rdev->wb.wb_obj) {
		radeon_object_unref(&rdev->wb.wb_obj);
		rdev->wb.wb = NULL;
		rdev->wb.wb_obj = NULL;
	}
}

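/*
 * Layout of the single-page write-back buffer programmed below: scratch
 * register write-back data lands at offset 0 (SCRATCH_ADDR) and the CP
 * read-pointer copy at offset 1024 (CP_RB_RPTR_ADDR/_HI).
 */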
int r600_wb_enable(struct radeon_device *rdev)
{
	int r;

	if (rdev->wb.wb_obj == NULL) {
		r = radeon_object_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, true,
				RADEON_GEM_DOMAIN_GTT, false, &rdev->wb.wb_obj);
		if (r) {
			dev_warn(rdev->dev, "failed to create WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT,
				&rdev->wb.gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "failed to pin WB buffer (%d).\n", r);
			r600_wb_fini(rdev);
			return r;
		}
		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
		if (r) {
			dev_warn(rdev->dev, "failed to map WB buffer (%d).\n", r);
			r600_wb_fini(rdev);
			return r;
		}
	}
	WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF);
	WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC);
	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF);
	WREG32(SCRATCH_UMSK, 0xff);
	return 0;
}

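/*
 * Emit a fence: write the fence sequence number into the fence scratch
 * register with a SET_CONFIG_REG packet, then poke CP_INT_STATUS via a
 * type-0 packet to raise the ring-buffer interrupt.
 */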
void r600_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Also consider EVENT_WRITE_EOP; it handles the interrupts, timestamps and events */
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
	radeon_ring_write(rdev, fence->seq);
	/* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
	radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0));
	radeon_ring_write(rdev, RB_INT_STAT);
}

int r600_copy_dma(struct radeon_device *rdev,
		  uint64_t src_offset,
		  uint64_t dst_offset,
		  unsigned num_pages,
		  struct radeon_fence *fence)
{
	/* FIXME: implement */
	return 0;
}

int r600_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_pages, struct radeon_fence *fence)
{
	r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
	r600_blit_done_copy(rdev, fence);
	return 0;
}

int r600_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	/* FIXME: implement */
	return 0;
}

void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
{
	/* FIXME: implement */
}


bool r600_card_posted(struct radeon_device *rdev)
{
	uint32_t reg;

	/* first check CRTCs */
	reg = RREG32(D1CRTC_CONTROL) |
		RREG32(D2CRTC_CONTROL);
	if (reg & CRTC_EN)
		return true;

	/* then check MEM_SIZE, in case the crtcs are off */
	if (RREG32(CONFIG_MEMSIZE))
		return true;

	return false;
}

int r600_startup(struct radeon_device *rdev)
{
	int r;

	r600_mc_program(rdev);
	if (rdev->flags & RADEON_IS_AGP) {
		r600_agp_enable(rdev);
	} else {
		r = r600_pcie_gart_enable(rdev);
		if (r)
			return r;
	}
	r600_gpu_init(rdev);

	r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
			      &rdev->r600_blit.shader_gpu_addr);
	if (r) {
		DRM_ERROR("failed to pin blit object %d\n", r);
		return r;
	}

	/* Enable IRQ */
	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	r600_irq_set(rdev);

	r = radeon_ring_init(rdev, rdev->cp.ring_size);
	if (r)
		return r;
	r = r600_cp_load_microcode(rdev);
	if (r)
		return r;
	r = r600_cp_resume(rdev);
	if (r)
		return r;
	/* The write-back buffer is not vital, so don't worry about failure */
	r600_wb_enable(rdev);
	return 0;
}

void r600_vga_set_state(struct radeon_device *rdev, bool state)
{
	uint32_t temp;

	temp = RREG32(CONFIG_CNTL);
	if (!state) {
		temp &= ~(1<<0);
		temp |= (1<<1);
	} else {
		temp &= ~(1<<1);
	}
	WREG32(CONFIG_CNTL, temp);
}

int r600_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset the GPU before posting; on r600 hw, unlike r500 hw,
	 * posting will perform the necessary tasks to bring the GPU back
	 * into good shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);
	/* Initialize clocks */
	r = radeon_clocks_init(rdev);
	if (r) {
		return r;
	}

	r = r600_startup(rdev);
	if (r) {
		DRM_ERROR("r600 startup failed on resume\n");
		return r;
	}

	r = r600_ib_test(rdev);
	if (r) {
		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
		return r;
	}
	return r;
}

int r600_suspend(struct radeon_device *rdev)
{
	/* FIXME: we should wait for the ring to be empty */
	r600_cp_stop(rdev);
	rdev->cp.ready = false;
	r600_wb_disable(rdev);
	r600_pcie_gart_disable(rdev);
	/* unpin the shader bo */
	radeon_object_unpin(rdev->r600_blit.shader_obj);
	return 0;
}

/* The plan is to move initialization into this function and use helper
 * functions so that radeon_device_init pretty much does nothing more than
 * calling asic-specific functions. This should also allow us to remove a
 * bunch of callback functions like vram_info.
 */
1658 int r600_init(struct radeon_device *rdev)
1659 {
1660 	int r;
1661 
1662 	r = radeon_dummy_page_init(rdev);
1663 	if (r)
1664 		return r;
1665 	if (r600_debugfs_mc_info_init(rdev)) {
1666 		DRM_ERROR("Failed to register debugfs file for mc !\n");
1667 	}
1668 	/* This don't do much */
1669 	r = radeon_gem_init(rdev);
1670 	if (r)
1671 		return r;
1672 	/* Read BIOS */
1673 	if (!radeon_get_bios(rdev)) {
1674 		if (ASIC_IS_AVIVO(rdev))
1675 			return -EINVAL;
1676 	}
1677 	/* Must be an ATOMBIOS */
1678 	if (!rdev->is_atom_bios) {
1679 		dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
1680 		return -EINVAL;
1681 	}
1682 	r = radeon_atombios_init(rdev);
1683 	if (r)
1684 		return r;
1685 	/* Post card if necessary */
1686 	if (!r600_card_posted(rdev)) {
1687 		if (!rdev->bios) {
1688 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
1689 			return -EINVAL;
1690 		}
1691 		DRM_INFO("GPU not posted. posting now...\n");
1692 		atom_asic_init(rdev->mode_info.atom_context);
1693 	}
1694 	/* Initialize scratch registers */
1695 	r600_scratch_init(rdev);
1696 	/* Initialize surface registers */
1697 	radeon_surface_init(rdev);
1698 	/* Initialize clocks */
1699 	radeon_get_clock_info(rdev->ddev);
1700 	r = radeon_clocks_init(rdev);
1701 	if (r)
1702 		return r;
1703 	/* Initialize power management */
1704 	radeon_pm_init(rdev);
1705 	/* Fence driver */
1706 	r = radeon_fence_driver_init(rdev);
1707 	if (r)
1708 		return r;
1709 	r = r600_mc_init(rdev);
1710 	if (r)
1711 		return r;
1712 	/* Memory manager */
1713 	r = radeon_object_init(rdev);
1714 	if (r)
1715 		return r;
1716 
1717 	r = radeon_irq_kms_init(rdev);
1718 	if (r)
1719 		return r;
1720 
1721 	rdev->cp.ring_obj = NULL;
1722 	r600_ring_init(rdev, 1024 * 1024);
1723 
1724 	rdev->ih.ring_obj = NULL;
1725 	r600_ih_ring_init(rdev, 64 * 1024);
1726 
1727 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
1728 		r = r600_init_microcode(rdev);
1729 		if (r) {
1730 			DRM_ERROR("Failed to load firmware!\n");
1731 			return r;
1732 		}
1733 	}
1734 
1735 	r = r600_pcie_gart_init(rdev);
1736 	if (r)
1737 		return r;
1738 
1739 	rdev->accel_working = true;
1740 	r = r600_blit_init(rdev);
1741 	if (r) {
1742 		DRM_ERROR("radeon: failled blitter (%d).\n", r);
1743 		return r;
1744 	}
1745 
1746 	r = r600_startup(rdev);
1747 	if (r) {
1748 		r600_suspend(rdev);
1749 		r600_wb_fini(rdev);
1750 		radeon_ring_fini(rdev);
1751 		r600_pcie_gart_fini(rdev);
1752 		rdev->accel_working = false;
1753 	}
1754 	if (rdev->accel_working) {
1755 		r = radeon_ib_pool_init(rdev);
1756 		if (r) {
1757 			DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r);
1758 			rdev->accel_working = false;
1759 		}
1760 		r = r600_ib_test(rdev);
1761 		if (r) {
1762 			DRM_ERROR("radeon: failed testing IB (%d).\n", r);
1763 			rdev->accel_working = false;
1764 		}
1765 	}
1766 	return 0;
1767 }
1768 
1769 void r600_fini(struct radeon_device *rdev)
1770 {
1771 	/* Suspend operations */
1772 	r600_suspend(rdev);
1773 
1774 	r600_blit_fini(rdev);
1775 	r600_irq_fini(rdev);
1776 	radeon_irq_kms_fini(rdev);
1777 	radeon_ring_fini(rdev);
1778 	r600_wb_fini(rdev);
1779 	r600_pcie_gart_fini(rdev);
1780 	radeon_gem_fini(rdev);
1781 	radeon_fence_driver_fini(rdev);
1782 	radeon_clocks_fini(rdev);
1783 	if (rdev->flags & RADEON_IS_AGP)
1784 		radeon_agp_fini(rdev);
1785 	radeon_object_fini(rdev);
1786 	radeon_atombios_fini(rdev);
1787 	kfree(rdev->bios);
1788 	rdev->bios = NULL;
1789 	radeon_dummy_page_fini(rdev);
1790 }
1791 
1792 
1793 /*
1794  * CS stuff
1795  */
1796 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1797 {
1798 	/* FIXME: implement */
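	/* Emit a type-3 INDIRECT_BUFFER packet: a header with a payload
	 * count of three, followed by the dword-aligned IB base (low 32
	 * bits), the top 8 bits of the 40-bit GPU address, and the IB
	 * length in dwords.
	 */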
1799 	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
1800 	radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC);
1801 	radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
1802 	radeon_ring_write(rdev, ib->length_dw);
1803 }
1804 
1805 int r600_ib_test(struct radeon_device *rdev)
1806 {
1807 	struct radeon_ib *ib;
1808 	uint32_t scratch;
1809 	uint32_t tmp = 0;
1810 	unsigned i;
1811 	int r;
1812 
1813 	r = radeon_scratch_get(rdev, &scratch);
1814 	if (r) {
1815 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1816 		return r;
1817 	}
1818 	WREG32(scratch, 0xCAFEDEAD);
1819 	r = radeon_ib_get(rdev, &ib);
1820 	if (r) {
1821 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1822 		return r;
1823 	}
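	/* Build a minimal 16-dword test IB: one SET_CONFIG_REG write of
	 * 0xDEADBEEF into the scratch register, padded out with type-2
	 * NOP packets.
	 */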
1824 	ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
1825 	ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
1826 	ib->ptr[2] = 0xDEADBEEF;
1827 	ib->ptr[3] = PACKET2(0);
1828 	ib->ptr[4] = PACKET2(0);
1829 	ib->ptr[5] = PACKET2(0);
1830 	ib->ptr[6] = PACKET2(0);
1831 	ib->ptr[7] = PACKET2(0);
1832 	ib->ptr[8] = PACKET2(0);
1833 	ib->ptr[9] = PACKET2(0);
1834 	ib->ptr[10] = PACKET2(0);
1835 	ib->ptr[11] = PACKET2(0);
1836 	ib->ptr[12] = PACKET2(0);
1837 	ib->ptr[13] = PACKET2(0);
1838 	ib->ptr[14] = PACKET2(0);
1839 	ib->ptr[15] = PACKET2(0);
1840 	ib->length_dw = 16;
1841 	r = radeon_ib_schedule(rdev, ib);
1842 	if (r) {
1843 		radeon_scratch_free(rdev, scratch);
1844 		radeon_ib_free(rdev, &ib);
1845 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1846 		return r;
1847 	}
1848 	r = radeon_fence_wait(ib->fence, false);
1849 	if (r) {
1850 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1851 		return r;
1852 	}
1853 	for (i = 0; i < rdev->usec_timeout; i++) {
1854 		tmp = RREG32(scratch);
1855 		if (tmp == 0xDEADBEEF)
1856 			break;
1857 		DRM_UDELAY(1);
1858 	}
1859 	if (i < rdev->usec_timeout) {
1860 		DRM_INFO("ib test succeeded in %u usecs\n", i);
1861 	} else {
1862 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1863 			  scratch, tmp);
1864 		r = -EINVAL;
1865 	}
1866 	radeon_scratch_free(rdev, scratch);
1867 	radeon_ib_free(rdev, &ib);
1868 	return r;
1869 }
1870 
1871 /*
1872  * Interrupts
1873  *
1874  * Interrupts use a ring buffer on r6xx/r7xx hardware.  It works much
1875  * the same as the CP ring buffer, but in reverse: rather than the CPU
1876  * writing to the ring and the GPU consuming, the GPU writes to the ring
1877  * and the host consumes.  As the host irq handler processes interrupts, it
1878  * increments the rptr.  When the rptr catches up with the wptr, all the
1879  * current interrupts have been processed.
1880  */
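/* A rough sketch of the consume loop described above (illustrative
 * only; handle_entry() is a stand-in for real decoding, and
 * r600_irq_process() below is the actual implementation, with
 * overflow handling and a final wptr re-check):
 *
 *	while (rptr != wptr) {
 *		handle_entry(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) % rdev->ih.ring_size;  // 16-byte entries
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */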
1881 
1882 void r600_ih_ring_init(struct radeon_device *rdev, unsigned ring_size)
1883 {
1884 	u32 rb_bufsz;
1885 
1886 	/* Round the ring size up to a power of two */
1887 	rb_bufsz = drm_order(ring_size / 4);
1888 	ring_size = (1 << rb_bufsz) * 4;
1889 	rdev->ih.ring_size = ring_size;
1890 	rdev->ih.align_mask = 4 - 1;
1891 }
1892 
1893 static int r600_ih_ring_alloc(struct radeon_device *rdev, unsigned ring_size)
1894 {
1895 	int r;
1896 
1897 	rdev->ih.ring_size = ring_size;
1898 	/* Allocate ring buffer */
1899 	if (rdev->ih.ring_obj == NULL) {
1900 		r = radeon_object_create(rdev, NULL, rdev->ih.ring_size,
1901 					 true,
1902 					 RADEON_GEM_DOMAIN_GTT,
1903 					 false,
1904 					 &rdev->ih.ring_obj);
1905 		if (r) {
1906 			DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r);
1907 			return r;
1908 		}
1909 		r = radeon_object_pin(rdev->ih.ring_obj,
1910 				      RADEON_GEM_DOMAIN_GTT,
1911 				      &rdev->ih.gpu_addr);
1912 		if (r) {
1913 			DRM_ERROR("radeon: failed to pin ih ring buffer (%d).\n", r);
1914 			return r;
1915 		}
1916 		r = radeon_object_kmap(rdev->ih.ring_obj,
1917 				       (void **)&rdev->ih.ring);
1918 		if (r) {
1919 			DRM_ERROR("radeon: failed to map ih ring buffer (%d).\n", r);
1920 			return r;
1921 		}
1922 	}
1923 	rdev->ih.ptr_mask = (rdev->ih.ring_size / 4) - 1;
1924 	rdev->ih.rptr = 0;
1925 
1926 	return 0;
1927 }
1928 
1929 static void r600_ih_ring_fini(struct radeon_device *rdev)
1930 {
1931 	if (rdev->ih.ring_obj) {
1932 		radeon_object_kunmap(rdev->ih.ring_obj);
1933 		radeon_object_unpin(rdev->ih.ring_obj);
1934 		radeon_object_unref(&rdev->ih.ring_obj);
1935 		rdev->ih.ring = NULL;
1936 		rdev->ih.ring_obj = NULL;
1937 	}
1938 }
1939 
1940 static void r600_rlc_stop(struct radeon_device *rdev)
1941 {
1942 
1943 	if (rdev->family >= CHIP_RV770) {
1944 		/* r7xx asics need to soft reset RLC before halting */
1945 		WREG32(SRBM_SOFT_RESET, SOFT_RESET_RLC);
1946 		RREG32(SRBM_SOFT_RESET);
1947 		udelay(15000);
1948 		WREG32(SRBM_SOFT_RESET, 0);
1949 		RREG32(SRBM_SOFT_RESET);
1950 	}
1951 
1952 	WREG32(RLC_CNTL, 0);
1953 }
1954 
1955 static void r600_rlc_start(struct radeon_device *rdev)
1956 {
1957 	WREG32(RLC_CNTL, RLC_ENABLE);
1958 }
1959 
1960 static int r600_rlc_init(struct radeon_device *rdev)
1961 {
1962 	u32 i;
1963 	const __be32 *fw_data;
1964 
1965 	if (!rdev->rlc_fw)
1966 		return -EINVAL;
1967 
1968 	r600_rlc_stop(rdev);
1969 
1970 	WREG32(RLC_HB_BASE, 0);
1971 	WREG32(RLC_HB_CNTL, 0);
1972 	WREG32(RLC_HB_RPTR, 0);
1973 	WREG32(RLC_HB_WPTR, 0);
1974 	WREG32(RLC_HB_WPTR_LSB_ADDR, 0);
1975 	WREG32(RLC_HB_WPTR_MSB_ADDR, 0);
1976 	WREG32(RLC_MC_CNTL, 0);
1977 	WREG32(RLC_UCODE_CNTL, 0);
1978 
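	/* Load the microcode one dword at a time through the
	 * RLC_UCODE_ADDR/RLC_UCODE_DATA indexing pair; r7xx parts take
	 * the larger R700 image.
	 */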
1979 	fw_data = (const __be32 *)rdev->rlc_fw->data;
1980 	if (rdev->family >= CHIP_RV770) {
1981 		for (i = 0; i < R700_RLC_UCODE_SIZE; i++) {
1982 			WREG32(RLC_UCODE_ADDR, i);
1983 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
1984 		}
1985 	} else {
1986 		for (i = 0; i < RLC_UCODE_SIZE; i++) {
1987 			WREG32(RLC_UCODE_ADDR, i);
1988 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
1989 		}
1990 	}
1991 	WREG32(RLC_UCODE_ADDR, 0);
1992 
1993 	r600_rlc_start(rdev);
1994 
1995 	return 0;
1996 }
1997 
1998 static void r600_enable_interrupts(struct radeon_device *rdev)
1999 {
2000 	u32 ih_cntl = RREG32(IH_CNTL);
2001 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2002 
2003 	ih_cntl |= ENABLE_INTR;
2004 	ih_rb_cntl |= IH_RB_ENABLE;
2005 	WREG32(IH_CNTL, ih_cntl);
2006 	WREG32(IH_RB_CNTL, ih_rb_cntl);
2007 	rdev->ih.enabled = true;
2008 }
2009 
2010 static void r600_disable_interrupts(struct radeon_device *rdev)
2011 {
2012 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2013 	u32 ih_cntl = RREG32(IH_CNTL);
2014 
2015 	ih_rb_cntl &= ~IH_RB_ENABLE;
2016 	ih_cntl &= ~ENABLE_INTR;
2017 	WREG32(IH_RB_CNTL, ih_rb_cntl);
2018 	WREG32(IH_CNTL, ih_cntl);
2019 	/* set rptr, wptr to 0 */
2020 	WREG32(IH_RB_RPTR, 0);
2021 	WREG32(IH_RB_WPTR, 0);
2022 	rdev->ih.enabled = false;
2023 	rdev->ih.wptr = 0;
2024 	rdev->ih.rptr = 0;
2025 }
2026 
2027 int r600_irq_init(struct radeon_device *rdev)
2028 {
2029 	int ret = 0;
2030 	int rb_bufsz;
2031 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
2032 
2033 	/* allocate ring */
2034 	ret = r600_ih_ring_alloc(rdev, rdev->ih.ring_size);
2035 	if (ret)
2036 		return ret;
2037 
2038 	/* disable irqs */
2039 	r600_disable_interrupts(rdev);
2040 
2041 	/* init rlc */
2042 	ret = r600_rlc_init(rdev);
2043 	if (ret) {
2044 		r600_ih_ring_fini(rdev);
2045 		return ret;
2046 	}
2047 
2048 	/* setup interrupt control */
2049 	/* set dummy read address to ring address */
2050 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
2051 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
2052 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
2053 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
2054 	 */
2055 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
2056 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
2057 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
2058 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
2059 
2060 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
2061 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
2062 
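	/* the ring size is programmed into IH_RB_CNTL as log2 of the
	 * size in dwords */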
2063 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
2064 		      IH_WPTR_OVERFLOW_CLEAR |
2065 		      (rb_bufsz << 1));
2066 	/* WPTR writeback, not yet */
2067 	/*ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;*/
2068 	WREG32(IH_RB_WPTR_ADDR_LO, 0);
2069 	WREG32(IH_RB_WPTR_ADDR_HI, 0);
2070 
2071 	WREG32(IH_RB_CNTL, ih_rb_cntl);
2072 
2073 	/* set rptr, wptr to 0 */
2074 	WREG32(IH_RB_RPTR, 0);
2075 	WREG32(IH_RB_WPTR, 0);
2076 
2077 	/* Default settings for IH_CNTL (disabled at first) */
2078 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10);
2079 	/* RPTR_REARM only works if msi's are enabled */
2080 	if (rdev->msi_enabled)
2081 		ih_cntl |= RPTR_REARM;
2082 
2083 #ifdef __BIG_ENDIAN
2084 	ih_cntl |= IH_MC_SWAP(IH_MC_SWAP_32BIT);
2085 #endif
2086 	WREG32(IH_CNTL, ih_cntl);
2087 
2088 	/* force the active interrupt state to all disabled */
2089 	WREG32(CP_INT_CNTL, 0);
2090 	WREG32(GRBM_INT_CNTL, 0);
2091 	WREG32(DxMODE_INT_MASK, 0);
2092 
2093 	/* enable irqs */
2094 	r600_enable_interrupts(rdev);
2095 
2096 	return ret;
2097 }
2098 
2099 void r600_irq_fini(struct radeon_device *rdev)
2100 {
2101 	r600_disable_interrupts(rdev);
2102 	r600_rlc_stop(rdev);
2103 	r600_ih_ring_fini(rdev);
2104 }
2105 
2106 int r600_irq_set(struct radeon_device *rdev)
2107 {
2108 	uint32_t cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
2109 	uint32_t mode_int = 0;
2110 
2111 	/* don't enable anything if the ih is disabled */
2112 	if (!rdev->ih.enabled)
2113 		return 0;
2114 
2115 	if (rdev->irq.sw_int) {
2116 		DRM_DEBUG("r600_irq_set: sw int\n");
2117 		cp_int_cntl |= RB_INT_ENABLE;
2118 	}
2119 	if (rdev->irq.crtc_vblank_int[0]) {
2120 		DRM_DEBUG("r600_irq_set: vblank 0\n");
2121 		mode_int |= D1MODE_VBLANK_INT_MASK;
2122 	}
2123 	if (rdev->irq.crtc_vblank_int[1]) {
2124 		DRM_DEBUG("r600_irq_set: vblank 1\n");
2125 		mode_int |= D2MODE_VBLANK_INT_MASK;
2126 	}
2127 
2128 	WREG32(CP_INT_CNTL, cp_int_cntl);
2129 	WREG32(DxMODE_INT_MASK, mode_int);
2130 
2131 	return 0;
2132 }
2133 
2134 static inline void r600_irq_ack(struct radeon_device *rdev, u32 disp_int)
2135 {
2136 
2137 	if (disp_int & LB_D1_VBLANK_INTERRUPT)
2138 		WREG32(D1MODE_VBLANK_STATUS, DxMODE_VBLANK_ACK);
2139 	if (disp_int & LB_D1_VLINE_INTERRUPT)
2140 		WREG32(D1MODE_VLINE_STATUS, DxMODE_VLINE_ACK);
2141 	if (disp_int & LB_D2_VBLANK_INTERRUPT)
2142 		WREG32(D2MODE_VBLANK_STATUS, DxMODE_VBLANK_ACK);
2143 	if (disp_int & LB_D2_VLINE_INTERRUPT)
2144 		WREG32(D2MODE_VLINE_STATUS, DxMODE_VLINE_ACK);
2145 
2146 }
2147 
2148 void r600_irq_disable(struct radeon_device *rdev)
2149 {
2150 	u32 disp_int;
2151 
2152 	r600_disable_interrupts(rdev);
2153 	/* Wait and acknowledge irq */
2154 	mdelay(1);
2155 	if (ASIC_IS_DCE3(rdev))
2156 		disp_int = RREG32(DCE3_DISP_INTERRUPT_STATUS);
2157 	else
2158 		disp_int = RREG32(DISP_INTERRUPT_STATUS);
2159 	r600_irq_ack(rdev, disp_int);
2160 }
2161 
2162 static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
2163 {
2164 	u32 wptr, tmp;
2165 
2166 	/* XXX use writeback */
2167 	wptr = RREG32(IH_RB_WPTR);
2168 
2169 	if (wptr & RB_OVERFLOW) {
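		/* The hardware sets RB_OVERFLOW in the wptr register when
		 * the write pointer has passed an unread rptr and entries
		 * may have been dropped; all we can do here is clear the
		 * flag and keep going.
		 */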
2170 		WARN_ON(1);
2171 		/* XXX deal with overflow */
2172 		DRM_ERROR("IH RB overflow\n");
2173 		tmp = RREG32(IH_RB_CNTL);
2174 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
2175 		WREG32(IH_RB_CNTL, tmp);
2176 	}
2177 	wptr = wptr & WPTR_OFFSET_MASK;
2178 
2179 	return wptr;
2180 }
2181 
2182 /* r600 IV Ring
2183  * Each IV ring entry is 128 bits:
2184  * [7:0]    - interrupt source id
2185  * [31:8]   - reserved
2186  * [59:32]  - interrupt source data
2187  * [127:60] - reserved
2188  *
2189  * The basic interrupt vector entries
2190  * are decoded as follows:
2191  * src_id  src_data  description
2192  *      1         0  D1 Vblank
2193  *      1         1  D1 Vline
2194  *      5         0  D2 Vblank
2195  *      5         1  D2 Vline
2196  *     19         0  FP Hot plug detection A
2197  *     19         1  FP Hot plug detection B
2198  *     19         2  DAC A auto-detection
2199  *     19         3  DAC B auto-detection
2200  *    176         -  CP_INT RB
2201  *    177         -  CP_INT IB1
2202  *    178         -  CP_INT IB2
2203  *    181         -  EOP Interrupt
2204  *    233         -  GUI Idle
2205  *
2206  * Note, these are based on r600 and may need to be
2207  * adjusted or added to on newer asics
2208  */
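/* Field extraction for one 16-byte entry, as done in r600_irq_process()
 * below (rptr is a byte offset, ring[] is an array of u32):
 *
 *	ring_index = rptr / 4;
 *	src_id   = ring[ring_index] & 0xff;           // bits [7:0]
 *	src_data = ring[ring_index + 1] & 0xfffffff;  // bits [59:32]
 */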
2209 
2210 int r600_irq_process(struct radeon_device *rdev)
2211 {
2212 	u32 wptr = r600_get_ih_wptr(rdev);
2213 	u32 rptr = rdev->ih.rptr;
2214 	u32 src_id, src_data;
2215 	u32 last_entry = rdev->ih.ring_size - 16;
2216 	u32 ring_index, disp_int;
2217 	unsigned long flags;
2218 
2219 	DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
2220 
2221 	spin_lock_irqsave(&rdev->ih.lock, flags);
2222 
2223 	if (rptr == wptr) {
2224 		spin_unlock_irqrestore(&rdev->ih.lock, flags);
2225 		return IRQ_NONE;
2226 	}
2227 	if (rdev->shutdown) {
2228 		spin_unlock_irqrestore(&rdev->ih.lock, flags);
2229 		return IRQ_NONE;
2230 	}
2231 
2232 restart_ih:
2233 	/* display interrupts */
2234 	if (ASIC_IS_DCE3(rdev))
2235 		disp_int = RREG32(DCE3_DISP_INTERRUPT_STATUS);
2236 	else
2237 		disp_int = RREG32(DISP_INTERRUPT_STATUS);
2238 	r600_irq_ack(rdev, disp_int);
2239 
2240 	rdev->ih.wptr = wptr;
2241 	while (rptr != wptr) {
2242 		/* wptr/rptr are in bytes! */
2243 		ring_index = rptr / 4;
2244 		src_id =  rdev->ih.ring[ring_index] & 0xff;
2245 		src_data = rdev->ih.ring[ring_index + 1] & 0xfffffff;
2246 
2247 		switch (src_id) {
2248 		case 1: /* D1 vblank/vline */
2249 			switch (src_data) {
2250 			case 0: /* D1 vblank */
2251 				if (disp_int & LB_D1_VBLANK_INTERRUPT) {
2252 					drm_handle_vblank(rdev->ddev, 0);
2253 					disp_int &= ~LB_D1_VBLANK_INTERRUPT;
2254 					DRM_DEBUG("IH: D1 vblank\n");
2255 				}
2256 				break;
2257 			case 1: /* D1 vline */
2258 				if (disp_int & LB_D1_VLINE_INTERRUPT) {
2259 					disp_int &= ~LB_D1_VLINE_INTERRUPT;
2260 					DRM_DEBUG("IH: D1 vline\n");
2261 				}
2262 				break;
2263 			default:
2264 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
2265 				break;
2266 			}
2267 			break;
2268 		case 5: /* D2 vblank/vline */
2269 			switch (src_data) {
2270 			case 0: /* D2 vblank */
2271 				if (disp_int & LB_D2_VBLANK_INTERRUPT) {
2272 					drm_handle_vblank(rdev->ddev, 1);
2273 					disp_int &= ~LB_D2_VBLANK_INTERRUPT;
2274 					DRM_DEBUG("IH: D2 vblank\n");
2275 				}
2276 				break;
2277 			case 1: /* D2 vline */
2278 				if (disp_int & LB_D2_VLINE_INTERRUPT) {
2279 					disp_int &= ~LB_D2_VLINE_INTERRUPT;
2280 					DRM_DEBUG("IH: D2 vline\n");
2281 				}
2282 				break;
2283 			default:
2284 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
2285 				break;
2286 			}
2287 			break;
2288 		case 176: /* CP_INT in ring buffer */
2289 		case 177: /* CP_INT in IB1 */
2290 		case 178: /* CP_INT in IB2 */
2291 			DRM_DEBUG("IH: CP int: 0x%08x\n", src_data);
2292 			radeon_fence_process(rdev);
2293 			break;
2294 		case 181: /* CP EOP event */
2295 			DRM_DEBUG("IH: CP EOP\n");
2296 			break;
2297 		default:
2298 			DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
2299 			break;
2300 		}
2301 
2302 		/* wptr/rptr are in bytes! */
2303 		if (rptr == last_entry)
2304 			rptr = 0;
2305 		else
2306 			rptr += 16;
2307 	}
2308 	/* make sure wptr hasn't changed while processing */
2309 	wptr = r600_get_ih_wptr(rdev);
2310 	if (wptr != rdev->ih.wptr)
2311 		goto restart_ih;
2312 	rdev->ih.rptr = rptr;
2313 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
2314 	spin_unlock_irqrestore(&rdev->ih.lock, flags);
2315 	return IRQ_HANDLED;
2316 }
2317 
2318 /*
2319  * Debugfs info
2320  */
2321 #if defined(CONFIG_DEBUG_FS)
2322 
2323 static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
2324 {
2325 	struct drm_info_node *node = (struct drm_info_node *) m->private;
2326 	struct drm_device *dev = node->minor->dev;
2327 	struct radeon_device *rdev = dev->dev_private;
2328 	uint32_t rdp, wdp;
2329 	unsigned count, i, j;
2330 
2331 	radeon_ring_free_size(rdev);
2332 	rdp = RREG32(CP_RB_RPTR);
2333 	wdp = RREG32(CP_RB_WPTR);
2334 	count = (wdp + rdev->cp.ring_size - rdp) & rdev->cp.ptr_mask;
2335 	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
2336 	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2337 	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2338 	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
2339 	seq_printf(m, "%u dwords in ring\n", count);
2340 	for (j = 0; j <= count; j++) {
2341 		i = (rdp + j) & rdev->cp.ptr_mask;
2342 		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
2343 	}
2344 	return 0;
2345 }
2346 
2347 static int r600_debugfs_mc_info(struct seq_file *m, void *data)
2348 {
2349 	struct drm_info_node *node = (struct drm_info_node *) m->private;
2350 	struct drm_device *dev = node->minor->dev;
2351 	struct radeon_device *rdev = dev->dev_private;
2352 
2353 	DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS);
2354 	DREG32_SYS(m, rdev, VM_L2_STATUS);
2355 	return 0;
2356 }
2357 
2358 static struct drm_info_list r600_mc_info_list[] = {
2359 	{"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
2360 	{"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
2361 };
2362 #endif
2363 
2364 int r600_debugfs_mc_info_init(struct radeon_device *rdev)
2365 {
2366 #if defined(CONFIG_DEBUG_FS)
2367 	return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list));
2368 #else
2369 	return 0;
2370 #endif
2371 }
2372