xref: /openbmc/linux/drivers/gpu/drm/radeon/r300.c (revision fd589a8f)
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_drm.h"
#include "radeon_share.h"

/* r300,r350,rv350,rv370,rv380 depend on: */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx);
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj);

/* This file gathers functions specific to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);


/*
 * rv370,rv380 PCIE GART
 */
void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	/* Workaround for a HW bug: flush the TLB twice */
	for (i = 0; i < 2; i++) {
		tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
		(void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	}
	mb();
}

int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
	uint32_t table_addr;
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	r = rv370_debugfs_pcie_gart_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
	}
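	/* Each GART entry is a single 32-bit word (see
	 * rv370_pcie_gart_set_page() below), so the table needs
	 * num_gpu_pages * 4 bytes of VRAM. */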
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	r = radeon_gart_table_vram_alloc(rdev);
	if (r) {
		return r;
	}
	/* discard memory requests outside of the configured range */
	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
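	/* END is programmed with the start of the last 4KB page of the
	 * aperture, so it appears to be inclusive. */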
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
	table_addr = rdev->gart.table_addr;
	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
	/* FIXME: setup default page */
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
	/* Clear error */
	WREG32_PCIE(0x18, 0);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_EN;
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	rv370_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
		 rdev->mc.gtt_size >> 20, table_addr);
	rdev->gart.ready = true;
	return 0;
}

void rv370_pcie_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
	if (rdev->gart.table.vram.robj) {
		radeon_object_kunmap(rdev->gart.table.vram.robj);
		radeon_object_unpin(rdev->gart.table.vram.robj);
	}
}

int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;

	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
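	/* Pack the 40-bit bus address into a 32-bit GART entry:
	 * entry[23:0] = addr[31:8] and entry[31:24] = addr[39:32].
	 * Pages are 4KB aligned, so the low bits are free to carry flags;
	 * 0xc looks like the read/write enable bits for the entry (an
	 * assumption, the hardware docs are not quoted here).
	 * Example: addr = 0x123456000 gives entry 0x0123456C. */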
	addr = (lower_32_bits(addr) >> 8) |
	       ((upper_32_bits(addr) & 0xff) << 24) |
	       0xc;
	/* On x86 we want this to be CPU endian; on powerpc without HW
	 * swappers it'll get swapped on the way into VRAM - so there is
	 * no need for cpu_to_le32 on VRAM tables */
	writel(addr, ptr + (i * 4));
	return 0;
}

int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		if (rdev->family > CHIP_RV350) {
			rv370_pcie_gart_disable(rdev);
		} else {
			r100_pci_gart_disable(rdev);
		}
		return 0;
	}
#endif
	if (rdev->flags & RADEON_IS_PCIE) {
		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
		return rv370_pcie_gart_enable(rdev);
	}
	return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
int r300_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}

	r300_gpu_init(rdev);
	r100_pci_gart_disable(rdev);
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
	}

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	/* Program GPU memory space */
	r100_mc_disable_clients(rdev);
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}

void r300_mc_fini(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
		radeon_gart_table_vram_free(rdev);
	} else {
		r100_pci_gart_disable(rdev);
		radeon_gart_table_ram_free(rdev);
	}
	radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
void r300_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today callers are ib schedule and buffer move) */
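	/* Register names for the raw offsets below, inferred from the named
	 * writes in r300_ring_start() and the CS checker in this file:
	 * 0x43E0/0x43E4 look like the SC scissor registers, 0x4E4C is
	 * R300_RB3D_DSTCACHE_CTLSTAT, 0x4F18 is R300_RB3D_ZCACHE_CTLSTAT
	 * and 0x1720 is RADEON_WAIT_UNTIL. */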
	/* Write SC register so SC & US assert idle */
	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
	radeon_ring_write(rdev, 0);
	/* Flush 3D cache */
	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
	radeon_ring_write(rdev, (2 << 0));
	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
	radeon_ring_write(rdev, (1 << 0));
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Global GPU functions
 */
int r300_copy_dma(struct radeon_device *rdev,
		  uint64_t src_offset,
		  uint64_t dst_offset,
		  unsigned num_pages,
		  struct radeon_fence *fence)
{
	uint32_t size;
	uint32_t cur_size;
	int i, num_loops;
	int r = 0;

	/* radeon pitch is /64 */
	size = num_pages << PAGE_SHIFT;
	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
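	/* The blit size field is apparently 21 bits wide, so each loop
	 * iteration moves at most 0x1FFFFF bytes; e.g. an 8MB move
	 * (0x800000 bytes) takes DIV_ROUND_UP(0x800000, 0x1FFFFF) = 5
	 * iterations. */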
	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}
	/* Must wait for 2D idle & clean before DMA or hangs might happen */
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev, (1 << 16));
	for (i = 0; i < num_loops; i++) {
		cur_size = size;
		if (cur_size > 0x1FFFFF) {
			cur_size = 0x1FFFFF;
		}
		size -= cur_size;
		radeon_ring_write(rdev, PACKET0(0x720, 2));
		radeon_ring_write(rdev, src_offset);
		radeon_ring_write(rdev, dst_offset);
		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
		src_offset += cur_size;
		dst_offset += cur_size;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}

void r300_ring_start(struct radeon_device *rdev)
{
	unsigned gb_tile_config;
	int r;

	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	case 1:
	default:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	r = radeon_ring_lock(rdev, 64);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
	radeon_ring_write(rdev, gb_tile_config);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(0x170C, 0));
	radeon_ring_write(rdev, 1 << 31);
	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X0_SHIFT) |
			   (6 << R300_MS_Y0_SHIFT) |
			   (6 << R300_MS_X1_SHIFT) |
			   (6 << R300_MS_Y1_SHIFT) |
			   (6 << R300_MS_X2_SHIFT) |
			   (6 << R300_MS_Y2_SHIFT) |
			   (6 << R300_MSBD0_Y_SHIFT) |
			   (6 << R300_MSBD0_X_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X3_SHIFT) |
			   (6 << R300_MS_Y3_SHIFT) |
			   (6 << R300_MS_X4_SHIFT) |
			   (6 << R300_MS_Y4_SHIFT) |
			   (6 << R300_MS_X5_SHIFT) |
			   (6 << R300_MS_Y5_SHIFT) |
			   (6 << R300_MSBD1_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
	radeon_ring_write(rdev,
			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
	radeon_ring_write(rdev,
			  R300_GEOMETRY_ROUND_NEAREST |
			  R300_COLOR_ROUND_NEAREST);
	radeon_ring_unlock_commit(rdev);
}

void r300_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_R300 &&
	    (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
		rdev->pll_errata |= CHIP_ERRATA_R300_CG;
	}
}

int r300_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
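		/* bit 4 of MC_STATUS is treated here as the MC idle flag */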
		tmp = RREG32(0x0150);
		if (tmp & (1 << 4)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

void r300_gpu_init(struct radeon_device *rdev)
{
	uint32_t gb_tile_config, tmp;

	r100_hdp_reset(rdev);
	/* FIXME: does rv380 have only one pipe? */
	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
		/* r300,r350 */
		rdev->num_gb_pipes = 2;
	} else {
		/* rv350,rv370,rv380 */
		rdev->num_gb_pipes = 1;
	}
	rdev->num_z_pipes = 1;
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
	case 1:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}
	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	tmp = RREG32(0x170C);
	WREG32(0x170C, tmp | (1 << 31));

	WREG32(R300_RB2D_DSTCACHE_MODE,
	       R300_DC_AUTOFLUSH_ENABLE |
	       R300_DC_DC_DISABLE_IGNORE_PE);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	DRM_INFO("radeon: %d quad pipes, %d Z pipes initialized.\n",
		 rdev->num_gb_pipes, rdev->num_z_pipes);
}

int r300_ga_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	for (i = 0; i < rdev->usec_timeout; i++) {
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
		(void)RREG32(RADEON_RBBM_SOFT_RESET);
		udelay(200);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (tmp & ((1 << 20) | (1 << 26))) {
			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
			/* GA still busy, soft reset it */
			WREG32(0x429C, 0x200);
			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
			WREG32(0x43E0, 0);
			WREG32(0x43E4, 0);
			WREG32(0x24AC, 0);
		}
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			break;
		}
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			DRM_INFO("GA reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
	return -1;
}

int r300_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* reset GA */
	if (status & ((1 << 20) | (1 << 26))) {
		r300_ga_reset(rdev);
	}
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}


/*
 * r300,r350,rv350,rv380 VRAM info
 */
void r300_vram_info(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* DDR for all cards after R300 & IGP */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(RADEON_MEM_CNTL);
	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
		rdev->mc.vram_width = 128;
	} else {
		rdev->mc.vram_width = 64;
	}

	r100_vram_init_sizes(rdev);
}


/*
 * PCIE Lanes
 */

void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
	uint32_t link_width_cntl, mask;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* FIXME wait for idle */

	switch (lanes) {
	case 0:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
		break;
	case 1:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
		break;
	case 2:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
		break;
	case 4:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
		break;
	case 8:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
		break;
	case 12:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
		break;
	case 16:
	default:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
		break;
	}

	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
		return;

	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
			     RADEON_PCIE_LC_RECONFIG_NOW |
			     RADEON_PCIE_LC_RECONFIG_LATER |
			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
	link_width_cntl |= mask;
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
						     RADEON_PCIE_LC_RECONFIG_NOW));

	/* wait for lane set to complete */
	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
	while (link_width_cntl == 0xffffffff)
		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
}


/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
	seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
	seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
	seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
	seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
	seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
	seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list rv370_pcie_gart_info_list[] = {
	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif

int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
	return 0;
#endif
}


/*
 * CS functions
 */
struct r300_cs_track_cb {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		cpp;
	unsigned		offset;
};

struct r300_cs_track_array {
	struct radeon_object	*robj;
	unsigned		esize;
};

struct r300_cs_track_texture {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		width;
	unsigned		height;
	unsigned		num_levels;
	unsigned		cpp;
	unsigned		tex_coord_type;
	unsigned		txdepth;
	unsigned		width_11;
	unsigned		height_11;
	bool			use_pitch;
	bool			enabled;
	bool			roundup_w;
	bool			roundup_h;
};

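/* Mirror of the GPU state touched by a command stream: filled in by the
 * packet checkers below and validated against the bound buffer objects
 * before each draw in r300_cs_track_check(). */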
struct r300_cs_track {
	unsigned			num_cb;
	unsigned			maxy;
	unsigned			vtx_size;
	unsigned			vap_vf_cntl;
	unsigned			immd_dwords;
	unsigned			num_arrays;
	unsigned			max_indx;
	struct r300_cs_track_array	arrays[11];
	struct r300_cs_track_cb		cb[4];
	struct r300_cs_track_cb		zb;
	struct r300_cs_track_texture	textures[16];
	bool				z_enabled;
};

static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
					      struct r300_cs_track *track)
{
	struct radeon_object *robj;
	unsigned long size;
	unsigned u, i, w, h;

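	/* For each enabled texture unit, accumulate the worst-case
	 * footprint of all mip levels and check it against the size of
	 * the bound buffer object. */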
	for (u = 0; u < 16; u++) {
		if (!track->textures[u].enabled)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width / (1 << i);
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height / (1 << i);
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			size += w * h;
		}
		size *= track->textures[u].cpp;
		switch (track->textures[u].tex_coord_type) {
		case 0:
			break;
		case 1:
			size *= (1 << track->textures[u].txdepth);
			break;
		case 2:
			size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_object_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_object_size(robj));
			r300_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}

int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;

	for (i = 0; i < track->num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_object_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_object_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	if (track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer !\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_object_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu) !\n", size,
				  radeon_object_size(track->zb.robj));
			return -EINVAL;
		}
	}
	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
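	/* PRIM_WALK lives in bits 5:4 of VAP_VF_CNTL.  Judging by the
	 * checks below: 1 = arrays addressed through indices (bounded by
	 * max_indx), 2 = arrays walked linearly over nverts vertices,
	 * 3 = vertex data embedded in the command stream (IMMD draws). */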
	switch (prim_walk) {
	case 1:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
					   "have %lu dwords\n", prim_walk, i,
					   size >> 2,
					   radeon_object_size(track->arrays[i].robj) >> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
					   "have %lu dwords\n", prim_walk, i, size >> 2,
					   radeon_object_size(track->arrays[i].robj) >> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}
	return r300_cs_track_texture_check(rdev, track);
}

static inline void r300_cs_track_clear(struct r300_cs_track *track)
{
	unsigned i;

	track->num_cb = 4;
	track->maxy = 4096;
	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < 16; i++) {
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		track->textures[i].txdepth = 16;
		track->textures[i].cpp = 64;
		track->textures[i].tex_coord_type = 1;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture units are disabled */
		track->textures[i].enabled = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
	}
}

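/* One bit per register dword, going by how r100_cs_parse_packet0() consumes
 * these tables: for a register at offset reg, the word is
 * reg_safe_bm[reg >> 7] and the bit is (reg >> 2) & 31.  A set bit routes
 * the write through r300_packet0_check() below (typically because the
 * register takes a relocation); a clear bit lets userspace write the
 * register unchecked.  The 159 words cover the register space up to 0x4F7C.
 * Example: RB3D_COLORPITCH0 (0x4E38) maps to bit 14 of word 156
 * (0x0003FC01), which is set, so the write is validated by the checker. */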
static const unsigned r300_reg_safe_bm[159] = {
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0003FC01, 0xFFFFFCF8, 0xFF800B19,
};

static int r300_packet0_check(struct radeon_cs_parser *p,
		struct radeon_cs_packet *pkt,
		unsigned idx, unsigned reg)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp, tile_flags = 0;
	unsigned i;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	track = (struct r300_cs_track *)p->track;
	switch (reg) {
	case AVIVO_D1MODE_VLINE_START_END:
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		break;
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
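		/* The low 22 bits of {DST,SRC}_PITCH_OFFSET hold the surface
		 * offset in 1KB units (hence gpu_offset >> 10); the high bits
		 * carry the pitch, with the tile flags patched in below. */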
		tmp = ib_chunk->kdata[idx] & 0x003fffff;
		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_DST_TILE_MACRO;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
			if (reg == RADEON_SRC_PITCH_OFFSET) {
				DRM_ERROR("Cannot src blit from microtiled surface\n");
				r100_cs_dump_packet(p, pkt);
				return -EINVAL;
			}
			tile_flags |= RADEON_DST_TILE_MICRO;
		}
		tmp |= tile_flags;
		ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
		break;
	case R300_RB3D_COLOROFFSET0:
	case R300_RB3D_COLOROFFSET1:
	case R300_RB3D_COLOROFFSET2:
	case R300_RB3D_COLOROFFSET3:
		i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[i].robj = reloc->robj;
		track->cb[i].offset = ib_chunk->kdata[idx];
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case R300_ZB_DEPTHOFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = ib_chunk->kdata[idx];
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case R300_TX_OFFSET_0:
	case R300_TX_OFFSET_0+4:
	case R300_TX_OFFSET_0+8:
	case R300_TX_OFFSET_0+12:
	case R300_TX_OFFSET_0+16:
	case R300_TX_OFFSET_0+20:
	case R300_TX_OFFSET_0+24:
	case R300_TX_OFFSET_0+28:
	case R300_TX_OFFSET_0+32:
	case R300_TX_OFFSET_0+36:
	case R300_TX_OFFSET_0+40:
	case R300_TX_OFFSET_0+44:
	case R300_TX_OFFSET_0+48:
	case R300_TX_OFFSET_0+52:
	case R300_TX_OFFSET_0+56:
	case R300_TX_OFFSET_0+60:
		i = (reg - R300_TX_OFFSET_0) >> 2;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		break;
	/* Tracked registers */
	case 0x2084:
		/* VAP_VF_CNTL */
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		break;
	case 0x20B4:
		/* VAP_VTX_SIZE */
		track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
		break;
	case 0x2134:
		/* VAP_VF_MAX_VTX_INDX */
		track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
		break;
	case 0x43E4:
		/* SC_SCISSOR1 */
		track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
		if (p->rdev->family < CHIP_RV515) {
			track->maxy -= 1440;
		}
		break;
	case 0x4E00:
		/* RB3D_CCTL */
		track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
		break;
	case 0x4E38:
	case 0x4E3C:
	case 0x4E40:
	case 0x4E44:
		/* RB3D_COLORPITCH0 */
		/* RB3D_COLORPITCH1 */
		/* RB3D_COLORPITCH2 */
		/* RB3D_COLORPITCH3 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}

		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= R300_COLOR_TILE_ENABLE;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
			tile_flags |= R300_COLOR_MICROTILE_ENABLE;

		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
		tmp |= tile_flags;
		ib[idx] = tmp;

		i = (reg - 0x4E38) >> 2;
		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
		case 9:
		case 11:
		case 12:
			track->cb[i].cpp = 1;
			break;
		case 3:
		case 4:
		case 13:
		case 15:
			track->cb[i].cpp = 2;
			break;
		case 6:
			track->cb[i].cpp = 4;
			break;
		case 10:
			track->cb[i].cpp = 8;
			break;
		case 7:
			track->cb[i].cpp = 16;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((ib_chunk->kdata[idx] >> 21) & 0xF));
			return -EINVAL;
		}
		break;
	case 0x4F00:
		/* ZB_CNTL */
		if (ib_chunk->kdata[idx] & 2) {
			track->z_enabled = true;
		} else {
			track->z_enabled = false;
		}
		break;
	case 0x4F10:
		/* ZB_FORMAT */
		switch ((ib_chunk->kdata[idx] & 0xF)) {
		case 0:
		case 1:
			track->zb.cpp = 2;
			break;
		case 2:
			track->zb.cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid z buffer format (%d) !\n",
				  (ib_chunk->kdata[idx] & 0xF));
			return -EINVAL;
		}
		break;
	case 0x4F24:
		/* ZB_DEPTHPITCH */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}

		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= R300_DEPTHMACROTILE_ENABLE;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
			tile_flags |= R300_DEPTHMICROTILE_TILED;

		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
		tmp |= tile_flags;
		ib[idx] = tmp;

		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
		break;
	case 0x4104:
		for (i = 0; i < 16; i++) {
			bool enabled;

			enabled = !!(ib_chunk->kdata[idx] & (1 << i));
			track->textures[i].enabled = enabled;
		}
		break;
	case 0x44C0:
	case 0x44C4:
	case 0x44C8:
	case 0x44CC:
	case 0x44D0:
	case 0x44D4:
	case 0x44D8:
	case 0x44DC:
	case 0x44E0:
	case 0x44E4:
	case 0x44E8:
	case 0x44EC:
	case 0x44F0:
	case 0x44F4:
	case 0x44F8:
	case 0x44FC:
		/* TX_FORMAT1_[0-15] */
		i = (reg - 0x44C0) >> 2;
		tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
		track->textures[i].tex_coord_type = tmp;
		switch ((ib_chunk->kdata[idx] & 0x1F)) {
		case 0:
		case 2:
		case 5:
		case 18:
		case 20:
		case 21:
			track->textures[i].cpp = 1;
			break;
		case 1:
		case 3:
		case 6:
		case 7:
		case 10:
		case 11:
		case 19:
		case 22:
		case 24:
			track->textures[i].cpp = 2;
			break;
		case 4:
		case 8:
		case 9:
		case 12:
		case 13:
		case 23:
		case 25:
		case 27:
		case 30:
			track->textures[i].cpp = 4;
			break;
		case 14:
		case 26:
		case 28:
			track->textures[i].cpp = 8;
			break;
		case 29:
			track->textures[i].cpp = 16;
			break;
		default:
			DRM_ERROR("Invalid texture format %u\n",
				  (ib_chunk->kdata[idx] & 0x1F));
			return -EINVAL;
		}
		break;
	case 0x4400:
	case 0x4404:
	case 0x4408:
	case 0x440C:
	case 0x4410:
	case 0x4414:
	case 0x4418:
	case 0x441C:
	case 0x4420:
	case 0x4424:
	case 0x4428:
	case 0x442C:
	case 0x4430:
	case 0x4434:
	case 0x4438:
	case 0x443C:
		/* TX_FILTER0_[0-15] */
		i = (reg - 0x4400) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x7;
		if (tmp == 2 || tmp == 4 || tmp == 6) {
			track->textures[i].roundup_w = false;
		}
		tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
		if (tmp == 2 || tmp == 4 || tmp == 6) {
			track->textures[i].roundup_h = false;
		}
		break;
	case 0x4500:
	case 0x4504:
	case 0x4508:
	case 0x450C:
	case 0x4510:
	case 0x4514:
	case 0x4518:
	case 0x451C:
	case 0x4520:
	case 0x4524:
	case 0x4528:
	case 0x452C:
	case 0x4530:
	case 0x4534:
	case 0x4538:
	case 0x453C:
		/* TX_FORMAT2_[0-15] */
		i = (reg - 0x4500) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x3FFF;
		track->textures[i].pitch = tmp + 1;
		if (p->rdev->family >= CHIP_RV515) {
			tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
			track->textures[i].width_11 = tmp;
			tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
			track->textures[i].height_11 = tmp;
		}
		break;
	case 0x4480:
	case 0x4484:
	case 0x4488:
	case 0x448C:
	case 0x4490:
	case 0x4494:
	case 0x4498:
	case 0x449C:
	case 0x44A0:
	case 0x44A4:
	case 0x44A8:
	case 0x44AC:
	case 0x44B0:
	case 0x44B4:
	case 0x44B8:
	case 0x44BC:
		/* TX_FORMAT0_[0-15] */
		i = (reg - 0x4480) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x7FF;
		track->textures[i].width = tmp + 1;
		tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
		track->textures[i].height = tmp + 1;
		tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
		track->textures[i].num_levels = tmp;
		tmp = ib_chunk->kdata[idx] & (1 << 31);
		track->textures[i].use_pitch = !!tmp;
		tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
		track->textures[i].txdepth = tmp;
		break;
	case R300_ZB_ZPASS_ADDR:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case 0x4be8:
		/* valid register only on RV530 */
		if (p->rdev->family == CHIP_RV530)
			break;
		/* fall through, do not move */
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int r300_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	unsigned idx;
	unsigned i, c;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	track = (struct r300_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++] & 0x1F;
		track->num_arrays = c;
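		/* Each loop iteration consumes three dwords: one packing the
		 * element sizes of two arrays, then one relocated buffer
		 * address per array; an odd trailing array is handled after
		 * the loop. */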
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 1].robj = reloc->robj;
			track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
			track->arrays[i + 1].esize &= 0x7F;
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	/* Draw packet */
	case PACKET3_3D_DRAW_IMMD:
		/* Number of dwords is vtx_size * (num_vertices - 1);
		 * PRIM_WALK must be equal to 3: vertex data is embedded
		 * in the cmd stream */
		if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx+1];
		track->immd_dwords = pkt->count - 1;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_IMMD_2:
		/* Number of dwords is vtx_size * (num_vertices - 1);
		 * PRIM_WALK must be equal to 3: vertex data is embedded
		 * in the cmd stream */
		if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		track->immd_dwords = pkt->count;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

int r300_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r300_cs_track track;
	int r;

	r300_cs_track_clear(&track);
	p->track = &track;
	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = r100_cs_parse_packet0(p, &pkt,
						  p->rdev->config.r300.reg_safe_bm,
						  p->rdev->config.r300.reg_safe_bm_size,
						  &r300_packet0_check);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r300_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}

int r300_init(struct radeon_device *rdev)
{
	rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
	return 0;
}