xref: /openbmc/linux/drivers/gpu/drm/radeon/radeon_cs.c (revision a03a8dbe20eff6d57aae3147577bf84b52aba4e6)
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
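	/* list_splice() adds each bucket at the head of out_list, so walking
	 * the buckets in ascending priority order yields an output list in
	 * descending priority order, as described above.
	 */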
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;
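		/* e.g. a userspace priority of 15 on a write buffer maps to
		 * 15 * 2 + 1 = 31, just below the kernel-reserved 32.
		 */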

		/* The first reloc of an UVD job is the msg and that must be
		 * in VRAM; also put everything into VRAM on AGP cards and
		 * older IGP chips to avoid image corruption.
		 */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
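			/* If only VRAM was requested, also allow GTT so
			 * validation can still succeed under VRAM pressure.
			 */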
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;
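		/* Read-only buffers get a shared fence, so readers on other
		 * rings don't need to wait for this submission to finish.
		 */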

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
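	/* Userptr BOs pin their user pages during validation, which requires
	 * the task's mmap_sem to be held across the validation call.
	 */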
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

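	/* Make the IB wait for the fences of every validated BO so previous
	 * work that still uses them (possibly on other rings) finishes first.
	 */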
	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
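		/* IB payloads don't need a kernel-side copy here (except on
		 * AGP, checked below); radeon_cs_ib_fill() copies them from
		 * user space straight into the indirect buffers.
		 */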
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to back off the reservation
 *
 * If error is set, unvalidate the buffers, otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added to the
		 * LRU list first, so they are likely to be evicted first
		 * later on, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

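	/* VM command streams are parsed and scheduled in
	 * radeon_cs_ib_vm_chunk() instead.
	 */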
	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

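	/* Update the page table entries of every BO referenced by this CS
	 * and make the IB wait for the matching PT update fences.
	 */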
	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

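/* A -EDEADLK from the CS path signals a GPU lockup: reset the GPU and
 * return -EAGAIN so userspace resubmits the command stream.
 */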
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

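		/* On SI and newer an optional CONST IB (consumed by the
		 * constant engine) can be submitted alongside the main IB.
		 */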
		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					   vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
			   vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
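	/* A reset is pending: perform it now and return -EAGAIN so
	 * userspace resubmits the command stream.
	 */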
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	packet index
 *
 * Assume that chunk_ib_index is properly set. Returns -EINVAL if the packet
 * is bigger than the remaining ib size, or if the packet is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:			parser structure holding parsing context.
 * @cs_reloc:		reloc information
 * @nomm:		no memory management for debugging
 *
 * Check if the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}
863