/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};
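/* Illustrative note (added by the editor, not in the original source): with
 * relocations A, B, C, D submitted at priorities 2, 0, 2 and 31, the buckets
 * end up as bucket[31] = {D}, bucket[2] = {A, C}, bucket[0] = {B}. Splicing
 * bucket 0 first and bucket 32 last onto the head of the output list yields
 * D, A, C, B -- descending priority, with the relative order of A and C
 * (equal priority) preserved, i.e. the sort is stable.
 */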

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

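		/* Worked example (added for clarity, not in the original):
		 * a priority-15 buffer that is written to gets 15 * 2 + 1 = 31,
		 * the highest userspace value, while a read-only priority-0
		 * buffer gets 0 * 2 + 0 = 0. Priority 32
		 * (RADEON_CS_MAX_PRIORITY) stays reserved for the kernel,
		 * e.g. the UVD message buffer handled below.
		 */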
		/* The first reloc of an UVD job is the msg and that must be
		 * in VRAM. Also put everything into VRAM on AGP cards and
		 * older IGP chips to avoid image corruption.
		 */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
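	/* Note (added): for userptr BOs the validation below ends up pinning
	 * the backing pages, typically via get_user_pages(), which requires
	 * the process' mmap_sem to be held for reading -- hence the
	 * down_read()/up_read() pair around radeon_bo_list_validate().
	 */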
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}
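/* Added note: radeon_cs_sync_rings() collects the fences attached to each
 * validated BO's reservation object into the IB's sync object, so the target
 * ring waits for prior work (possibly on other rings) that still touches
 * those buffers. Roughly speaking, a shared (read-only) reloc only needs to
 * wait on the exclusive fence, which is what tv.shared conveys here.
 */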

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
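	/* Layout sketch (added for clarity): the CS ioctl argument carries
	 * num_chunks and a user pointer to an array of num_chunks u64s, each
	 * pointing at a struct drm_radeon_cs_chunk { chunk_id, length_dw,
	 * chunk_data }. The loop below pulls each descriptor in, classifies
	 * it by chunk_id (IB, RELOCS, CONST_IB, FLAGS) and, where needed,
	 * copies the chunk payload into kernel memory.
	 */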
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
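/* Added example: with BOs of 64, 1 and 16 pages on the validated list,
 * list_sort() with this comparator (negative when a is smaller) orders them
 * 1, 16, 64, so the small buffers land at the front and become the cheaper
 * eviction candidates, as described in radeon_cs_parser_fini() below.
 */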

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to backoff the reservation
 *
 * If error is set, then unvalidate the buffers, otherwise just free memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	drm_free_large(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}
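/* Added note: this is the non-VM path. radeon_cs_parse() runs the per-ASIC
 * command checker over the IB contents and patches relocations as it goes,
 * whereas the VM path in radeon_cs_ib_vm_chunk() below relies on the GPU's
 * per-process address space and only does the lighter radeon_ring_ib_parse()
 * pass.
 */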

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}
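/* Added note: -EDEADLK in this path signals a detected GPU lockup; after a
 * successful radeon_gpu_reset() the error is turned into -EAGAIN so that
 * userspace simply resubmits the command stream.
 */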

static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					   vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
			   vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}
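/* Added note: the CONST_IB chunk is only honoured on SI (CHIP_TAHITI and
 * newer), where it is presumably consumed by the CP's constant engine ahead
 * of the main IB; on earlier ASICs and in the non-VM path it is simply
 * ignored.
 */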

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}
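/* Added summary of the ioctl flow above: take the exclusive lock, initialize
 * the parser from the user chunks, copy in the IB(s), look up and validate
 * all relocated BOs, then submit through either the non-VM path
 * (radeon_cs_ib_chunk) or the VM path (radeon_cs_ib_vm_chunk); finally fence
 * or back off the reservations in radeon_cs_parser_fini() and translate a
 * detected lockup into -EAGAIN via radeon_cs_handle_lockup().
 */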

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header within the ib chunk
 *
 * Assumes that the ib chunk is properly set. Returns -EINVAL if the packet
 * extends past the remaining ib size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}
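/* Added note: the packet header dword encodes the packet type and the payload
 * size in dwords, extracted above by the RADEON_CP_PACKET_GET_TYPE/_GET_COUNT
 * macros; type-0 headers additionally carry a register offset and type-3
 * headers an opcode. Type-2 packets are one-dword fillers, which is why
 * count is forced to -1 (no payload) for them.
 */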

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a packet3 NOP relocation.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to return the resolved relocation entry
 * @nomm:	no memory management (legacy path); read the GPU offset
 *		directly from the relocation chunk instead of looking up a BO
 *
 * Check if the next packet is a relocation packet3 and return the matching
 * entry from the relocation list.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}
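/* Added note: the relocation convention used above is that userspace follows
 * a command needing a buffer address with a type-3 NOP packet whose payload
 * dword indexes into the relocation chunk. Each relocation entry there is
 * four dwords (matching struct drm_radeon_cs_reloc: handle, read_domains,
 * write_domain, flags), hence the idx / 4 lookup into p->relocs.
 */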
864