1 /* 2 * Copyright 2008 Jerome Glisse. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Jerome Glisse <glisse@freedesktop.org> 26 */ 27 #include <linux/list_sort.h> 28 #include <drm/drmP.h> 29 #include <drm/radeon_drm.h> 30 #include "radeon_reg.h" 31 #include "radeon.h" 32 #include "radeon_trace.h" 33 34 #define RADEON_CS_MAX_PRIORITY 32u 35 #define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) 36 37 /* This is based on the bucket sort with O(n) time complexity. 38 * An item with priority "i" is added to bucket[i]. The lists are then 39 * concatenated in descending order. 40 */ 41 struct radeon_cs_buckets { 42 struct list_head bucket[RADEON_CS_NUM_BUCKETS]; 43 }; 44 45 static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) 46 { 47 unsigned i; 48 49 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) 50 INIT_LIST_HEAD(&b->bucket[i]); 51 } 52 53 static void radeon_cs_buckets_add(struct radeon_cs_buckets *b, 54 struct list_head *item, unsigned priority) 55 { 56 /* Since buffers which appear sooner in the relocation list are 57 * likely to be used more often than buffers which appear later 58 * in the list, the sort mustn't change the ordering of buffers 59 * with the same priority, i.e. it must be stable. 60 */ 61 list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]); 62 } 63 64 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b, 65 struct list_head *out_list) 66 { 67 unsigned i; 68 69 /* Connect the sorted buckets in the output list. */ 70 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) { 71 list_splice(&b->bucket[i], out_list); 72 } 73 } 74 75 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) 76 { 77 struct radeon_cs_chunk *chunk; 78 struct radeon_cs_buckets buckets; 79 unsigned i; 80 bool need_mmap_lock = false; 81 int r; 82 83 if (p->chunk_relocs == NULL) { 84 return 0; 85 } 86 chunk = p->chunk_relocs; 87 p->dma_reloc_idx = 0; 88 /* FIXME: we assume that each relocs use 4 dwords */ 89 p->nrelocs = chunk->length_dw / 4; 90 p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list)); 91 if (p->relocs == NULL) { 92 return -ENOMEM; 93 } 94 95 radeon_cs_buckets_init(&buckets); 96 97 for (i = 0; i < p->nrelocs; i++) { 98 struct drm_radeon_cs_reloc *r; 99 struct drm_gem_object *gobj; 100 unsigned priority; 101 102 r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; 103 gobj = drm_gem_object_lookup(p->filp, r->handle); 104 if (gobj == NULL) { 105 DRM_ERROR("gem object lookup failed 0x%x\n", 106 r->handle); 107 return -ENOENT; 108 } 109 p->relocs[i].robj = gem_to_radeon_bo(gobj); 110 111 /* The userspace buffer priorities are from 0 to 15. A higher 112 * number means the buffer is more important. 113 * Also, the buffers used for write have a higher priority than 114 * the buffers used for read only, which doubles the range 115 * to 0 to 31. 32 is reserved for the kernel driver. 116 */ 117 priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2 118 + !!r->write_domain; 119 120 /* the first reloc of an UVD job is the msg and that must be in 121 VRAM, also but everything into VRAM on AGP cards and older 122 IGP chips to avoid image corruptions */ 123 if (p->ring == R600_RING_TYPE_UVD_INDEX && 124 (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) || 125 p->rdev->family == CHIP_RS780 || 126 p->rdev->family == CHIP_RS880)) { 127 128 /* TODO: is this still needed for NI+ ? */ 129 p->relocs[i].prefered_domains = 130 RADEON_GEM_DOMAIN_VRAM; 131 132 p->relocs[i].allowed_domains = 133 RADEON_GEM_DOMAIN_VRAM; 134 135 /* prioritize this over any other relocation */ 136 priority = RADEON_CS_MAX_PRIORITY; 137 } else { 138 uint32_t domain = r->write_domain ? 139 r->write_domain : r->read_domains; 140 141 if (domain & RADEON_GEM_DOMAIN_CPU) { 142 DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid " 143 "for command submission\n"); 144 return -EINVAL; 145 } 146 147 p->relocs[i].prefered_domains = domain; 148 if (domain == RADEON_GEM_DOMAIN_VRAM) 149 domain |= RADEON_GEM_DOMAIN_GTT; 150 p->relocs[i].allowed_domains = domain; 151 } 152 153 if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) { 154 uint32_t domain = p->relocs[i].prefered_domains; 155 if (!(domain & RADEON_GEM_DOMAIN_GTT)) { 156 DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is " 157 "allowed for userptr BOs\n"); 158 return -EINVAL; 159 } 160 need_mmap_lock = true; 161 domain = RADEON_GEM_DOMAIN_GTT; 162 p->relocs[i].prefered_domains = domain; 163 p->relocs[i].allowed_domains = domain; 164 } 165 166 p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; 167 p->relocs[i].tv.shared = !r->write_domain; 168 169 radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, 170 priority); 171 } 172 173 radeon_cs_buckets_get_list(&buckets, &p->validated); 174 175 if (p->cs_flags & RADEON_CS_USE_VM) 176 p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, 177 &p->validated); 178 if (need_mmap_lock) 179 down_read(¤t->mm->mmap_sem); 180 181 r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); 182 183 if (need_mmap_lock) 184 up_read(¤t->mm->mmap_sem); 185 186 return r; 187 } 188 189 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) 190 { 191 p->priority = priority; 192 193 switch (ring) { 194 default: 195 DRM_ERROR("unknown ring id: %d\n", ring); 196 return -EINVAL; 197 case RADEON_CS_RING_GFX: 198 p->ring = RADEON_RING_TYPE_GFX_INDEX; 199 break; 200 case RADEON_CS_RING_COMPUTE: 201 if (p->rdev->family >= CHIP_TAHITI) { 202 if (p->priority > 0) 203 p->ring = CAYMAN_RING_TYPE_CP1_INDEX; 204 else 205 p->ring = CAYMAN_RING_TYPE_CP2_INDEX; 206 } else 207 p->ring = RADEON_RING_TYPE_GFX_INDEX; 208 break; 209 case RADEON_CS_RING_DMA: 210 if (p->rdev->family >= CHIP_CAYMAN) { 211 if (p->priority > 0) 212 p->ring = R600_RING_TYPE_DMA_INDEX; 213 else 214 p->ring = CAYMAN_RING_TYPE_DMA1_INDEX; 215 } else if (p->rdev->family >= CHIP_RV770) { 216 p->ring = R600_RING_TYPE_DMA_INDEX; 217 } else { 218 return -EINVAL; 219 } 220 break; 221 case RADEON_CS_RING_UVD: 222 p->ring = R600_RING_TYPE_UVD_INDEX; 223 break; 224 case RADEON_CS_RING_VCE: 225 /* TODO: only use the low priority ring for now */ 226 p->ring = TN_RING_TYPE_VCE1_INDEX; 227 break; 228 } 229 return 0; 230 } 231 232 static int radeon_cs_sync_rings(struct radeon_cs_parser *p) 233 { 234 struct radeon_bo_list *reloc; 235 int r; 236 237 list_for_each_entry(reloc, &p->validated, tv.head) { 238 struct reservation_object *resv; 239 240 resv = reloc->robj->tbo.resv; 241 r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, 242 reloc->tv.shared); 243 if (r) 244 return r; 245 } 246 return 0; 247 } 248 249 /* XXX: note that this is called from the legacy UMS CS ioctl as well */ 250 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) 251 { 252 struct drm_radeon_cs *cs = data; 253 uint64_t *chunk_array_ptr; 254 unsigned size, i; 255 u32 ring = RADEON_CS_RING_GFX; 256 s32 priority = 0; 257 258 INIT_LIST_HEAD(&p->validated); 259 260 if (!cs->num_chunks) { 261 return 0; 262 } 263 264 /* get chunks */ 265 p->idx = 0; 266 p->ib.sa_bo = NULL; 267 p->const_ib.sa_bo = NULL; 268 p->chunk_ib = NULL; 269 p->chunk_relocs = NULL; 270 p->chunk_flags = NULL; 271 p->chunk_const_ib = NULL; 272 p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); 273 if (p->chunks_array == NULL) { 274 return -ENOMEM; 275 } 276 chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks); 277 if (copy_from_user(p->chunks_array, chunk_array_ptr, 278 sizeof(uint64_t)*cs->num_chunks)) { 279 return -EFAULT; 280 } 281 p->cs_flags = 0; 282 p->nchunks = cs->num_chunks; 283 p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL); 284 if (p->chunks == NULL) { 285 return -ENOMEM; 286 } 287 for (i = 0; i < p->nchunks; i++) { 288 struct drm_radeon_cs_chunk __user **chunk_ptr = NULL; 289 struct drm_radeon_cs_chunk user_chunk; 290 uint32_t __user *cdata; 291 292 chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i]; 293 if (copy_from_user(&user_chunk, chunk_ptr, 294 sizeof(struct drm_radeon_cs_chunk))) { 295 return -EFAULT; 296 } 297 p->chunks[i].length_dw = user_chunk.length_dw; 298 if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) { 299 p->chunk_relocs = &p->chunks[i]; 300 } 301 if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) { 302 p->chunk_ib = &p->chunks[i]; 303 /* zero length IB isn't useful */ 304 if (p->chunks[i].length_dw == 0) 305 return -EINVAL; 306 } 307 if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) { 308 p->chunk_const_ib = &p->chunks[i]; 309 /* zero length CONST IB isn't useful */ 310 if (p->chunks[i].length_dw == 0) 311 return -EINVAL; 312 } 313 if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) { 314 p->chunk_flags = &p->chunks[i]; 315 /* zero length flags aren't useful */ 316 if (p->chunks[i].length_dw == 0) 317 return -EINVAL; 318 } 319 320 size = p->chunks[i].length_dw; 321 cdata = (void __user *)(unsigned long)user_chunk.chunk_data; 322 p->chunks[i].user_ptr = cdata; 323 if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) 324 continue; 325 326 if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) { 327 if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP)) 328 continue; 329 } 330 331 p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); 332 size *= sizeof(uint32_t); 333 if (p->chunks[i].kdata == NULL) { 334 return -ENOMEM; 335 } 336 if (copy_from_user(p->chunks[i].kdata, cdata, size)) { 337 return -EFAULT; 338 } 339 if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) { 340 p->cs_flags = p->chunks[i].kdata[0]; 341 if (p->chunks[i].length_dw > 1) 342 ring = p->chunks[i].kdata[1]; 343 if (p->chunks[i].length_dw > 2) 344 priority = (s32)p->chunks[i].kdata[2]; 345 } 346 } 347 348 /* these are KMS only */ 349 if (p->rdev) { 350 if ((p->cs_flags & RADEON_CS_USE_VM) && 351 !p->rdev->vm_manager.enabled) { 352 DRM_ERROR("VM not active on asic!\n"); 353 return -EINVAL; 354 } 355 356 if (radeon_cs_get_ring(p, ring, priority)) 357 return -EINVAL; 358 359 /* we only support VM on some SI+ rings */ 360 if ((p->cs_flags & RADEON_CS_USE_VM) == 0) { 361 if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) { 362 DRM_ERROR("Ring %d requires VM!\n", p->ring); 363 return -EINVAL; 364 } 365 } else { 366 if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) { 367 DRM_ERROR("VM not supported on ring %d!\n", 368 p->ring); 369 return -EINVAL; 370 } 371 } 372 } 373 374 return 0; 375 } 376 377 static int cmp_size_smaller_first(void *priv, struct list_head *a, 378 struct list_head *b) 379 { 380 struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head); 381 struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head); 382 383 /* Sort A before B if A is smaller. */ 384 return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; 385 } 386 387 /** 388 * cs_parser_fini() - clean parser states 389 * @parser: parser structure holding parsing context. 390 * @error: error number 391 * 392 * If error is set than unvalidate buffer, otherwise just free memory 393 * used by parsing context. 394 **/ 395 static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff) 396 { 397 unsigned i; 398 399 if (!error) { 400 /* Sort the buffer list from the smallest to largest buffer, 401 * which affects the order of buffers in the LRU list. 402 * This assures that the smallest buffers are added first 403 * to the LRU list, so they are likely to be later evicted 404 * first, instead of large buffers whose eviction is more 405 * expensive. 406 * 407 * This slightly lowers the number of bytes moved by TTM 408 * per frame under memory pressure. 409 */ 410 list_sort(NULL, &parser->validated, cmp_size_smaller_first); 411 412 ttm_eu_fence_buffer_objects(&parser->ticket, 413 &parser->validated, 414 &parser->ib.fence->base); 415 } else if (backoff) { 416 ttm_eu_backoff_reservation(&parser->ticket, 417 &parser->validated); 418 } 419 420 if (parser->relocs != NULL) { 421 for (i = 0; i < parser->nrelocs; i++) { 422 struct radeon_bo *bo = parser->relocs[i].robj; 423 if (bo == NULL) 424 continue; 425 426 drm_gem_object_unreference_unlocked(&bo->gem_base); 427 } 428 } 429 kfree(parser->track); 430 drm_free_large(parser->relocs); 431 drm_free_large(parser->vm_bos); 432 for (i = 0; i < parser->nchunks; i++) 433 drm_free_large(parser->chunks[i].kdata); 434 kfree(parser->chunks); 435 kfree(parser->chunks_array); 436 radeon_ib_free(parser->rdev, &parser->ib); 437 radeon_ib_free(parser->rdev, &parser->const_ib); 438 } 439 440 static int radeon_cs_ib_chunk(struct radeon_device *rdev, 441 struct radeon_cs_parser *parser) 442 { 443 int r; 444 445 if (parser->chunk_ib == NULL) 446 return 0; 447 448 if (parser->cs_flags & RADEON_CS_USE_VM) 449 return 0; 450 451 r = radeon_cs_parse(rdev, parser->ring, parser); 452 if (r || parser->parser_error) { 453 DRM_ERROR("Invalid command stream !\n"); 454 return r; 455 } 456 457 r = radeon_cs_sync_rings(parser); 458 if (r) { 459 if (r != -ERESTARTSYS) 460 DRM_ERROR("Failed to sync rings: %i\n", r); 461 return r; 462 } 463 464 if (parser->ring == R600_RING_TYPE_UVD_INDEX) 465 radeon_uvd_note_usage(rdev); 466 else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || 467 (parser->ring == TN_RING_TYPE_VCE2_INDEX)) 468 radeon_vce_note_usage(rdev); 469 470 r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); 471 if (r) { 472 DRM_ERROR("Failed to schedule IB !\n"); 473 } 474 return r; 475 } 476 477 static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, 478 struct radeon_vm *vm) 479 { 480 struct radeon_device *rdev = p->rdev; 481 struct radeon_bo_va *bo_va; 482 int i, r; 483 484 r = radeon_vm_update_page_directory(rdev, vm); 485 if (r) 486 return r; 487 488 r = radeon_vm_clear_freed(rdev, vm); 489 if (r) 490 return r; 491 492 if (vm->ib_bo_va == NULL) { 493 DRM_ERROR("Tmp BO not in VM!\n"); 494 return -EINVAL; 495 } 496 497 r = radeon_vm_bo_update(rdev, vm->ib_bo_va, 498 &rdev->ring_tmp_bo.bo->tbo.mem); 499 if (r) 500 return r; 501 502 for (i = 0; i < p->nrelocs; i++) { 503 struct radeon_bo *bo; 504 505 bo = p->relocs[i].robj; 506 bo_va = radeon_vm_bo_find(vm, bo); 507 if (bo_va == NULL) { 508 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); 509 return -EINVAL; 510 } 511 512 r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem); 513 if (r) 514 return r; 515 516 radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update); 517 } 518 519 return radeon_vm_clear_invalids(rdev, vm); 520 } 521 522 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, 523 struct radeon_cs_parser *parser) 524 { 525 struct radeon_fpriv *fpriv = parser->filp->driver_priv; 526 struct radeon_vm *vm = &fpriv->vm; 527 int r; 528 529 if (parser->chunk_ib == NULL) 530 return 0; 531 if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) 532 return 0; 533 534 if (parser->const_ib.length_dw) { 535 r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib); 536 if (r) { 537 return r; 538 } 539 } 540 541 r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib); 542 if (r) { 543 return r; 544 } 545 546 if (parser->ring == R600_RING_TYPE_UVD_INDEX) 547 radeon_uvd_note_usage(rdev); 548 549 mutex_lock(&vm->mutex); 550 r = radeon_bo_vm_update_pte(parser, vm); 551 if (r) { 552 goto out; 553 } 554 555 r = radeon_cs_sync_rings(parser); 556 if (r) { 557 if (r != -ERESTARTSYS) 558 DRM_ERROR("Failed to sync rings: %i\n", r); 559 goto out; 560 } 561 562 if ((rdev->family >= CHIP_TAHITI) && 563 (parser->chunk_const_ib != NULL)) { 564 r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true); 565 } else { 566 r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); 567 } 568 569 out: 570 mutex_unlock(&vm->mutex); 571 return r; 572 } 573 574 static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r) 575 { 576 if (r == -EDEADLK) { 577 r = radeon_gpu_reset(rdev); 578 if (!r) 579 r = -EAGAIN; 580 } 581 return r; 582 } 583 584 static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser) 585 { 586 struct radeon_cs_chunk *ib_chunk; 587 struct radeon_vm *vm = NULL; 588 int r; 589 590 if (parser->chunk_ib == NULL) 591 return 0; 592 593 if (parser->cs_flags & RADEON_CS_USE_VM) { 594 struct radeon_fpriv *fpriv = parser->filp->driver_priv; 595 vm = &fpriv->vm; 596 597 if ((rdev->family >= CHIP_TAHITI) && 598 (parser->chunk_const_ib != NULL)) { 599 ib_chunk = parser->chunk_const_ib; 600 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { 601 DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw); 602 return -EINVAL; 603 } 604 r = radeon_ib_get(rdev, parser->ring, &parser->const_ib, 605 vm, ib_chunk->length_dw * 4); 606 if (r) { 607 DRM_ERROR("Failed to get const ib !\n"); 608 return r; 609 } 610 parser->const_ib.is_const_ib = true; 611 parser->const_ib.length_dw = ib_chunk->length_dw; 612 if (copy_from_user(parser->const_ib.ptr, 613 ib_chunk->user_ptr, 614 ib_chunk->length_dw * 4)) 615 return -EFAULT; 616 } 617 618 ib_chunk = parser->chunk_ib; 619 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { 620 DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw); 621 return -EINVAL; 622 } 623 } 624 ib_chunk = parser->chunk_ib; 625 626 r = radeon_ib_get(rdev, parser->ring, &parser->ib, 627 vm, ib_chunk->length_dw * 4); 628 if (r) { 629 DRM_ERROR("Failed to get ib !\n"); 630 return r; 631 } 632 parser->ib.length_dw = ib_chunk->length_dw; 633 if (ib_chunk->kdata) 634 memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4); 635 else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4)) 636 return -EFAULT; 637 return 0; 638 } 639 640 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 641 { 642 struct radeon_device *rdev = dev->dev_private; 643 struct radeon_cs_parser parser; 644 int r; 645 646 down_read(&rdev->exclusive_lock); 647 if (!rdev->accel_working) { 648 up_read(&rdev->exclusive_lock); 649 return -EBUSY; 650 } 651 if (rdev->in_reset) { 652 up_read(&rdev->exclusive_lock); 653 r = radeon_gpu_reset(rdev); 654 if (!r) 655 r = -EAGAIN; 656 return r; 657 } 658 /* initialize parser */ 659 memset(&parser, 0, sizeof(struct radeon_cs_parser)); 660 parser.filp = filp; 661 parser.rdev = rdev; 662 parser.dev = rdev->dev; 663 parser.family = rdev->family; 664 r = radeon_cs_parser_init(&parser, data); 665 if (r) { 666 DRM_ERROR("Failed to initialize parser !\n"); 667 radeon_cs_parser_fini(&parser, r, false); 668 up_read(&rdev->exclusive_lock); 669 r = radeon_cs_handle_lockup(rdev, r); 670 return r; 671 } 672 673 r = radeon_cs_ib_fill(rdev, &parser); 674 if (!r) { 675 r = radeon_cs_parser_relocs(&parser); 676 if (r && r != -ERESTARTSYS) 677 DRM_ERROR("Failed to parse relocation %d!\n", r); 678 } 679 680 if (r) { 681 radeon_cs_parser_fini(&parser, r, false); 682 up_read(&rdev->exclusive_lock); 683 r = radeon_cs_handle_lockup(rdev, r); 684 return r; 685 } 686 687 trace_radeon_cs(&parser); 688 689 r = radeon_cs_ib_chunk(rdev, &parser); 690 if (r) { 691 goto out; 692 } 693 r = radeon_cs_ib_vm_chunk(rdev, &parser); 694 if (r) { 695 goto out; 696 } 697 out: 698 radeon_cs_parser_fini(&parser, r, true); 699 up_read(&rdev->exclusive_lock); 700 r = radeon_cs_handle_lockup(rdev, r); 701 return r; 702 } 703 704 /** 705 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet 706 * @parser: parser structure holding parsing context. 707 * @pkt: where to store packet information 708 * 709 * Assume that chunk_ib_index is properly set. Will return -EINVAL 710 * if packet is bigger than remaining ib size. or if packets is unknown. 711 **/ 712 int radeon_cs_packet_parse(struct radeon_cs_parser *p, 713 struct radeon_cs_packet *pkt, 714 unsigned idx) 715 { 716 struct radeon_cs_chunk *ib_chunk = p->chunk_ib; 717 struct radeon_device *rdev = p->rdev; 718 uint32_t header; 719 int ret = 0, i; 720 721 if (idx >= ib_chunk->length_dw) { 722 DRM_ERROR("Can not parse packet at %d after CS end %d !\n", 723 idx, ib_chunk->length_dw); 724 return -EINVAL; 725 } 726 header = radeon_get_ib_value(p, idx); 727 pkt->idx = idx; 728 pkt->type = RADEON_CP_PACKET_GET_TYPE(header); 729 pkt->count = RADEON_CP_PACKET_GET_COUNT(header); 730 pkt->one_reg_wr = 0; 731 switch (pkt->type) { 732 case RADEON_PACKET_TYPE0: 733 if (rdev->family < CHIP_R600) { 734 pkt->reg = R100_CP_PACKET0_GET_REG(header); 735 pkt->one_reg_wr = 736 RADEON_CP_PACKET0_GET_ONE_REG_WR(header); 737 } else 738 pkt->reg = R600_CP_PACKET0_GET_REG(header); 739 break; 740 case RADEON_PACKET_TYPE3: 741 pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header); 742 break; 743 case RADEON_PACKET_TYPE2: 744 pkt->count = -1; 745 break; 746 default: 747 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx); 748 ret = -EINVAL; 749 goto dump_ib; 750 } 751 if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) { 752 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n", 753 pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw); 754 ret = -EINVAL; 755 goto dump_ib; 756 } 757 return 0; 758 759 dump_ib: 760 for (i = 0; i < ib_chunk->length_dw; i++) { 761 if (i == idx) 762 printk("\t0x%08x <---\n", radeon_get_ib_value(p, i)); 763 else 764 printk("\t0x%08x\n", radeon_get_ib_value(p, i)); 765 } 766 return ret; 767 } 768 769 /** 770 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP 771 * @p: structure holding the parser context. 772 * 773 * Check if the next packet is NOP relocation packet3. 774 **/ 775 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) 776 { 777 struct radeon_cs_packet p3reloc; 778 int r; 779 780 r = radeon_cs_packet_parse(p, &p3reloc, p->idx); 781 if (r) 782 return false; 783 if (p3reloc.type != RADEON_PACKET_TYPE3) 784 return false; 785 if (p3reloc.opcode != RADEON_PACKET3_NOP) 786 return false; 787 return true; 788 } 789 790 /** 791 * radeon_cs_dump_packet() - dump raw packet context 792 * @p: structure holding the parser context. 793 * @pkt: structure holding the packet. 794 * 795 * Used mostly for debugging and error reporting. 796 **/ 797 void radeon_cs_dump_packet(struct radeon_cs_parser *p, 798 struct radeon_cs_packet *pkt) 799 { 800 volatile uint32_t *ib; 801 unsigned i; 802 unsigned idx; 803 804 ib = p->ib.ptr; 805 idx = pkt->idx; 806 for (i = 0; i <= (pkt->count + 1); i++, idx++) 807 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); 808 } 809 810 /** 811 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet 812 * @parser: parser structure holding parsing context. 813 * @data: pointer to relocation data 814 * @offset_start: starting offset 815 * @offset_mask: offset mask (to align start offset on) 816 * @reloc: reloc informations 817 * 818 * Check if next packet is relocation packet3, do bo validation and compute 819 * GPU offset using the provided start. 820 **/ 821 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p, 822 struct radeon_bo_list **cs_reloc, 823 int nomm) 824 { 825 struct radeon_cs_chunk *relocs_chunk; 826 struct radeon_cs_packet p3reloc; 827 unsigned idx; 828 int r; 829 830 if (p->chunk_relocs == NULL) { 831 DRM_ERROR("No relocation chunk !\n"); 832 return -EINVAL; 833 } 834 *cs_reloc = NULL; 835 relocs_chunk = p->chunk_relocs; 836 r = radeon_cs_packet_parse(p, &p3reloc, p->idx); 837 if (r) 838 return r; 839 p->idx += p3reloc.count + 2; 840 if (p3reloc.type != RADEON_PACKET_TYPE3 || 841 p3reloc.opcode != RADEON_PACKET3_NOP) { 842 DRM_ERROR("No packet3 for relocation for packet at %d.\n", 843 p3reloc.idx); 844 radeon_cs_dump_packet(p, &p3reloc); 845 return -EINVAL; 846 } 847 idx = radeon_get_ib_value(p, p3reloc.idx + 1); 848 if (idx >= relocs_chunk->length_dw) { 849 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 850 idx, relocs_chunk->length_dw); 851 radeon_cs_dump_packet(p, &p3reloc); 852 return -EINVAL; 853 } 854 /* FIXME: we assume reloc size is 4 dwords */ 855 if (nomm) { 856 *cs_reloc = p->relocs; 857 (*cs_reloc)->gpu_offset = 858 (u64)relocs_chunk->kdata[idx + 3] << 32; 859 (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0]; 860 } else 861 *cs_reloc = &p->relocs[(idx / 4)]; 862 return 0; 863 } 864