/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       u32 ip_instance, u32 ring,
		       struct amdgpu_ring **out_ring)
{
	/* Right now all IPs have only one instance - multiple rings. */
	if (ip_instance != 0) {
		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
		return -EINVAL;
	}

	switch (ip_type) {
	default:
		DRM_ERROR("unknown ip type: %d\n", ip_type);
		return -EINVAL;
	case AMDGPU_HW_IP_GFX:
		if (ring < adev->gfx.num_gfx_rings) {
			*out_ring = &adev->gfx.gfx_ring[ring];
		} else {
			DRM_ERROR("only %d gfx rings are supported now\n",
				  adev->gfx.num_gfx_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_COMPUTE:
		if (ring < adev->gfx.num_compute_rings) {
			*out_ring = &adev->gfx.compute_ring[ring];
		} else {
			DRM_ERROR("only %d compute rings are supported now\n",
				  adev->gfx.num_compute_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_DMA:
		if (ring < adev->sdma.num_instances) {
			*out_ring = &adev->sdma.instance[ring].ring;
		} else {
			DRM_ERROR("only %d SDMA rings are supported\n",
				  adev->sdma.num_instances);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_UVD:
		*out_ring = &adev->uvd.ring;
		break;
	case AMDGPU_HW_IP_VCE:
		if (ring < 2) {
			*out_ring = &adev->vce.ring[ring];
		} else {
			DRM_ERROR("only two VCE rings are supported\n");
			return -EINVAL;
		}
		break;
	}
	return 0;
}

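/*
 * Added commentary (informal, not part of the original source): a request
 * naming (ip_type = AMDGPU_HW_IP_GFX, ip_instance = 0, ring = 0) resolves to
 * &adev->gfx.gfx_ring[0] above, while any non-zero ip_instance is rejected
 * with -EINVAL because no IP block currently exposes more than one instance.
 */
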
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct amdgpu_user_fence *uf,
				      struct drm_amdgpu_cs_chunk_fence *fence_data)
{
	struct drm_gem_object *gobj;
	uint32_t handle;

	handle = fence_data->handle;
	gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
				     fence_data->handle);
	if (gobj == NULL)
		return -EINVAL;

	uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	uf->offset = fence_data->offset;

	if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
		drm_gem_object_unreference_unlocked(gobj);
		return -EINVAL;
	}

	p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;

	drm_gem_object_unreference_unlocked(gobj);
	return 0;
}

int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	struct amdgpu_user_fence uf = {};
	unsigned size, num_ibs = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* get chunks */
	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto put_ctx;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto put_ctx;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
	if (ret)
		goto free_all_kdata;

	p->job->uf = uf;

	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		drm_free_large(p->chunks[i].kdata);
	kfree(p->chunks);
put_ctx:
	amdgpu_ctx_put(p->ctx);
free_chunk:
	kfree(chunk_array);

	return ret;
}

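/*
 * Added commentary: informal sketch of the layout amdgpu_cs_parser_init()
 * consumes, using only the field names referenced above; treat it as an
 * illustration, not as canonical UAPI documentation.
 *
 *   union drm_amdgpu_cs cs = {
 *           .in.ctx_id     = <context handle>,
 *           .in.num_chunks = 2,
 *           .in.chunks     = (uintptr_t)chunk_ptrs,  (array of user pointers)
 *   };
 *
 * where chunk_ptrs[] points at two struct drm_amdgpu_cs_chunk entries, e.g.
 * one AMDGPU_CHUNK_ID_IB chunk whose chunk_data holds a
 * drm_amdgpu_cs_chunk_ib and one AMDGPU_CHUNK_ID_FENCE chunk whose chunk_data
 * holds a drm_amdgpu_cs_chunk_fence; length_dw gives each payload's size in
 * dwords and is what the copy_from_user() calls above are sized from.
 */
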
/* Returns how many bytes TTM can move per IB.
 */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
	u64 real_vram_size = adev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&adev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *    __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

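/*
 * Added commentary: a worked example of the threshold above (numbers purely
 * illustrative). With real_vram_size = 4 GiB and vram_usage = 1 GiB,
 * half_vram is 2 GiB, half_free_vram is 1 GiB and the returned threshold is
 * 512 MiB. With no VRAM used at all the threshold is 1 GiB, i.e. 1/4 of
 * VRAM. Once vram_usage reaches half of VRAM or more, half_free_vram is 0
 * and the threshold clamps to the 1 MiB floor from max().
 */
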
317 */ 318 if (p->bytes_moved <= p->bytes_moved_threshold) 319 domain = bo->prefered_domains; 320 else 321 domain = bo->allowed_domains; 322 323 retry: 324 amdgpu_ttm_placement_from_domain(bo, domain); 325 initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); 326 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 327 p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - 328 initial_bytes_moved; 329 330 if (unlikely(r)) { 331 if (r != -ERESTARTSYS && domain != bo->allowed_domains) { 332 domain = bo->allowed_domains; 333 goto retry; 334 } 335 return r; 336 } 337 } 338 return 0; 339 } 340 341 static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, 342 union drm_amdgpu_cs *cs) 343 { 344 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 345 struct list_head duplicates; 346 bool need_mmap_lock = false; 347 int r; 348 349 INIT_LIST_HEAD(&p->validated); 350 351 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); 352 if (p->bo_list) { 353 need_mmap_lock = p->bo_list->has_userptr; 354 amdgpu_bo_list_get_list(p->bo_list, &p->validated); 355 } 356 357 INIT_LIST_HEAD(&duplicates); 358 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); 359 360 if (p->job->uf.bo) 361 list_add(&p->uf_entry.tv.head, &p->validated); 362 363 if (need_mmap_lock) 364 down_read(¤t->mm->mmap_sem); 365 366 r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); 367 if (unlikely(r != 0)) 368 goto error_reserve; 369 370 amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); 371 372 p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); 373 p->bytes_moved = 0; 374 375 r = amdgpu_cs_list_validate(p, &duplicates); 376 if (r) 377 goto error_validate; 378 379 r = amdgpu_cs_list_validate(p, &p->validated); 380 if (r) 381 goto error_validate; 382 383 if (p->bo_list) { 384 struct amdgpu_vm *vm = &fpriv->vm; 385 unsigned i; 386 387 for (i = 0; i < p->bo_list->num_entries; i++) { 388 struct amdgpu_bo *bo = p->bo_list->array[i].robj; 389 390 p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); 391 } 392 } 393 394 error_validate: 395 if (r) { 396 amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm); 397 ttm_eu_backoff_reservation(&p->ticket, &p->validated); 398 } 399 400 error_reserve: 401 if (need_mmap_lock) 402 up_read(¤t->mm->mmap_sem); 403 404 return r; 405 } 406 407 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) 408 { 409 struct amdgpu_bo_list_entry *e; 410 int r; 411 412 list_for_each_entry(e, &p->validated, tv.head) { 413 struct reservation_object *resv = e->robj->tbo.resv; 414 r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); 415 416 if (r) 417 return r; 418 } 419 return 0; 420 } 421 422 static int cmp_size_smaller_first(void *priv, struct list_head *a, 423 struct list_head *b) 424 { 425 struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head); 426 struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head); 427 428 /* Sort A before B if A is smaller. */ 429 return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; 430 } 431 432 /** 433 * cs_parser_fini() - clean parser states 434 * @parser: parser structure holding parsing context. 435 * @error: error number 436 * 437 * If error is set than unvalidate buffer, otherwise just free memory 438 * used by parsing context. 
/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: whether to back off the buffer reservation
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	unsigned i;

	if (!error) {
		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);

		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be evicted first,
		 * instead of large buffers whose eviction is more expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
	fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}

static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
				   struct amdgpu_vm *vm)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_page_directory(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm);
	if (r)
		return r;

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->job->sync, f);
			if (r)
				return r;
		}

	}

	r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo);
		}
	}

	return r;
}

static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring = p->job->ring;
	int i, r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs) {
		for (i = 0; i < p->job->num_ibs; i++) {
			r = amdgpu_ring_parse_cs(ring, p, i);
			if (r)
				return r;
		}
	}

	r = amdgpu_bo_vm_update_pte(p, vm);
	if (!r)
		amdgpu_cs_sync_rings(p);

	return r;
}

static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
	if (r == -EDEADLK) {
		r = amdgpu_gpu_reset(adev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

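/*
 * Added commentary: -EDEADLK appears to be used on this path to report a
 * detected GPU lockup. amdgpu_cs_handle_lockup() turns a successful reset
 * into -EAGAIN, so userspace is expected to simply retry the ioctl rather
 * than treat the submission as failed.
 */
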
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring);
		if (r)
			return r;

		if (parser->job->ring && parser->job->ring != ring)
			return -EINVAL;

		parser->job->ring = ring;

		if (ring->funcs->parse_cs) {
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			uint64_t offset;
			uint8_t *kptr;

			m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
						   &aobj);
			if (!aobj) {
				DRM_ERROR("IB va_start is invalid\n");
				return -EINVAL;
			}

			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);
		} else {
			r = amdgpu_ib_get(adev, vm, 0, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			ib->gpu_addr = chunk_ib->va_start;
		}

		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;
		ib->ctx = parser->ctx;
		j++;
	}

	/* add GDS resources to first IB */
	if (parser->bo_list) {
		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
		struct amdgpu_ib *ib = &parser->job->ibs[0];

		if (gds) {
			ib->gds_base = amdgpu_bo_gpu_offset(gds);
			ib->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			ib->gws_base = amdgpu_bo_gpu_offset(gws);
			ib->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			ib->oa_base = amdgpu_bo_gpu_offset(oa);
			ib->oa_size = amdgpu_bo_size(oa);
		}
	}
	/* wrap the last IB with user fence */
	if (parser->job->uf.bo) {
		struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];

		/* UVD & VCE fw doesn't support user fences */
		if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
		    parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
			return -EINVAL;

		ib->user = &parser->job->uf;
	}

	return 0;
}

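/*
 * Added commentary: an informal example of the two IB paths above. For a
 * ring that implements parse_cs (the UVD/VCE VM emulation case), a chunk
 * whose va_start falls inside a BO mapped at GPU pages
 * [m->it.start, m->it.last] is copied into a kernel-allocated IB so it can
 * be parsed later; for every other ring the IB stays in place and
 * ib->gpu_addr is simply set to chunk_ib->va_start. In both cases
 * ib->length_dw ends up as ib_bytes / 4.
 */
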
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	int i, j, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct drm_amdgpu_cs_chunk_dep *deps;
		struct amdgpu_cs_chunk *chunk;
		unsigned num_deps;

		chunk = &p->chunks[i];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
			continue;

		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
		num_deps = chunk->length_dw * 4 /
			sizeof(struct drm_amdgpu_cs_chunk_dep);

		for (j = 0; j < num_deps; ++j) {
			struct amdgpu_ring *ring;
			struct amdgpu_ctx *ctx;
			struct fence *fence;

			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
					       deps[j].ip_instance,
					       deps[j].ring, &ring);
			if (r)
				return r;

			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
			if (ctx == NULL)
				return -EINVAL;

			fence = amdgpu_ctx_get_fence(ctx, ring,
						     deps[j].handle);
			if (IS_ERR(fence)) {
				r = PTR_ERR(fence);
				amdgpu_ctx_put(ctx);
				return r;

			} else if (fence) {
				r = amdgpu_sync_fence(adev, &p->job->sync,
						      fence);
				fence_put(fence);
				amdgpu_ctx_put(ctx);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_ring *ring = p->job->ring;
	struct amd_sched_fence *fence;
	struct amdgpu_job *job;

	job = p->job;
	p->job = NULL;

	job->base.sched = &ring->sched;
	job->base.s_entity = &p->ctx->rings[ring->idx].entity;
	job->owner = p->filp;

	fence = amd_sched_fence_create(job->base.s_entity, p->filp);
	if (!fence) {
		amdgpu_job_free(job);
		return -ENOMEM;
	}

	job->base.s_fence = fence;
	p->fence = fence_get(&fence->base);

	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
					      &fence->base);
	job->ibs[job->num_ibs - 1].sequence = cs->out.handle;

	trace_amdgpu_cs_ioctl(job);
	amd_sched_entity_push_job(&job->base);

	return 0;
}

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		amdgpu_cs_parser_fini(&parser, r, false);
		r = amdgpu_cs_handle_lockup(adev, r);
		return r;
	}
	r = amdgpu_cs_parser_bos(&parser, data);
	if (r == -ENOMEM)
		DRM_ERROR("Not enough memory for command submission!\n");
	else if (r && r != -ERESTARTSYS)
		DRM_ERROR("Failed to process the buffer list %d!\n", r);
	else if (!r) {
		reserved_buffers = true;
		r = amdgpu_cs_ib_fill(adev, &parser);
	}

	if (!r) {
		r = amdgpu_cs_dependencies(adev, &parser);
		if (r)
			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
	}

	if (r)
		goto out;

	for (i = 0; i < parser.job->num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_submit(&parser, cs);

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	r = amdgpu_cs_handle_lockup(adev, r);
	return r;
}

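/*
 * Added commentary: the overall flow of amdgpu_cs_ioctl() above is
 *   amdgpu_cs_parser_init()   - copy in the chunk array from userspace
 *   amdgpu_cs_parser_bos()    - reserve and validate all buffer objects
 *   amdgpu_cs_ib_fill()       - turn IB chunks into amdgpu_ib structures
 *   amdgpu_cs_dependencies()  - add fence dependencies to the job's sync
 *   amdgpu_cs_ib_vm_chunk()   - parse UVD/VCE IBs and update page tables
 *   amdgpu_cs_submit()        - hand the job to the GPU scheduler
 * with amdgpu_cs_parser_fini() cleaning up on both the success and the
 * error paths.
 */
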
/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct fence *fence;
	long r;

	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
			       wait->in.ring, &ring);
	if (r)
		return r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = fence_wait_timeout(fence, true, timeout);
		fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_find_mapping - find a BO VA mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the mapping structure when found, NULL
 * otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		       uint64_t addr, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i;

	if (!parser->bo_list)
		return NULL;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	for (i = 0; i < parser->bo_list->num_entries; i++) {
		struct amdgpu_bo_list_entry *lobj;

		lobj = &parser->bo_list->array[i];
		if (!lobj->bo_va)
			continue;

		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = lobj->bo_va->bo;
			return mapping;
		}

		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = lobj->bo_va->bo;
			return mapping;
		}
	}

	return NULL;
}

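/*
 * Added commentary: a worked example for amdgpu_cs_find_mapping(), using
 * illustrative values and assuming the usual 4 KiB AMDGPU_GPU_PAGE_SIZE.
 * An addr of 0x201000 becomes GPU page 0x201 after the division above, so a
 * mapping with it.start = 0x200 and it.last = 0x2ff contains it and is
 * returned, with its backing BO stored in *bo.
 */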