/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"

static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;
	p->uf_entry.user_pages = NULL;

	/* The amdgpu_bo reference taken above keeps the BO alive, so drop the
	 * GEM reference from the lookup before the error checks below;
	 * otherwise an early return would leak it.
	 */
	drm_gem_object_put_unlocked(gobj);

	size = amdgpu_bo_size(p->uf_entry.robj);
	if (size != PAGE_SIZE || (data->offset + 8) > size)
		return -EINVAL;

	*offset = data->offset;

	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
		amdgpu_bo_unref(&p->uf_entry.robj);
		return -EINVAL;
	}

	return 0;
}

static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_bo_list_in *data)
{
	int r;
	struct drm_amdgpu_bo_list_entry *info = NULL;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	if (info)
		kvfree(info);

	return r;
}

static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	unsigned size, num_ibs = 0;
	uint32_t uf_offset = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* Take the context lock before the guilty check so that the unlock in
	 * amdgpu_cs_parser_fini() is always balanced, even on this error path.
	 */
	mutex_lock(&p->ctx->lock);

	/* skip guilty context job */
	if (atomic_read(&p->ctx->guilty) == 1) {
		ret = -ECANCELED;
		goto free_chunk;
	}
	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
							 &uf_offset);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			size = sizeof(struct drm_amdgpu_bo_list_in);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
	if (ret)
		goto free_all_kdata;

	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_entry.robj)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
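	 *
	 * As an illustrative example: with log2_max_MBps == 10 (~1 GB/s),
	 * one second worth of accumulated us (1000000) converts to
	 * 1000000 << 10 bytes, i.e. roughly 1 GB.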
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;

	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}

static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
				 struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.resv,
		.flags = 0
	};
	uint32_t domain;
	int r;

	if (bo->pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_bo_in_cpu_visible_vram(bo))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

/* Last resort, try to evict something from the current working set */
static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
				struct amdgpu_bo *validated)
{
	uint32_t domain = validated->allowed_domains;
	struct ttm_operation_ctx ctx = { true, false };
	int r;

	if (!p->evictable)
		return false;

	for (; &p->evictable->tv.head != &p->validated;
	     p->evictable = list_prev_entry(p->evictable, tv.head)) {

		struct amdgpu_bo_list_entry *candidate = p->evictable;
		struct amdgpu_bo *bo = candidate->robj;
		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
		bool update_bytes_moved_vis;
		uint32_t other;

		/* If we reached our current BO we can forget it */
		if (candidate->robj == validated)
			break;

		/* We can't move pinned BOs here */
		if (bo->pin_count)
			continue;

		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);

		/* Check if this BO is in one of the domains we need space for */
		if (!(other & domain))
			continue;

		/* Check if we can move this BO somewhere else */
		other = bo->allowed_domains & ~domain;
		if (!other)
			continue;

		/* Good we can try to move this BO somewhere else */
		update_bytes_moved_vis =
			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
			amdgpu_bo_in_cpu_visible_vram(bo);
		amdgpu_bo_placement_from_domain(bo, other);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		p->bytes_moved += ctx.bytes_moved;
		if (update_bytes_moved_vis)
			p->bytes_moved_vis += ctx.bytes_moved;

		if (unlikely(r))
			break;

		p->evictable = list_prev_entry(p->evictable, tv.head);
		list_move(&candidate->tv.head, &p->validated);

		return true;
	}

	return false;
}

static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_cs_parser *p = param;
	int r;

	do {
		r = amdgpu_cs_bo_validate(p, bo);
	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
	if (r)
		return r;

	if (bo->shadow)
		r = amdgpu_cs_bo_validate(p, bo->shadow);

	return r;
}

static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
				   struct list_head *validated)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_bo_list_entry *lobj;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = lobj->robj;
		bool binding_userptr = false;
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;

		/* Check if we have user pages and nobody bound the BO already */
		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
		    lobj->user_pages) {
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
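			/* Validating in the CPU domain first moves the BO to
			 * system memory, so that the freshly acquired user
			 * pages can be bound to its ttm_tt right below.
			 */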
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;
			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
			binding_userptr = true;
		}

		if (p->evictable == lobj)
			p->evictable = NULL;

		r = amdgpu_cs_validate(p, bo);
		if (r)
			return r;

		if (binding_userptr) {
			kvfree(lobj->user_pages);
			lobj->user_pages = NULL;
		}
	}
	return 0;
}

static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
	unsigned tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
	if (p->bo_list->first_userptr != p->bo_list->num_entries)
		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);

	while (1) {
		struct list_head need_pages;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0)) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
			goto error_free_pages;
		}

		INIT_LIST_HEAD(&need_pages);
		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = e->robj;

			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it and try again
				 */
				release_pages(e->user_pages,
					      bo->tbo.ttm->num_pages);
				kvfree(e->user_pages);
				e->user_pages = NULL;
			}

			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
			    !e->user_pages) {
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(e->robj);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried too many times, just abort */
		if (!--tries) {
			r = -EDEADLK;
			DRM_ERROR("deadlock in %s\n", __func__);
			goto error_free_pages;
		}

		/* Fill the page arrays for all userptrs.
		 */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->robj->tbo.ttm;

			e->user_pages = kvmalloc_array(ttm->num_pages,
						       sizeof(struct page*),
						       GFP_KERNEL | __GFP_ZERO);
			if (!e->user_pages) {
				r = -ENOMEM;
				DRM_ERROR("calloc failure in %s\n", __func__);
				goto error_free_pages;
			}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
			if (r) {
				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
				kvfree(e->user_pages);
				e->user_pages = NULL;
				goto error_free_pages;
			}
		}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;
	p->evictable = list_last_entry(&p->validated,
				       struct amdgpu_bo_list_entry,
				       tv.head);

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r) {
		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r) {
		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
		goto error_validate;
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	gds = p->bo_list->gds_obj;
	gws = p->bo_list->gws_obj;
	oa = p->bo_list->oa_obj;

	amdgpu_bo_list_for_each_entry(e, p->bo_list)
		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);

	if (gds) {
		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
		p->job->gds_size = amdgpu_bo_size(gds);
	}
	if (gws) {
		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
		p->job->gws_size = amdgpu_bo_size(gws);
	}
	if (oa) {
		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
		p->job->oa_size = amdgpu_bo_size(oa);
	}

	if (!r && p->uf_entry.robj) {
		struct amdgpu_bo *uf = p->uf_entry.robj;

		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_free_pages:

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		if (!e->user_pages)
			continue;

		release_pages(e->user_pages,
			      e->robj->tbo.ttm->num_pages);
		kvfree(e->user_pages);
	}

	return r;
}

static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;
		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
				     amdgpu_bo_explicit_sync(e->robj));

		if (r)
			return r;
	}
	return 0;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator to backoff the reservation
 *
 * If error is set, then unvalidate the buffers; otherwise just free the
 * memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
{
	unsigned i;

	if (error && backoff)
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);

	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
		drm_syncobj_put(parser->post_dep_syncobjs[i]);
	kfree(parser->post_dep_syncobjs);

	dma_fence_put(parser->fence);

	if (parser->ctx) {
		mutex_unlock(&parser->ctx->lock);
		amdgpu_ctx_put(parser->ctx);
	}
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}

static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int r;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync,
			      fpriv->prt_va->last_pt_update, false);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev)) {
		struct dma_fence *f;

		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		f = bo_va->last_pt_update;
		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		struct dma_fence *f;

		/* ignore duplicates */
		bo = e->robj;
		if (!bo)
			continue;

		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		f = bo_va->last_pt_update;
		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_directories(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
	if (r)
		return r;

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			/* ignore duplicates */
			if (!e->robj)
				continue;

			amdgpu_vm_bo_invalidate(adev, e->robj, false);
		}
	}

	return r;
}

static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring = p->ring;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
		unsigned i, j;

		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			uint64_t offset, va_start;
			struct amdgpu_ib *ib;
			uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
				return r;
			}

			if ((va_start + chunk_ib->ib_bytes) >
			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;

			if (p->ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);

				r = amdgpu_ring_parse_cs(ring, p, j);
				if (r)
					return r;
			} else {
				ib->ptr = (uint32_t *)kptr;
				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
				amdgpu_bo_kunmap(aobj);
				if (r)
					return r;
			}

			j++;
		}
	}

	if (p->job->vm) {
		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);

		r = amdgpu_bo_vm_update_pte(p);
		if (r)
			return r;

		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
		if (r)
			return r;
	}

	return amdgpu_cs_sync_rings(p);
}

static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r, ce_preempt = 0, de_preempt = 0;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
				else
					de_preempt++;
			}

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
				return -EINVAL;
		}

		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
		if (r)
			return r;

		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
			if (!parser->ctx->preamble_presented) {
				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
				parser->ctx->preamble_presented = true;
			}
		}

		if (parser->ring && parser->ring != ring)
			return -EINVAL;

		parser->ring = ring;

		r = amdgpu_ib_get(adev, vm,
				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
				  ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
		}

		ib->gpu_addr = chunk_ib->va_start;
		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;

		j++;
	}

	/* UVD & VCE fw doesn't support user fences */
	if (parser->job->uf_addr && (
	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
		return -EINVAL;

	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
}

static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
				       struct amdgpu_cs_chunk *chunk)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_dep *deps;

	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ring *ring;
		struct amdgpu_ctx *ctx;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
					 deps[i].ip_type,
					 deps[i].ip_instance,
					 deps[i].ring, &ring);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, ring,
					     deps[i].handle);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			amdgpu_ctx_put(ctx);
			return r;
		} else if (fence) {
			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
					      true);
			dma_fence_put(fence);
			amdgpu_ctx_put(ctx);
			if (r)
				return r;
		}
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
						 uint32_t handle)
{
	int r;
	struct dma_fence *fence;

	r = drm_syncobj_find_fence(p->filp, handle, &fence);
	if (r)
		return r;

	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
	dma_fence_put(fence);

	return r;
}

static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
{
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_sem *deps;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
{
	unsigned num_deps;
	int i;
	struct drm_amdgpu_cs_chunk_sem *deps;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	p->post_dep_syncobjs = kmalloc_array(num_deps,
					     sizeof(struct drm_syncobj *),
					     GFP_KERNEL);
	p->num_post_dep_syncobjs = 0;

	if (!p->post_dep_syncobjs)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_dep_syncobjs[i])
			return -EINVAL;
		p->num_post_dep_syncobjs++;
	}
	return 0;
}

static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
		}
	}

	return 0;
}

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
}

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_ring *ring = p->ring;
	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
	enum drm_sched_priority priority;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;

	int r;

	amdgpu_mn_lock(p->mn);
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = e->robj;

		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
			amdgpu_mn_unlock(p->mn);
			return -ERESTARTSYS;
		}
	}

	job = p->job;
	p->job = NULL;

	r = drm_sched_job_init(&job->base, entity, p->filp);
	if (r) {
		amdgpu_job_free(job);
		amdgpu_mn_unlock(p->mn);
		return r;
	}

	job->owner = p->filp;
	p->fence = dma_fence_get(&job->base.s_fence->finished);

	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
	if (r) {
		dma_fence_put(p->fence);
		dma_fence_put(&job->base.s_fence->finished);
		amdgpu_job_free(job);
		amdgpu_mn_unlock(p->mn);
		return r;
	}

	amdgpu_cs_post_dependencies(p);

	cs->out.handle = seq;
	job->uf_sequence = seq;

	amdgpu_job_free_resources(job);

	trace_amdgpu_cs_ioctl(job);
	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
	priority = job->base.s_priority;
	drm_sched_entity_push_job(&job->base, entity);

	ring = to_amdgpu_ring(entity->rq->sched);
	amdgpu_ring_priority_get(ring, priority);

	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
	amdgpu_mn_unlock(p->mn);

	return 0;
}

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		goto out;
	}

	r = amdgpu_cs_ib_fill(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to process the buffer list %d!\n", r);
		goto out;
	}

	reserved_buffers = true;

	r = amdgpu_cs_dependencies(adev, &parser);
	if (r) {
		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
		goto out;
	}

	for (i = 0; i < parser.job->num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_submit(&parser, cs);

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
				 wait->in.ip_type, wait->in.ip_instance,
				 wait->in.ring, &ring);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;
		dma_fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
{
	struct amdgpu_ring *ring;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;

	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);

	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
				 user->ip_instance, user->ring, &ring);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}

	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
	amdgpu_ctx_put(ctx);

	return fence;
}

int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		return -EINVAL;
	}
}

/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
{
	uint32_t fence_count = wait->in.fence_count;
	unsigned int i;
	long r = 1;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;
		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		r = dma_fence_wait_timeout(fence, true, timeout);
		/* Sample the fence error before dropping our reference */
		if (r > 0 && fence->error)
			r = fence->error;

		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;
	}

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);

	return 0;
}

/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}

/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
	struct drm_amdgpu_fence *fences;
	int r;

	/* Get the fences from userspace */
	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
			       GFP_KERNEL);
	if (fences == NULL)
		return -ENOMEM;

	fences_user = u64_to_user_ptr(wait->in.fences);
	if (copy_from_user(fences, fences_user,
			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
		r = -EFAULT;
		goto err_free_fences;
	}

	if (wait->in.wait_all)
		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
	else
		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

err_free_fences:
	kfree(fences);

	return r;
}

/**
 * amdgpu_cs_find_mapping - find the bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: resulting mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the BO and mapping when found, an error
 * code otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
		return -EINVAL;

	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}