/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
{
	struct drm_gem_object *gobj;
	struct amdgpu_bo *bo;
	unsigned long size;
	int r;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &bo->tbo;
	/* One for TTM and one for the CS job */
	p->uf_entry.tv.num_shared = 2;

	drm_gem_object_put(gobj);

	size = amdgpu_bo_size(bo);
	if (size != PAGE_SIZE || (data->offset + 8) > size) {
		r = -EINVAL;
		goto error_unref;
	}

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		r = -EINVAL;
		goto error_unref;
	}

	*offset = data->offset;

	return 0;

error_unref:
	amdgpu_bo_unref(&bo);
	return r;
}

static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_bo_list_in *data)
{
	int r;
	struct drm_amdgpu_bo_list_entry *info = NULL;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	kvfree(info);

	return r;
}
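
/**
 * amdgpu_cs_parser_init() - initialize the parser from the CS ioctl data
 * @p: parser structure holding the parsing context
 * @cs: CS ioctl data copied in from userspace
 *
 * Look up the submission context, copy the chunk array from userspace and
 * validate the individual chunks (IBs, user fence, BO list handles and sync
 * object dependencies), then allocate the job for the IB chunks found.
 *
 * Returns 0 on success or a negative error code on failure.
 */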
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	unsigned size, num_ibs = 0;
	uint32_t uf_offset = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* skip guilty context job */
	if (atomic_read(&p->ctx->guilty) == 1) {
		ret = -ECANCELED;
		goto free_chunk;
	}

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				   GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
							 &uf_offset);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			size = sizeof(struct drm_amdgpu_bo_list_in);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
	if (ret)
		goto free_all_kdata;

	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kvfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kvfree(chunk_array);

	return ret;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}
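
/* For example, if log2_max_MBps were 6, one second of accumulated time
 * (1000000 us) would convert to 1000000 << 6 = 64000000 bytes, i.e. a
 * 64 MB/s move budget, and bytes_to_us() is simply the inverse mapping.
 * The value 6 is only illustrative; log2_max_MBps itself is initialized
 * outside of this file.
 */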

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}
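
/**
 * amdgpu_cs_bo_validate() - validate a single BO against the move budget
 * @param: amdgpu_cs_parser pointer passed through the validation callback
 * @bo: buffer object to validate
 *
 * Place the BO in its preferred domains as long as the byte budgets computed
 * by amdgpu_cs_get_threshold_for_moves() are not exhausted, otherwise fall
 * back to the allowed domains.  Moved bytes are accounted in the parser so
 * the budget check covers the whole submission.
 */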
static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_cs_parser *p = param;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv
	};
	uint32_t domain;
	int r;

	if (bo->tbo.pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_bo_in_cpu_visible_vram(bo))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
				   struct list_head *validated)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_bo_list_entry *lobj;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;

		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
		    lobj->user_invalidated && lobj->user_pages) {
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;

			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
		}

		r = amdgpu_cs_bo_validate(p, bo);
		if (r)
			return r;

		kvfree(lobj->user_pages);
		lobj->user_pages = NULL;
	}
	return 0;
}
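
/**
 * amdgpu_cs_parser_bos() - gather, reserve and validate the buffers of a CS
 * @p: parser structure holding the parsing context
 * @cs: CS ioctl data copied in from userspace
 *
 * Build the list of buffers referenced by the submission (BO list handle or
 * BO_HANDLES chunk, VM page directories, optional user fence BO and userptr
 * BOs), reserve them and validate them within the move budget.  On failure
 * the reservation is backed off again.
 */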
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
	int r;

	INIT_LIST_HEAD(&p->validated);

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	/* One for TTM and one for the CS job */
	amdgpu_bo_list_for_each_entry(e, p->bo_list)
		e->tv.num_shared = 2;

	amdgpu_bo_list_get_list(p->bo_list, &p->validated);

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);

	/* Get userptr backing pages. If pages are updated after being
	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
	 * will do amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
		bool userpage_invalidated = false;
		int i;

		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					       sizeof(struct page *),
					       GFP_KERNEL | __GFP_ZERO);
		if (!e->user_pages) {
			DRM_ERROR("kvmalloc_array failure\n");
			return -ENOMEM;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			return r;
		}

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
				   &duplicates);
	if (unlikely(r != 0)) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
		goto out;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

		e->bo_va = amdgpu_vm_bo_find(vm, bo);

		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
			e->chain = dma_fence_chain_alloc();
			if (!e->chain) {
				r = -ENOMEM;
				goto error_validate;
			}
		}
	}

	/* Move fence waiting after getting reservation lock of
	 * PD root, so there is no need for a ctx mutex lock.
	 */
	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
	if (unlikely(r != 0)) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
		goto error_validate;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_bo_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r)
		goto error_validate;

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	gds = p->bo_list->gds_obj;
	gws = p->bo_list->gws_obj;
	oa = p->bo_list->oa_obj;

	if (gds) {
		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}

	if (!r && p->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);

		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

error_validate:
	if (r) {
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			dma_fence_chain_free(e->chain);
			e->chain = NULL;
		}
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
	}
out:
	return r;
}
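
/**
 * amdgpu_cs_sync_rings() - add implicit synchronization to the job
 * @p: parser structure holding the parsing context
 *
 * For every validated BO add the fences from its reservation object to the
 * job's sync object, choosing explicit or owner-based synchronization
 * depending on amdgpu_bo_explicit_sync().
 */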
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
		struct dma_resv *resv = bo->tbo.base.resv;
		enum amdgpu_sync_mode sync_mode;

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}
	return 0;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator to backoff the reservation
 *
 * If @error is set, back off the buffer reservations; otherwise just free the
 * memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
{
	unsigned i;

	if (error && backoff) {
		struct amdgpu_bo_list_entry *e;

		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
			dma_fence_chain_free(e->chain);
			e->chain = NULL;
		}

		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	if (parser->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);

		amdgpu_bo_unref(&uf);
	}
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
		unsigned i, j;

		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			uint64_t offset, va_start;
			struct amdgpu_ib *ib;
			uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
				return r;
			}

			if ((va_start + chunk_ib->ib_bytes) >
			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r)
				return r;

			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;
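
			/* kptr now points at the IB inside the CPU mapping of
			 * the backing BO.  parse_cs rings get a validated copy
			 * in the job IB, patch_cs_in_place rings are patched
			 * directly through the mapping.
			 */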
			if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);

				r = amdgpu_ring_parse_cs(ring, p, j);
				if (r)
					return r;
			} else {
				ib->ptr = (uint32_t *)kptr;
				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
				amdgpu_bo_kunmap(aobj);
				if (r)
					return r;
			}

			j++;
		}
	}

	if (!p->job->vm)
		return amdgpu_cs_sync_rings(p);

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
	if (r)
		return r;

	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
		if (r)
			return r;

		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		/* ignore duplicates */
		bo = ttm_to_amdgpu_bo(e->tv.bo);
		if (!bo)
			continue;

		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
		if (r)
			return r;

		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
	if (r)
		return r;

	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	return amdgpu_cs_sync_rings(p);
}
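
/**
 * amdgpu_cs_ib_fill() - set up the job IBs from the IB chunks
 * @adev: amdgpu device
 * @parser: parser structure holding the parsing context
 *
 * Walk the IB chunks, pick the scheduler entity for the submission and fill
 * in one amdgpu_ib per chunk.  All IBs of a CS must target the same entity,
 * and rings without user fence support reject a user fence address.
 */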
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int r, ce_preempt = 0, de_preempt = 0;
	struct amdgpu_ring *ring;
	int i, j;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct drm_sched_entity *entity;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
				else
					de_preempt++;
			}

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
				return -EINVAL;
		}

		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
					  chunk_ib->ip_instance, chunk_ib->ring,
					  &entity);
		if (r)
			return r;

		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;

		if (parser->entity && parser->entity != entity)
			return -EINVAL;

		/* Return if there is no run queue associated with this entity,
		 * possibly because the HW IP is disabled.
		 */
		if (entity->rq == NULL)
			return -EINVAL;

		parser->entity = entity;

		ring = to_amdgpu_ring(entity->rq->sched);
		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
				  chunk_ib->ib_bytes : 0,
				  AMDGPU_IB_POOL_DELAYED, ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
		}

		ib->gpu_addr = chunk_ib->va_start;
		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;

		j++;
	}

	/* MM engine doesn't support user fences */
	ring = to_amdgpu_ring(parser->entity->rq->sched);
	if (parser->job->uf_addr && ring->funcs->no_user_fence)
		return -EINVAL;

	return 0;
}

static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
				       struct amdgpu_cs_chunk *chunk)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_dep *deps;

	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->job->sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
						 uint32_t handle, u64 point,
						 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->job->sync, fence);
	dma_fence_put(fence);

	return r;
}

static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i, r;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
							  0, 0);
		if (r)
			return r;
	}

	return 0;
}
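
/**
 * amdgpu_cs_process_syncobj_timeline_in_dep() - add timeline syncobj waits
 * @p: parser structure holding the parsing context
 * @chunk: chunk of type AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
 *
 * Look up the fence behind each (handle, point) pair in the chunk and add it
 * as a dependency of the job.
 */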
static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
						     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i, r;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
							  syncobj_deps[i].handle,
							  syncobj_deps[i].point,
							  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = dma_fence_chain_alloc();
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			dma_fence_chain_free(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}
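
/**
 * amdgpu_cs_post_dependencies() - signal the post-dependency sync objects
 * @p: parser structure holding the parsing context
 *
 * Attach the CS fence to every syncobj collected from the SYNCOBJ_OUT and
 * SYNCOBJ_TIMELINE_SIGNAL chunks, either as a timeline point or by replacing
 * the syncobj fence.
 */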
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			p->post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}
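
/**
 * amdgpu_cs_submit() - push the prepared job to the scheduler
 * @p: parser structure holding the parsing context
 * @cs: CS ioctl data, used to return the sequence number
 *
 * Arm the scheduler job, re-check that no userptr BO was invalidated in the
 * meantime, publish the fence to the context ring and to all reservation
 * objects, and finally push the job to the scheduler entity.
 */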
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_sched_entity *entity = p->entity;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;
	int r;

	job = p->job;
	p->job = NULL;

	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
	if (r)
		goto error_unlock;

	drm_sched_job_arm(&job->base);

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and the fence
	 * is added to the BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptr BOs were invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN; drmIoctl() in libdrm will restart the amdgpu_cs_ioctl.
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
	}
	if (r) {
		r = -EAGAIN;
		goto error_abort;
	}

	p->fence = dma_fence_get(&job->base.s_fence->finished);

	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
	amdgpu_cs_post_dependencies(p);

	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	job->uf_sequence = seq;

	amdgpu_job_free_resources(job);

	trace_amdgpu_cs_ioctl(job);
	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
	drm_sched_entity_push_job(&job->base);

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		struct dma_resv *resv = e->tv.bo->base.resv;
		struct dma_fence_chain *chain = e->chain;

		if (!chain)
			continue;

		/*
		 * Work around dma_resv shortcomings by wrapping up the
		 * submission in a dma_fence_chain and adding it as the
		 * exclusive fence.
		 */
		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
				     dma_fence_get(p->fence), 1);

		rcu_assign_pointer(resv->fence_excl, &chain->base);
		e->chain = NULL;
	}

	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
	mutex_unlock(&p->adev->notifier_lock);

	return 0;

error_abort:
	drm_sched_job_cleanup(&job->base);
	mutex_unlock(&p->adev->notifier_lock);

error_unlock:
	amdgpu_job_free(job);
	return r;
}

static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
{
	int i;

	if (!trace_amdgpu_cs_enabled())
		return;

	for (i = 0; i < parser->job->num_ibs; i++)
		trace_amdgpu_cs(parser, i);
}
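
/**
 * amdgpu_cs_ioctl() - handle a command submission from userspace
 * @dev: drm device
 * @data: union drm_amdgpu_cs from userspace
 * @filp: file private
 *
 * Top level entry point of a submission: initialize the parser, fill in the
 * IBs, resolve dependencies, reserve and validate the buffers, run the VM
 * updates and hand the job over to the scheduler.
 */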
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int r;

	if (amdgpu_ras_intr_triggered())
		return -EHWPOISON;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		if (printk_ratelimit())
			DRM_ERROR("Failed to initialize parser %d!\n", r);
		goto out;
	}

	r = amdgpu_cs_ib_fill(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_dependencies(adev, &parser);
	if (r) {
		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
		goto out;
	}

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			DRM_ERROR("Failed to process the buffer list %d!\n", r);
		goto out;
	}

	reserved_buffers = true;

	trace_amdgpu_cs_ibs(&parser);

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto out;

	r = amdgpu_cs_submit(&parser, cs);
out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);

	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;
		dma_fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
{
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;

	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);

	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);

	return fence;
}
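
/**
 * amdgpu_cs_fence_to_handle_ioctl - export a CS fence to a handle
 *
 * @dev: drm device
 * @data: union drm_amdgpu_fence_to_handle from userspace
 * @filp: file private
 *
 * Convert the fence of a previous submission into a syncobj handle, a syncobj
 * fd or a sync_file fd, depending on info->in.what.
 */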
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	if (!fence)
		fence = dma_fence_get_stub();

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		dma_fence_put(fence);
		return -EINVAL;
	}
}

/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
{
	uint32_t fence_count = wait->in.fence_count;
	unsigned int i;
	long r = 1;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;
		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		r = dma_fence_wait_timeout(fence, true, timeout);
		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;

		if (fence->error)
			return fence->error;
	}

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);

	return 0;
}

/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}

/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
	struct drm_amdgpu_fence *fences;
	int r;

	/* Get the fences from userspace */
	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
			       GFP_KERNEL);
	if (fences == NULL)
		return -ENOMEM;

	fences_user = u64_to_user_ptr(wait->in.fences);
	if (copy_from_user(fences, fences_user,
			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
		r = -EFAULT;
		goto err_free_fences;
	}

	if (wait->in.wait_all)
		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
	else
		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

err_free_fences:
	kfree(fences);

	return r;
}

/**
 * amdgpu_cs_find_mapping - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: Placeholder to return found BO mapping
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns 0 and fills in @bo and @map when the
 * mapping was found, a negative error code otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
		return -EINVAL;

	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}