1 /* 2 * Copyright 2014-2018 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #define pr_fmt(fmt) "kfd2kgd: " fmt 24 25 #include <linux/list.h> 26 #include <linux/pagemap.h> 27 #include <linux/sched/mm.h> 28 #include <linux/dma-buf.h> 29 #include <drm/drmP.h> 30 #include "amdgpu_object.h" 31 #include "amdgpu_vm.h" 32 #include "amdgpu_amdkfd.h" 33 34 /* Special VM and GART address alignment needed for VI pre-Fiji due to 35 * a HW bug. 36 */ 37 #define VI_BO_SIZE_ALIGN (0x8000) 38 39 /* BO flag to indicate a KFD userptr BO */ 40 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) 41 42 /* Userptr restore delay, just long enough to allow consecutive VM 43 * changes to accumulate 44 */ 45 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 46 47 /* Impose limit on how much memory KFD can use */ 48 static struct { 49 uint64_t max_system_mem_limit; 50 uint64_t max_ttm_mem_limit; 51 int64_t system_mem_used; 52 int64_t ttm_mem_used; 53 spinlock_t mem_limit_lock; 54 } kfd_mem_limit; 55 56 /* Struct used for amdgpu_amdkfd_bo_validate */ 57 struct amdgpu_vm_parser { 58 uint32_t domain; 59 bool wait; 60 }; 61 62 static const char * const domain_bit_to_string[] = { 63 "CPU", 64 "GTT", 65 "VRAM", 66 "GDS", 67 "GWS", 68 "OA" 69 }; 70 71 #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] 72 73 static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); 74 75 76 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) 77 { 78 return (struct amdgpu_device *)kgd; 79 } 80 81 static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, 82 struct kgd_mem *mem) 83 { 84 struct kfd_bo_va_list *entry; 85 86 list_for_each_entry(entry, &mem->bo_va_list, bo_list) 87 if (entry->bo_va->base.vm == avm) 88 return false; 89 90 return true; 91 } 92 93 /* Set memory usage limits. 
Currently, limits are
 * System (TTM + userptr) memory - 3/4 of System RAM
 * TTM memory - 3/8 of System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
	struct sysinfo si;
	uint64_t mem;

	si_meminfo(&si);
	mem = si.totalram - si.totalhigh;
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
}

static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
	uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
	int ret = 0;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	vram_needed = 0;
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		/* TTM GTT memory */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size + size;
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		/* Userptr */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
	} else {
		/* VRAM and SG */
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
			vram_needed = size;
	}

	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
	     kfd_mem_limit.max_system_mem_limit) ||
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
	     kfd_mem_limit.max_ttm_mem_limit) ||
	    (adev->kfd.vram_used + vram_needed >
	     adev->gmc.real_vram_size - reserved_for_pt)) {
		ret = -ENOMEM;
	} else {
		kfd_mem_limit.system_mem_used += system_mem_needed;
		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
		adev->kfd.vram_used += vram_needed;
	}

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}

static void unreserve_mem_limit(struct amdgpu_device *adev,
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
	} else {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
			adev->kfd.vram_used -= size;
			WARN_ONCE(adev->kfd.vram_used < 0,
				  "kfd VRAM memory accounting unbalanced");
		}
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}

void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	u32 domain = bo->preferred_domains;
	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);

	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		domain = AMDGPU_GEM_DOMAIN_CPU;
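		/* Userptr BOs were charged against the limits as CPU-domain,
		 * non-SG allocations in amdgpu_amdkfd_reserve_mem_limit(), so
		 * undo the accounting with the same domain/sg pair here.
		 */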
		sg = false;
	}

	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
}


/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
 * reservation object.
 *
 * @bo: [IN] Remove eviction fence(s) from this BO
 * @ef: [IN] If ef is specified, then this eviction fence is removed if it
 *  is present in the shared list.
 * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
 *  from BO's reservation object shared list.
 * @ef_count: [OUT] Number of fences in ef_list.
 *
 * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
 *  called to restore the eviction fences and to avoid memory leak. This is
 *  useful for shared BOs.
 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
 */
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
					struct amdgpu_amdkfd_fence *ef,
					struct amdgpu_amdkfd_fence ***ef_list,
					unsigned int *ef_count)
{
	struct reservation_object *resv = bo->tbo.resv;
	struct reservation_object_list *old, *new;
	unsigned int i, j, k;

	if (!ef && !ef_list)
		return -EINVAL;

	if (ef_list) {
		*ef_list = NULL;
		*ef_count = 0;
	}

	old = reservation_object_get_list(resv);
	if (!old)
		return 0;

	new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]),
		      GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	/* Go through all the shared fences in the reservation object and sort
	 * the interesting ones to the end of the list.
	 */
	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(old->shared[i],
					      reservation_object_held(resv));

		if ((ef && f->context == ef->base.context) ||
		    (!ef && to_amdgpu_amdkfd_fence(f)))
			RCU_INIT_POINTER(new->shared[--j], f);
		else
			RCU_INIT_POINTER(new->shared[k++], f);
	}
	new->shared_max = old->shared_max;
	new->shared_count = k;

	if (!ef) {
		unsigned int count = old->shared_count - j;

		/* Alloc memory for count number of eviction fence pointers.
		 * Fill the ef_list array and ef_count
		 */
		*ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
		*ef_count = count;

		if (!*ef_list) {
			kfree(new);
			return -ENOMEM;
		}
	}

	/* Install the new fence list, seqcount provides the barriers */
	preempt_disable();
	write_seqcount_begin(&resv->seq);
	RCU_INIT_POINTER(resv->fence, new);
	write_seqcount_end(&resv->seq);
	preempt_enable();

	/* Drop the references to the removed fences or move them to ef_list */
	for (i = j, k = 0; i < old->shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(new->shared[i],
					      reservation_object_held(resv));
		if (!ef)
			(*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
		else
			dma_fence_put(f);
	}
	kfree_rcu(old, rcu);

	return 0;
}

/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
 * reservation object.
 *
 * @bo: [IN] Add eviction fences to this BO
 * @ef_list: [IN] List of eviction fences to be added
 * @ef_count: [IN] Number of fences in ef_list.
 *
 * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
 *  function.
 */
static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
				struct amdgpu_amdkfd_fence **ef_list,
				unsigned int ef_count)
{
	int i;

	if (!ef_list || !ef_count)
		return;

	for (i = 0; i < ef_count; i++) {
		amdgpu_bo_fence(bo, &ef_list[i]->base, true);
		/* Re-adding the fence takes an additional reference. Drop that
		 * reference.
		 */
		dma_fence_put(&ef_list[i]->base);
	}

	kfree(ef_list);
}

static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
				     bool wait)
{
	struct ttm_operation_ctx ctx = { false, false };
	int ret;

	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
		 "Called with userptr BO"))
		return -EINVAL;

	amdgpu_bo_placement_from_domain(bo, domain);

	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		goto validate_fail;
	if (wait) {
		struct amdgpu_amdkfd_fence **ef_list;
		unsigned int ef_count;

		ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
							  &ef_count);
		if (ret)
			goto validate_fail;

		ttm_bo_wait(&bo->tbo, false, false);
		amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
	}

validate_fail:
	return ret;
}

static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_vm_parser *p = param;

	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
}

/* vm_validate_pt_pd_bos - Validate page table and directory BOs
 *
 * Page directories are not updated here because huge page handling
 * during page table updates can invalidate page directory entries
 * again. Page directories are only updated after updating page
 * tables.
 */
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	struct amdgpu_vm_parser param;
	int ret;

	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
	param.wait = false;

	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
					&param);
	if (ret) {
		pr_err("amdgpu: failed to validate PT BOs\n");
		return ret;
	}

	ret = amdgpu_amdkfd_validate(&param, pd);
	if (ret) {
		pr_err("amdgpu: failed to validate PD\n");
		return ret;
	}

	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);

	if (vm->use_cpu_for_update) {
		ret = amdgpu_bo_kmap(pd, NULL);
		if (ret) {
			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	int ret;

	ret = amdgpu_vm_update_directories(adev, vm);
	if (ret)
		return ret;

	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
}

/* add_bo_to_vm - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a.
Validate new page tables and directories 440 */ 441 static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, 442 struct amdgpu_vm *vm, bool is_aql, 443 struct kfd_bo_va_list **p_bo_va_entry) 444 { 445 int ret; 446 struct kfd_bo_va_list *bo_va_entry; 447 struct amdgpu_bo *pd = vm->root.base.bo; 448 struct amdgpu_bo *bo = mem->bo; 449 uint64_t va = mem->va; 450 struct list_head *list_bo_va = &mem->bo_va_list; 451 unsigned long bo_size = bo->tbo.mem.size; 452 453 if (!va) { 454 pr_err("Invalid VA when adding BO to VM\n"); 455 return -EINVAL; 456 } 457 458 if (is_aql) 459 va += bo_size; 460 461 bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); 462 if (!bo_va_entry) 463 return -ENOMEM; 464 465 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, 466 va + bo_size, vm); 467 468 /* Add BO to VM internal data structures*/ 469 bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo); 470 if (!bo_va_entry->bo_va) { 471 ret = -EINVAL; 472 pr_err("Failed to add BO object to VM. ret == %d\n", 473 ret); 474 goto err_vmadd; 475 } 476 477 bo_va_entry->va = va; 478 bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, 479 mem->mapping_flags); 480 bo_va_entry->kgd_dev = (void *)adev; 481 list_add(&bo_va_entry->bo_list, list_bo_va); 482 483 if (p_bo_va_entry) 484 *p_bo_va_entry = bo_va_entry; 485 486 /* Allocate new page tables if needed and validate 487 * them. Clearing of new page tables and validate need to wait 488 * on move fences. We don't want that to trigger the eviction 489 * fence, so remove it temporarily. 490 */ 491 amdgpu_amdkfd_remove_eviction_fence(pd, 492 vm->process_info->eviction_fence, 493 NULL, NULL); 494 495 ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); 496 if (ret) { 497 pr_err("Failed to allocate pts, err=%d\n", ret); 498 goto err_alloc_pts; 499 } 500 501 ret = vm_validate_pt_pd_bos(vm); 502 if (ret) { 503 pr_err("validate_pt_pd_bos() failed\n"); 504 goto err_alloc_pts; 505 } 506 507 /* Add the eviction fence back */ 508 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); 509 510 return 0; 511 512 err_alloc_pts: 513 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); 514 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); 515 list_del(&bo_va_entry->bo_list); 516 err_vmadd: 517 kfree(bo_va_entry); 518 return ret; 519 } 520 521 static void remove_bo_from_vm(struct amdgpu_device *adev, 522 struct kfd_bo_va_list *entry, unsigned long size) 523 { 524 pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", 525 entry->va, 526 entry->va + size, entry); 527 amdgpu_vm_bo_rmv(adev, entry->bo_va); 528 list_del(&entry->bo_list); 529 kfree(entry); 530 } 531 532 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, 533 struct amdkfd_process_info *process_info, 534 bool userptr) 535 { 536 struct ttm_validate_buffer *entry = &mem->validate_list; 537 struct amdgpu_bo *bo = mem->bo; 538 539 INIT_LIST_HEAD(&entry->head); 540 entry->num_shared = 1; 541 entry->bo = &bo->tbo; 542 mutex_lock(&process_info->lock); 543 if (userptr) 544 list_add_tail(&entry->head, &process_info->userptr_valid_list); 545 else 546 list_add_tail(&entry->head, &process_info->kfd_bo_list); 547 mutex_unlock(&process_info->lock); 548 } 549 550 /* Initializes user pages. It registers the MMU notifier and validates 551 * the userptr BO in the GTT domain. 552 * 553 * The BO must already be on the userptr_valid_list. Otherwise an 554 * eviction and restore may happen that leaves the new BO unmapped 555 * with the user mode queues running. 
556 * 557 * Takes the process_info->lock to protect against concurrent restore 558 * workers. 559 * 560 * Returns 0 for success, negative errno for errors. 561 */ 562 static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, 563 uint64_t user_addr) 564 { 565 struct amdkfd_process_info *process_info = mem->process_info; 566 struct amdgpu_bo *bo = mem->bo; 567 struct ttm_operation_ctx ctx = { true, false }; 568 int ret = 0; 569 570 mutex_lock(&process_info->lock); 571 572 ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); 573 if (ret) { 574 pr_err("%s: Failed to set userptr: %d\n", __func__, ret); 575 goto out; 576 } 577 578 ret = amdgpu_mn_register(bo, user_addr); 579 if (ret) { 580 pr_err("%s: Failed to register MMU notifier: %d\n", 581 __func__, ret); 582 goto out; 583 } 584 585 /* If no restore worker is running concurrently, user_pages 586 * should not be allocated 587 */ 588 WARN(mem->user_pages, "Leaking user_pages array"); 589 590 mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, 591 sizeof(struct page *), 592 GFP_KERNEL | __GFP_ZERO); 593 if (!mem->user_pages) { 594 pr_err("%s: Failed to allocate pages array\n", __func__); 595 ret = -ENOMEM; 596 goto unregister_out; 597 } 598 599 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); 600 if (ret) { 601 pr_err("%s: Failed to get user pages: %d\n", __func__, ret); 602 goto free_out; 603 } 604 605 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); 606 607 ret = amdgpu_bo_reserve(bo, true); 608 if (ret) { 609 pr_err("%s: Failed to reserve BO\n", __func__); 610 goto release_out; 611 } 612 amdgpu_bo_placement_from_domain(bo, mem->domain); 613 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 614 if (ret) 615 pr_err("%s: failed to validate BO\n", __func__); 616 amdgpu_bo_unreserve(bo); 617 618 release_out: 619 if (ret) 620 release_pages(mem->user_pages, bo->tbo.ttm->num_pages); 621 free_out: 622 kvfree(mem->user_pages); 623 mem->user_pages = NULL; 624 unregister_out: 625 if (ret) 626 amdgpu_mn_unregister(bo); 627 out: 628 mutex_unlock(&process_info->lock); 629 return ret; 630 } 631 632 /* Reserving a BO and its page table BOs must happen atomically to 633 * avoid deadlocks. Some operations update multiple VMs at once. Track 634 * all the reservation info in a context structure. Optionally a sync 635 * object can track VM updates. 636 */ 637 struct bo_vm_reservation_context { 638 struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ 639 unsigned int n_vms; /* Number of VMs reserved */ 640 struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ 641 struct ww_acquire_ctx ticket; /* Reservation ticket */ 642 struct list_head list, duplicates; /* BO lists */ 643 struct amdgpu_sync *sync; /* Pointer to sync object */ 644 bool reserved; /* Whether BOs are reserved */ 645 }; 646 647 enum bo_vm_match { 648 BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ 649 BO_VM_MAPPED, /* Match VMs where a BO is mapped */ 650 BO_VM_ALL, /* Match all VMs a BO was added to */ 651 }; 652 653 /** 654 * reserve_bo_and_vm - reserve a BO and a VM unconditionally. 655 * @mem: KFD BO structure. 656 * @vm: the VM to reserve. 657 * @ctx: the struct that will be used in unreserve_bo_and_vms(). 
658 */ 659 static int reserve_bo_and_vm(struct kgd_mem *mem, 660 struct amdgpu_vm *vm, 661 struct bo_vm_reservation_context *ctx) 662 { 663 struct amdgpu_bo *bo = mem->bo; 664 int ret; 665 666 WARN_ON(!vm); 667 668 ctx->reserved = false; 669 ctx->n_vms = 1; 670 ctx->sync = &mem->sync; 671 672 INIT_LIST_HEAD(&ctx->list); 673 INIT_LIST_HEAD(&ctx->duplicates); 674 675 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); 676 if (!ctx->vm_pd) 677 return -ENOMEM; 678 679 ctx->kfd_bo.priority = 0; 680 ctx->kfd_bo.tv.bo = &bo->tbo; 681 ctx->kfd_bo.tv.num_shared = 1; 682 ctx->kfd_bo.user_pages = NULL; 683 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 684 685 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); 686 687 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, 688 false, &ctx->duplicates); 689 if (!ret) 690 ctx->reserved = true; 691 else { 692 pr_err("Failed to reserve buffers in ttm\n"); 693 kfree(ctx->vm_pd); 694 ctx->vm_pd = NULL; 695 } 696 697 return ret; 698 } 699 700 /** 701 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally 702 * @mem: KFD BO structure. 703 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO 704 * is used. Otherwise, a single VM associated with the BO. 705 * @map_type: the mapping status that will be used to filter the VMs. 706 * @ctx: the struct that will be used in unreserve_bo_and_vms(). 707 * 708 * Returns 0 for success, negative for failure. 709 */ 710 static int reserve_bo_and_cond_vms(struct kgd_mem *mem, 711 struct amdgpu_vm *vm, enum bo_vm_match map_type, 712 struct bo_vm_reservation_context *ctx) 713 { 714 struct amdgpu_bo *bo = mem->bo; 715 struct kfd_bo_va_list *entry; 716 unsigned int i; 717 int ret; 718 719 ctx->reserved = false; 720 ctx->n_vms = 0; 721 ctx->vm_pd = NULL; 722 ctx->sync = &mem->sync; 723 724 INIT_LIST_HEAD(&ctx->list); 725 INIT_LIST_HEAD(&ctx->duplicates); 726 727 list_for_each_entry(entry, &mem->bo_va_list, bo_list) { 728 if ((vm && vm != entry->bo_va->base.vm) || 729 (entry->is_mapped != map_type 730 && map_type != BO_VM_ALL)) 731 continue; 732 733 ctx->n_vms++; 734 } 735 736 if (ctx->n_vms != 0) { 737 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), 738 GFP_KERNEL); 739 if (!ctx->vm_pd) 740 return -ENOMEM; 741 } 742 743 ctx->kfd_bo.priority = 0; 744 ctx->kfd_bo.tv.bo = &bo->tbo; 745 ctx->kfd_bo.tv.num_shared = 1; 746 ctx->kfd_bo.user_pages = NULL; 747 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 748 749 i = 0; 750 list_for_each_entry(entry, &mem->bo_va_list, bo_list) { 751 if ((vm && vm != entry->bo_va->base.vm) || 752 (entry->is_mapped != map_type 753 && map_type != BO_VM_ALL)) 754 continue; 755 756 amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, 757 &ctx->vm_pd[i]); 758 i++; 759 } 760 761 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, 762 false, &ctx->duplicates); 763 if (!ret) 764 ctx->reserved = true; 765 else 766 pr_err("Failed to reserve buffers in ttm.\n"); 767 768 if (ret) { 769 kfree(ctx->vm_pd); 770 ctx->vm_pd = NULL; 771 } 772 773 return ret; 774 } 775 776 /** 777 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context 778 * @ctx: Reservation context to unreserve 779 * @wait: Optionally wait for a sync object representing pending VM updates 780 * @intr: Whether the wait is interruptible 781 * 782 * Also frees any resources allocated in 783 * reserve_bo_and_(cond_)vm(s). Returns the status from 784 * amdgpu_sync_wait. 
785 */ 786 static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, 787 bool wait, bool intr) 788 { 789 int ret = 0; 790 791 if (wait) 792 ret = amdgpu_sync_wait(ctx->sync, intr); 793 794 if (ctx->reserved) 795 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); 796 kfree(ctx->vm_pd); 797 798 ctx->sync = NULL; 799 800 ctx->reserved = false; 801 ctx->vm_pd = NULL; 802 803 return ret; 804 } 805 806 static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, 807 struct kfd_bo_va_list *entry, 808 struct amdgpu_sync *sync) 809 { 810 struct amdgpu_bo_va *bo_va = entry->bo_va; 811 struct amdgpu_vm *vm = bo_va->base.vm; 812 struct amdgpu_bo *pd = vm->root.base.bo; 813 814 /* Remove eviction fence from PD (and thereby from PTs too as 815 * they share the resv. object). Otherwise during PT update 816 * job (see amdgpu_vm_bo_update_mapping), eviction fence would 817 * get added to job->sync object and job execution would 818 * trigger the eviction fence. 819 */ 820 amdgpu_amdkfd_remove_eviction_fence(pd, 821 vm->process_info->eviction_fence, 822 NULL, NULL); 823 amdgpu_vm_bo_unmap(adev, bo_va, entry->va); 824 825 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); 826 827 /* Add the eviction fence back */ 828 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); 829 830 amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); 831 832 return 0; 833 } 834 835 static int update_gpuvm_pte(struct amdgpu_device *adev, 836 struct kfd_bo_va_list *entry, 837 struct amdgpu_sync *sync) 838 { 839 int ret; 840 struct amdgpu_vm *vm; 841 struct amdgpu_bo_va *bo_va; 842 struct amdgpu_bo *bo; 843 844 bo_va = entry->bo_va; 845 vm = bo_va->base.vm; 846 bo = bo_va->base.bo; 847 848 /* Update the page tables */ 849 ret = amdgpu_vm_bo_update(adev, bo_va, false); 850 if (ret) { 851 pr_err("amdgpu_vm_bo_update failed\n"); 852 return ret; 853 } 854 855 return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); 856 } 857 858 static int map_bo_to_gpuvm(struct amdgpu_device *adev, 859 struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, 860 bool no_update_pte) 861 { 862 int ret; 863 864 /* Set virtual address for the allocation */ 865 ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, 866 amdgpu_bo_size(entry->bo_va->base.bo), 867 entry->pte_flags); 868 if (ret) { 869 pr_err("Failed to map VA 0x%llx in vm. 
ret %d\n", 870 entry->va, ret); 871 return ret; 872 } 873 874 if (no_update_pte) 875 return 0; 876 877 ret = update_gpuvm_pte(adev, entry, sync); 878 if (ret) { 879 pr_err("update_gpuvm_pte() failed\n"); 880 goto update_gpuvm_pte_failed; 881 } 882 883 return 0; 884 885 update_gpuvm_pte_failed: 886 unmap_bo_from_gpuvm(adev, entry, sync); 887 return ret; 888 } 889 890 static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) 891 { 892 struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); 893 894 if (!sg) 895 return NULL; 896 if (sg_alloc_table(sg, 1, GFP_KERNEL)) { 897 kfree(sg); 898 return NULL; 899 } 900 sg->sgl->dma_address = addr; 901 sg->sgl->length = size; 902 #ifdef CONFIG_NEED_SG_DMA_LENGTH 903 sg->sgl->dma_length = size; 904 #endif 905 return sg; 906 } 907 908 static int process_validate_vms(struct amdkfd_process_info *process_info) 909 { 910 struct amdgpu_vm *peer_vm; 911 int ret; 912 913 list_for_each_entry(peer_vm, &process_info->vm_list_head, 914 vm_list_node) { 915 ret = vm_validate_pt_pd_bos(peer_vm); 916 if (ret) 917 return ret; 918 } 919 920 return 0; 921 } 922 923 static int process_sync_pds_resv(struct amdkfd_process_info *process_info, 924 struct amdgpu_sync *sync) 925 { 926 struct amdgpu_vm *peer_vm; 927 int ret; 928 929 list_for_each_entry(peer_vm, &process_info->vm_list_head, 930 vm_list_node) { 931 struct amdgpu_bo *pd = peer_vm->root.base.bo; 932 933 ret = amdgpu_sync_resv(NULL, 934 sync, pd->tbo.resv, 935 AMDGPU_FENCE_OWNER_UNDEFINED, false); 936 if (ret) 937 return ret; 938 } 939 940 return 0; 941 } 942 943 static int process_update_pds(struct amdkfd_process_info *process_info, 944 struct amdgpu_sync *sync) 945 { 946 struct amdgpu_vm *peer_vm; 947 int ret; 948 949 list_for_each_entry(peer_vm, &process_info->vm_list_head, 950 vm_list_node) { 951 ret = vm_update_pds(peer_vm, sync); 952 if (ret) 953 return ret; 954 } 955 956 return 0; 957 } 958 959 static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, 960 struct dma_fence **ef) 961 { 962 struct amdkfd_process_info *info = NULL; 963 int ret; 964 965 if (!*process_info) { 966 info = kzalloc(sizeof(*info), GFP_KERNEL); 967 if (!info) 968 return -ENOMEM; 969 970 mutex_init(&info->lock); 971 INIT_LIST_HEAD(&info->vm_list_head); 972 INIT_LIST_HEAD(&info->kfd_bo_list); 973 INIT_LIST_HEAD(&info->userptr_valid_list); 974 INIT_LIST_HEAD(&info->userptr_inval_list); 975 976 info->eviction_fence = 977 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), 978 current->mm); 979 if (!info->eviction_fence) { 980 pr_err("Failed to create eviction fence\n"); 981 ret = -ENOMEM; 982 goto create_evict_fence_fail; 983 } 984 985 info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); 986 atomic_set(&info->evicted_bos, 0); 987 INIT_DELAYED_WORK(&info->restore_userptr_work, 988 amdgpu_amdkfd_restore_userptr_worker); 989 990 *process_info = info; 991 *ef = dma_fence_get(&info->eviction_fence->base); 992 } 993 994 vm->process_info = *process_info; 995 996 /* Validate page directory and attach eviction fence */ 997 ret = amdgpu_bo_reserve(vm->root.base.bo, true); 998 if (ret) 999 goto reserve_pd_fail; 1000 ret = vm_validate_pt_pd_bos(vm); 1001 if (ret) { 1002 pr_err("validate_pt_pd_bos() failed\n"); 1003 goto validate_pd_fail; 1004 } 1005 ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false); 1006 if (ret) 1007 goto wait_pd_fail; 1008 amdgpu_bo_fence(vm->root.base.bo, 1009 &vm->process_info->eviction_fence->base, true); 1010 amdgpu_bo_unreserve(vm->root.base.bo); 1011 1012 /* Update process info */ 1013 
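	/* process_info->lock also protects the VM list and the BO lists
	 * walked by the restore workers, so take it before linking this
	 * VM into the list.
	 */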
mutex_lock(&vm->process_info->lock); 1014 list_add_tail(&vm->vm_list_node, 1015 &(vm->process_info->vm_list_head)); 1016 vm->process_info->n_vms++; 1017 mutex_unlock(&vm->process_info->lock); 1018 1019 return 0; 1020 1021 wait_pd_fail: 1022 validate_pd_fail: 1023 amdgpu_bo_unreserve(vm->root.base.bo); 1024 reserve_pd_fail: 1025 vm->process_info = NULL; 1026 if (info) { 1027 /* Two fence references: one in info and one in *ef */ 1028 dma_fence_put(&info->eviction_fence->base); 1029 dma_fence_put(*ef); 1030 *ef = NULL; 1031 *process_info = NULL; 1032 put_pid(info->pid); 1033 create_evict_fence_fail: 1034 mutex_destroy(&info->lock); 1035 kfree(info); 1036 } 1037 return ret; 1038 } 1039 1040 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid, 1041 void **vm, void **process_info, 1042 struct dma_fence **ef) 1043 { 1044 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1045 struct amdgpu_vm *new_vm; 1046 int ret; 1047 1048 new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); 1049 if (!new_vm) 1050 return -ENOMEM; 1051 1052 /* Initialize AMDGPU part of the VM */ 1053 ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid); 1054 if (ret) { 1055 pr_err("Failed init vm ret %d\n", ret); 1056 goto amdgpu_vm_init_fail; 1057 } 1058 1059 /* Initialize KFD part of the VM and process info */ 1060 ret = init_kfd_vm(new_vm, process_info, ef); 1061 if (ret) 1062 goto init_kfd_vm_fail; 1063 1064 *vm = (void *) new_vm; 1065 1066 return 0; 1067 1068 init_kfd_vm_fail: 1069 amdgpu_vm_fini(adev, new_vm); 1070 amdgpu_vm_init_fail: 1071 kfree(new_vm); 1072 return ret; 1073 } 1074 1075 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, 1076 struct file *filp, unsigned int pasid, 1077 void **vm, void **process_info, 1078 struct dma_fence **ef) 1079 { 1080 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1081 struct drm_file *drm_priv = filp->private_data; 1082 struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; 1083 struct amdgpu_vm *avm = &drv_priv->vm; 1084 int ret; 1085 1086 /* Already a compute VM? 
 */
	if (avm->process_info)
		return -EINVAL;

	/* Convert VM into a compute VM */
	ret = amdgpu_vm_make_compute(adev, avm, pasid);
	if (ret)
		return ret;

	/* Initialize KFD part of the VM and process info */
	ret = init_kfd_vm(avm, process_info, ef);
	if (ret)
		return ret;

	*vm = (void *)avm;

	return 0;
}

void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
	struct amdkfd_process_info *process_info = vm->process_info;
	struct amdgpu_bo *pd = vm->root.base.bo;

	if (!process_info)
		return;

	/* Release eviction fence from PD */
	amdgpu_bo_reserve(pd, false);
	amdgpu_bo_fence(pd, NULL, false);
	amdgpu_bo_unreserve(pd);

	/* Update process info */
	mutex_lock(&process_info->lock);
	process_info->n_vms--;
	list_del(&vm->vm_list_node);
	mutex_unlock(&process_info->lock);

	/* Release per-process resources when last compute VM is destroyed */
	if (!process_info->n_vms) {
		WARN_ON(!list_empty(&process_info->kfd_bo_list));
		WARN_ON(!list_empty(&process_info->userptr_valid_list));
		WARN_ON(!list_empty(&process_info->userptr_inval_list));

		dma_fence_put(&process_info->eviction_fence->base);
		cancel_delayed_work_sync(&process_info->restore_userptr_work);
		put_pid(process_info->pid);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}
}

void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Destroying process vm %p\n", vm);

	/* Release the VM context */
	amdgpu_vm_fini(adev, avm);
	kfree(vm);
}

void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Releasing process vm %p\n", vm);

	/* The original pasid of the amdgpu vm has already been released
	 * while converting the amdgpu vm into a compute vm. The current
	 * pasid is managed by kfd and will be released on kfd process
	 * destroy. Set the amdgpu pasid to 0 to avoid a duplicate release.
1169 */ 1170 amdgpu_vm_release_compute(adev, avm); 1171 } 1172 1173 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) 1174 { 1175 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; 1176 struct amdgpu_bo *pd = avm->root.base.bo; 1177 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); 1178 1179 if (adev->asic_type < CHIP_VEGA10) 1180 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; 1181 return avm->pd_phys_addr; 1182 } 1183 1184 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( 1185 struct kgd_dev *kgd, uint64_t va, uint64_t size, 1186 void *vm, struct kgd_mem **mem, 1187 uint64_t *offset, uint32_t flags) 1188 { 1189 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1190 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; 1191 enum ttm_bo_type bo_type = ttm_bo_type_device; 1192 struct sg_table *sg = NULL; 1193 uint64_t user_addr = 0; 1194 struct amdgpu_bo *bo; 1195 struct amdgpu_bo_param bp; 1196 int byte_align; 1197 u32 domain, alloc_domain; 1198 u64 alloc_flags; 1199 uint32_t mapping_flags; 1200 int ret; 1201 1202 /* 1203 * Check on which domain to allocate BO 1204 */ 1205 if (flags & ALLOC_MEM_FLAGS_VRAM) { 1206 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; 1207 alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; 1208 alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 1209 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 1210 AMDGPU_GEM_CREATE_NO_CPU_ACCESS; 1211 } else if (flags & ALLOC_MEM_FLAGS_GTT) { 1212 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; 1213 alloc_flags = 0; 1214 } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { 1215 domain = AMDGPU_GEM_DOMAIN_GTT; 1216 alloc_domain = AMDGPU_GEM_DOMAIN_CPU; 1217 alloc_flags = 0; 1218 if (!offset || !*offset) 1219 return -EINVAL; 1220 user_addr = *offset; 1221 } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { 1222 domain = AMDGPU_GEM_DOMAIN_GTT; 1223 alloc_domain = AMDGPU_GEM_DOMAIN_CPU; 1224 bo_type = ttm_bo_type_sg; 1225 alloc_flags = 0; 1226 if (size > UINT_MAX) 1227 return -EINVAL; 1228 sg = create_doorbell_sg(*offset, size); 1229 if (!sg) 1230 return -ENOMEM; 1231 } else { 1232 return -EINVAL; 1233 } 1234 1235 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); 1236 if (!*mem) { 1237 ret = -ENOMEM; 1238 goto err; 1239 } 1240 INIT_LIST_HEAD(&(*mem)->bo_va_list); 1241 mutex_init(&(*mem)->lock); 1242 (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); 1243 1244 /* Workaround for AQL queue wraparound bug. Map the same 1245 * memory twice. That means we only actually allocate half 1246 * the memory. 1247 */ 1248 if ((*mem)->aql_queue) 1249 size = size >> 1; 1250 1251 /* Workaround for TLB bug on older VI chips */ 1252 byte_align = (adev->family == AMDGPU_FAMILY_VI && 1253 adev->asic_type != CHIP_FIJI && 1254 adev->asic_type != CHIP_POLARIS10 && 1255 adev->asic_type != CHIP_POLARIS11 && 1256 adev->asic_type != CHIP_POLARIS12) ? 
1257 VI_BO_SIZE_ALIGN : 1; 1258 1259 mapping_flags = AMDGPU_VM_PAGE_READABLE; 1260 if (flags & ALLOC_MEM_FLAGS_WRITABLE) 1261 mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; 1262 if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) 1263 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; 1264 if (flags & ALLOC_MEM_FLAGS_COHERENT) 1265 mapping_flags |= AMDGPU_VM_MTYPE_UC; 1266 else 1267 mapping_flags |= AMDGPU_VM_MTYPE_NC; 1268 (*mem)->mapping_flags = mapping_flags; 1269 1270 amdgpu_sync_create(&(*mem)->sync); 1271 1272 ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); 1273 if (ret) { 1274 pr_debug("Insufficient system memory\n"); 1275 goto err_reserve_limit; 1276 } 1277 1278 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", 1279 va, size, domain_string(alloc_domain)); 1280 1281 memset(&bp, 0, sizeof(bp)); 1282 bp.size = size; 1283 bp.byte_align = byte_align; 1284 bp.domain = alloc_domain; 1285 bp.flags = alloc_flags; 1286 bp.type = bo_type; 1287 bp.resv = NULL; 1288 ret = amdgpu_bo_create(adev, &bp, &bo); 1289 if (ret) { 1290 pr_debug("Failed to create BO on domain %s. ret %d\n", 1291 domain_string(alloc_domain), ret); 1292 goto err_bo_create; 1293 } 1294 if (bo_type == ttm_bo_type_sg) { 1295 bo->tbo.sg = sg; 1296 bo->tbo.ttm->sg = sg; 1297 } 1298 bo->kfd_bo = *mem; 1299 (*mem)->bo = bo; 1300 if (user_addr) 1301 bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; 1302 1303 (*mem)->va = va; 1304 (*mem)->domain = domain; 1305 (*mem)->mapped_to_gpu_memory = 0; 1306 (*mem)->process_info = avm->process_info; 1307 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); 1308 1309 if (user_addr) { 1310 ret = init_user_pages(*mem, current->mm, user_addr); 1311 if (ret) { 1312 mutex_lock(&avm->process_info->lock); 1313 list_del(&(*mem)->validate_list.head); 1314 mutex_unlock(&avm->process_info->lock); 1315 goto allocate_init_user_pages_failed; 1316 } 1317 } 1318 1319 if (offset) 1320 *offset = amdgpu_bo_mmap_offset(bo); 1321 1322 return 0; 1323 1324 allocate_init_user_pages_failed: 1325 amdgpu_bo_unref(&bo); 1326 /* Don't unreserve system mem limit twice */ 1327 goto err_reserve_limit; 1328 err_bo_create: 1329 unreserve_mem_limit(adev, size, alloc_domain, !!sg); 1330 err_reserve_limit: 1331 mutex_destroy(&(*mem)->lock); 1332 kfree(*mem); 1333 err: 1334 if (sg) { 1335 sg_free_table(sg); 1336 kfree(sg); 1337 } 1338 return ret; 1339 } 1340 1341 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( 1342 struct kgd_dev *kgd, struct kgd_mem *mem) 1343 { 1344 struct amdkfd_process_info *process_info = mem->process_info; 1345 unsigned long bo_size = mem->bo->tbo.mem.size; 1346 struct kfd_bo_va_list *entry, *tmp; 1347 struct bo_vm_reservation_context ctx; 1348 struct ttm_validate_buffer *bo_list_entry; 1349 int ret; 1350 1351 mutex_lock(&mem->lock); 1352 1353 if (mem->mapped_to_gpu_memory > 0) { 1354 pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", 1355 mem->va, bo_size); 1356 mutex_unlock(&mem->lock); 1357 return -EBUSY; 1358 } 1359 1360 mutex_unlock(&mem->lock); 1361 /* lock is not needed after this, since mem is unused and will 1362 * be freed anyway 1363 */ 1364 1365 /* No more MMU notifiers */ 1366 amdgpu_mn_unregister(mem->bo); 1367 1368 /* Make sure restore workers don't access the BO any more */ 1369 bo_list_entry = &mem->validate_list; 1370 mutex_lock(&process_info->lock); 1371 list_del(&bo_list_entry->head); 1372 mutex_unlock(&process_info->lock); 1373 1374 /* Free user pages if necessary */ 1375 if (mem->user_pages) { 1376 pr_debug("%s: Freeing user_pages array\n", __func__); 1377 if 
(mem->user_pages[0])
			release_pages(mem->user_pages,
				      mem->bo->tbo.ttm->num_pages);
		kvfree(mem->user_pages);
	}

	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					process_info->eviction_fence,
					NULL, NULL);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
		mem->va + bo_size * (1 + mem->aql_queue));

	/* Remove from VM internal data structures */
	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
				entry, bo_size);

	ret = unreserve_bo_and_vms(&ctx, false, false);

	/* Free the sync object */
	amdgpu_sync_free(&mem->sync);

	/* If the SG is not NULL, it's one we created for a doorbell
	 * BO. We need to free it.
	 */
	if (mem->bo->tbo.sg) {
		sg_free_table(mem->bo->tbo.sg);
		kfree(mem->bo->tbo.sg);
	}

	/* Free the BO */
	amdgpu_bo_unref(&mem->bo);
	mutex_destroy(&mem->lock);
	kfree(mem);

	return ret;
}

int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	int ret;
	struct amdgpu_bo *bo;
	uint32_t domain;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	struct kfd_bo_va_list *bo_va_entry = NULL;
	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
	unsigned long bo_size;
	bool is_invalid_userptr = false;

	bo = mem->bo;
	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		return -EINVAL;
	}

	/* Make sure restore is not running concurrently. Since we
	 * don't map invalid userptr BOs, we rely on the next restore
	 * worker to do the mapping
	 */
	mutex_lock(&mem->process_info->lock);

	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
	 * sure that the MMU notifier is no longer running
	 * concurrently and the queues are actually stopped
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		down_write(&current->mm->mmap_sem);
		is_invalid_userptr = atomic_read(&mem->invalid);
		up_write(&current->mm->mmap_sem);
	}

	mutex_lock(&mem->lock);

	domain = mem->domain;
	bo_size = bo->tbo.mem.size;

	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
			mem->va,
			mem->va + bo_size * (1 + mem->aql_queue),
			vm, domain_string(domain));

	ret = reserve_bo_and_vm(mem, vm, &ctx);
	if (unlikely(ret))
		goto out;

	/* Userptr can be marked as "not invalid", but not actually be
	 * validated yet (still in the system domain).
In that case 1476 * the queues are still stopped and we can leave mapping for 1477 * the next restore worker 1478 */ 1479 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && 1480 bo->tbo.mem.mem_type == TTM_PL_SYSTEM) 1481 is_invalid_userptr = true; 1482 1483 if (check_if_add_bo_to_vm(avm, mem)) { 1484 ret = add_bo_to_vm(adev, mem, avm, false, 1485 &bo_va_entry); 1486 if (ret) 1487 goto add_bo_to_vm_failed; 1488 if (mem->aql_queue) { 1489 ret = add_bo_to_vm(adev, mem, avm, 1490 true, &bo_va_entry_aql); 1491 if (ret) 1492 goto add_bo_to_vm_failed_aql; 1493 } 1494 } else { 1495 ret = vm_validate_pt_pd_bos(avm); 1496 if (unlikely(ret)) 1497 goto add_bo_to_vm_failed; 1498 } 1499 1500 if (mem->mapped_to_gpu_memory == 0 && 1501 !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { 1502 /* Validate BO only once. The eviction fence gets added to BO 1503 * the first time it is mapped. Validate will wait for all 1504 * background evictions to complete. 1505 */ 1506 ret = amdgpu_amdkfd_bo_validate(bo, domain, true); 1507 if (ret) { 1508 pr_debug("Validate failed\n"); 1509 goto map_bo_to_gpuvm_failed; 1510 } 1511 } 1512 1513 list_for_each_entry(entry, &mem->bo_va_list, bo_list) { 1514 if (entry->bo_va->base.vm == vm && !entry->is_mapped) { 1515 pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", 1516 entry->va, entry->va + bo_size, 1517 entry); 1518 1519 ret = map_bo_to_gpuvm(adev, entry, ctx.sync, 1520 is_invalid_userptr); 1521 if (ret) { 1522 pr_err("Failed to map radeon bo to gpuvm\n"); 1523 goto map_bo_to_gpuvm_failed; 1524 } 1525 1526 ret = vm_update_pds(vm, ctx.sync); 1527 if (ret) { 1528 pr_err("Failed to update page directories\n"); 1529 goto map_bo_to_gpuvm_failed; 1530 } 1531 1532 entry->is_mapped = true; 1533 mem->mapped_to_gpu_memory++; 1534 pr_debug("\t INC mapping count %d\n", 1535 mem->mapped_to_gpu_memory); 1536 } 1537 } 1538 1539 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) 1540 amdgpu_bo_fence(bo, 1541 &avm->process_info->eviction_fence->base, 1542 true); 1543 ret = unreserve_bo_and_vms(&ctx, false, false); 1544 1545 goto out; 1546 1547 map_bo_to_gpuvm_failed: 1548 if (bo_va_entry_aql) 1549 remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); 1550 add_bo_to_vm_failed_aql: 1551 if (bo_va_entry) 1552 remove_bo_from_vm(adev, bo_va_entry, bo_size); 1553 add_bo_to_vm_failed: 1554 unreserve_bo_and_vms(&ctx, false, false); 1555 out: 1556 mutex_unlock(&mem->process_info->lock); 1557 mutex_unlock(&mem->lock); 1558 return ret; 1559 } 1560 1561 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( 1562 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) 1563 { 1564 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1565 struct amdkfd_process_info *process_info = 1566 ((struct amdgpu_vm *)vm)->process_info; 1567 unsigned long bo_size = mem->bo->tbo.mem.size; 1568 struct kfd_bo_va_list *entry; 1569 struct bo_vm_reservation_context ctx; 1570 int ret; 1571 1572 mutex_lock(&mem->lock); 1573 1574 ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); 1575 if (unlikely(ret)) 1576 goto out; 1577 /* If no VMs were reserved, it means the BO wasn't actually mapped */ 1578 if (ctx.n_vms == 0) { 1579 ret = -EINVAL; 1580 goto unreserve_out; 1581 } 1582 1583 ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); 1584 if (unlikely(ret)) 1585 goto unreserve_out; 1586 1587 pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", 1588 mem->va, 1589 mem->va + bo_size * (1 + mem->aql_queue), 1590 vm); 1591 1592 list_for_each_entry(entry, &mem->bo_va_list, bo_list) { 1593 if (entry->bo_va->base.vm == vm && entry->is_mapped) { 
1594 pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", 1595 entry->va, 1596 entry->va + bo_size, 1597 entry); 1598 1599 ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); 1600 if (ret == 0) { 1601 entry->is_mapped = false; 1602 } else { 1603 pr_err("failed to unmap VA 0x%llx\n", 1604 mem->va); 1605 goto unreserve_out; 1606 } 1607 1608 mem->mapped_to_gpu_memory--; 1609 pr_debug("\t DEC mapping count %d\n", 1610 mem->mapped_to_gpu_memory); 1611 } 1612 } 1613 1614 /* If BO is unmapped from all VMs, unfence it. It can be evicted if 1615 * required. 1616 */ 1617 if (mem->mapped_to_gpu_memory == 0 && 1618 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) 1619 amdgpu_amdkfd_remove_eviction_fence(mem->bo, 1620 process_info->eviction_fence, 1621 NULL, NULL); 1622 1623 unreserve_out: 1624 unreserve_bo_and_vms(&ctx, false, false); 1625 out: 1626 mutex_unlock(&mem->lock); 1627 return ret; 1628 } 1629 1630 int amdgpu_amdkfd_gpuvm_sync_memory( 1631 struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) 1632 { 1633 struct amdgpu_sync sync; 1634 int ret; 1635 1636 amdgpu_sync_create(&sync); 1637 1638 mutex_lock(&mem->lock); 1639 amdgpu_sync_clone(&mem->sync, &sync); 1640 mutex_unlock(&mem->lock); 1641 1642 ret = amdgpu_sync_wait(&sync, intr); 1643 amdgpu_sync_free(&sync); 1644 return ret; 1645 } 1646 1647 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, 1648 struct kgd_mem *mem, void **kptr, uint64_t *size) 1649 { 1650 int ret; 1651 struct amdgpu_bo *bo = mem->bo; 1652 1653 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { 1654 pr_err("userptr can't be mapped to kernel\n"); 1655 return -EINVAL; 1656 } 1657 1658 /* delete kgd_mem from kfd_bo_list to avoid re-validating 1659 * this BO in BO's restoring after eviction. 1660 */ 1661 mutex_lock(&mem->process_info->lock); 1662 1663 ret = amdgpu_bo_reserve(bo, true); 1664 if (ret) { 1665 pr_err("Failed to reserve bo. ret %d\n", ret); 1666 goto bo_reserve_failed; 1667 } 1668 1669 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); 1670 if (ret) { 1671 pr_err("Failed to pin bo. ret %d\n", ret); 1672 goto pin_failed; 1673 } 1674 1675 ret = amdgpu_bo_kmap(bo, kptr); 1676 if (ret) { 1677 pr_err("Failed to map bo to kernel. 
ret %d\n", ret); 1678 goto kmap_failed; 1679 } 1680 1681 amdgpu_amdkfd_remove_eviction_fence( 1682 bo, mem->process_info->eviction_fence, NULL, NULL); 1683 list_del_init(&mem->validate_list.head); 1684 1685 if (size) 1686 *size = amdgpu_bo_size(bo); 1687 1688 amdgpu_bo_unreserve(bo); 1689 1690 mutex_unlock(&mem->process_info->lock); 1691 return 0; 1692 1693 kmap_failed: 1694 amdgpu_bo_unpin(bo); 1695 pin_failed: 1696 amdgpu_bo_unreserve(bo); 1697 bo_reserve_failed: 1698 mutex_unlock(&mem->process_info->lock); 1699 1700 return ret; 1701 } 1702 1703 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, 1704 struct kfd_vm_fault_info *mem) 1705 { 1706 struct amdgpu_device *adev; 1707 1708 adev = (struct amdgpu_device *)kgd; 1709 if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { 1710 *mem = *adev->gmc.vm_fault_info; 1711 mb(); 1712 atomic_set(&adev->gmc.vm_fault_info_updated, 0); 1713 } 1714 return 0; 1715 } 1716 1717 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, 1718 struct dma_buf *dma_buf, 1719 uint64_t va, void *vm, 1720 struct kgd_mem **mem, uint64_t *size, 1721 uint64_t *mmap_offset) 1722 { 1723 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 1724 struct drm_gem_object *obj; 1725 struct amdgpu_bo *bo; 1726 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; 1727 1728 if (dma_buf->ops != &amdgpu_dmabuf_ops) 1729 /* Can't handle non-graphics buffers */ 1730 return -EINVAL; 1731 1732 obj = dma_buf->priv; 1733 if (obj->dev->dev_private != adev) 1734 /* Can't handle buffers from other devices */ 1735 return -EINVAL; 1736 1737 bo = gem_to_amdgpu_bo(obj); 1738 if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | 1739 AMDGPU_GEM_DOMAIN_GTT))) 1740 /* Only VRAM and GTT BOs are supported */ 1741 return -EINVAL; 1742 1743 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); 1744 if (!*mem) 1745 return -ENOMEM; 1746 1747 if (size) 1748 *size = amdgpu_bo_size(bo); 1749 1750 if (mmap_offset) 1751 *mmap_offset = amdgpu_bo_mmap_offset(bo); 1752 1753 INIT_LIST_HEAD(&(*mem)->bo_va_list); 1754 mutex_init(&(*mem)->lock); 1755 (*mem)->mapping_flags = 1756 AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | 1757 AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; 1758 1759 (*mem)->bo = amdgpu_bo_ref(bo); 1760 (*mem)->va = va; 1761 (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 1762 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; 1763 (*mem)->mapped_to_gpu_memory = 0; 1764 (*mem)->process_info = avm->process_info; 1765 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); 1766 amdgpu_sync_create(&(*mem)->sync); 1767 1768 return 0; 1769 } 1770 1771 /* Evict a userptr BO by stopping the queues if necessary 1772 * 1773 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it 1774 * cannot do any memory allocations, and cannot take any locks that 1775 * are held elsewhere while allocating memory. Therefore this is as 1776 * simple as possible, using atomic counters. 1777 * 1778 * It doesn't do anything to the BO itself. The real work happens in 1779 * restore, where we get updated page addresses. This function only 1780 * ensures that GPU access to the BO is stopped. 
1781 */ 1782 int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, 1783 struct mm_struct *mm) 1784 { 1785 struct amdkfd_process_info *process_info = mem->process_info; 1786 int invalid, evicted_bos; 1787 int r = 0; 1788 1789 invalid = atomic_inc_return(&mem->invalid); 1790 evicted_bos = atomic_inc_return(&process_info->evicted_bos); 1791 if (evicted_bos == 1) { 1792 /* First eviction, stop the queues */ 1793 r = kgd2kfd->quiesce_mm(mm); 1794 if (r) 1795 pr_err("Failed to quiesce KFD\n"); 1796 schedule_delayed_work(&process_info->restore_userptr_work, 1797 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); 1798 } 1799 1800 return r; 1801 } 1802 1803 /* Update invalid userptr BOs 1804 * 1805 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to 1806 * userptr_inval_list and updates user pages for all BOs that have 1807 * been invalidated since their last update. 1808 */ 1809 static int update_invalid_user_pages(struct amdkfd_process_info *process_info, 1810 struct mm_struct *mm) 1811 { 1812 struct kgd_mem *mem, *tmp_mem; 1813 struct amdgpu_bo *bo; 1814 struct ttm_operation_ctx ctx = { false, false }; 1815 int invalid, ret; 1816 1817 /* Move all invalidated BOs to the userptr_inval_list and 1818 * release their user pages by migration to the CPU domain 1819 */ 1820 list_for_each_entry_safe(mem, tmp_mem, 1821 &process_info->userptr_valid_list, 1822 validate_list.head) { 1823 if (!atomic_read(&mem->invalid)) 1824 continue; /* BO is still valid */ 1825 1826 bo = mem->bo; 1827 1828 if (amdgpu_bo_reserve(bo, true)) 1829 return -EAGAIN; 1830 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 1831 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 1832 amdgpu_bo_unreserve(bo); 1833 if (ret) { 1834 pr_err("%s: Failed to invalidate userptr BO\n", 1835 __func__); 1836 return -EAGAIN; 1837 } 1838 1839 list_move_tail(&mem->validate_list.head, 1840 &process_info->userptr_inval_list); 1841 } 1842 1843 if (list_empty(&process_info->userptr_inval_list)) 1844 return 0; /* All evicted userptr BOs were freed */ 1845 1846 /* Go through userptr_inval_list and update any invalid user_pages */ 1847 list_for_each_entry(mem, &process_info->userptr_inval_list, 1848 validate_list.head) { 1849 invalid = atomic_read(&mem->invalid); 1850 if (!invalid) 1851 /* BO hasn't been invalidated since the last 1852 * revalidation attempt. Keep its BO list. 1853 */ 1854 continue; 1855 1856 bo = mem->bo; 1857 1858 if (!mem->user_pages) { 1859 mem->user_pages = 1860 kvmalloc_array(bo->tbo.ttm->num_pages, 1861 sizeof(struct page *), 1862 GFP_KERNEL | __GFP_ZERO); 1863 if (!mem->user_pages) { 1864 pr_err("%s: Failed to allocate pages array\n", 1865 __func__); 1866 return -ENOMEM; 1867 } 1868 } else if (mem->user_pages[0]) { 1869 release_pages(mem->user_pages, bo->tbo.ttm->num_pages); 1870 } 1871 1872 /* Get updated user pages */ 1873 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, 1874 mem->user_pages); 1875 if (ret) { 1876 mem->user_pages[0] = NULL; 1877 pr_info("%s: Failed to get user pages: %d\n", 1878 __func__, ret); 1879 /* Pretend it succeeded. It will fail later 1880 * with a VM fault if the GPU tries to access 1881 * it. Better than hanging indefinitely with 1882 * stalled user mode queues. 
1883 */ 1884 } 1885 1886 /* Mark the BO as valid unless it was invalidated 1887 * again concurrently 1888 */ 1889 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) 1890 return -EAGAIN; 1891 } 1892 1893 return 0; 1894 } 1895 1896 /* Validate invalid userptr BOs 1897 * 1898 * Validates BOs on the userptr_inval_list, and moves them back to the 1899 * userptr_valid_list. Also updates GPUVM page tables with new page 1900 * addresses and waits for the page table updates to complete. 1901 */ 1902 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) 1903 { 1904 struct amdgpu_bo_list_entry *pd_bo_list_entries; 1905 struct list_head resv_list, duplicates; 1906 struct ww_acquire_ctx ticket; 1907 struct amdgpu_sync sync; 1908 1909 struct amdgpu_vm *peer_vm; 1910 struct kgd_mem *mem, *tmp_mem; 1911 struct amdgpu_bo *bo; 1912 struct ttm_operation_ctx ctx = { false, false }; 1913 int i, ret; 1914 1915 pd_bo_list_entries = kcalloc(process_info->n_vms, 1916 sizeof(struct amdgpu_bo_list_entry), 1917 GFP_KERNEL); 1918 if (!pd_bo_list_entries) { 1919 pr_err("%s: Failed to allocate PD BO list entries\n", __func__); 1920 return -ENOMEM; 1921 } 1922 1923 INIT_LIST_HEAD(&resv_list); 1924 INIT_LIST_HEAD(&duplicates); 1925 1926 /* Get all the page directory BOs that need to be reserved */ 1927 i = 0; 1928 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1929 vm_list_node) 1930 amdgpu_vm_get_pd_bo(peer_vm, &resv_list, 1931 &pd_bo_list_entries[i++]); 1932 /* Add the userptr_inval_list entries to resv_list */ 1933 list_for_each_entry(mem, &process_info->userptr_inval_list, 1934 validate_list.head) { 1935 list_add_tail(&mem->resv_list.head, &resv_list); 1936 mem->resv_list.bo = mem->validate_list.bo; 1937 mem->resv_list.num_shared = mem->validate_list.num_shared; 1938 } 1939 1940 /* Reserve all BOs and page tables for validation */ 1941 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); 1942 WARN(!list_empty(&duplicates), "Duplicates should be empty"); 1943 if (ret) 1944 goto out; 1945 1946 amdgpu_sync_create(&sync); 1947 1948 /* Avoid triggering eviction fences when unmapping invalid 1949 * userptr BOs (waits for all fences, doesn't use 1950 * FENCE_OWNER_VM) 1951 */ 1952 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1953 vm_list_node) 1954 amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, 1955 process_info->eviction_fence, 1956 NULL, NULL); 1957 1958 ret = process_validate_vms(process_info); 1959 if (ret) 1960 goto unreserve_out; 1961 1962 /* Validate BOs and update GPUVM page tables */ 1963 list_for_each_entry_safe(mem, tmp_mem, 1964 &process_info->userptr_inval_list, 1965 validate_list.head) { 1966 struct kfd_bo_va_list *bo_va_entry; 1967 1968 bo = mem->bo; 1969 1970 /* Copy pages array and validate the BO if we got user pages */ 1971 if (mem->user_pages[0]) { 1972 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, 1973 mem->user_pages); 1974 amdgpu_bo_placement_from_domain(bo, mem->domain); 1975 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 1976 if (ret) { 1977 pr_err("%s: failed to validate BO\n", __func__); 1978 goto unreserve_out; 1979 } 1980 } 1981 1982 /* Validate succeeded, now the BO owns the pages, free 1983 * our copy of the pointer array. Put this BO back on 1984 * the userptr_valid_list. If we need to revalidate 1985 * it, we need to start from scratch. 
		 */
		kvfree(mem->user_pages);
		mem->user_pages = NULL;
		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_valid_list);

		/* Update mapping. If the BO was not validated
		 * (because we couldn't get user pages), this will
		 * clear the page table entries, which will result in
		 * VM faults if the GPU tries to access the invalid
		 * memory.
		 */
		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
			if (!bo_va_entry->is_mapped)
				continue;

			ret = update_gpuvm_pte((struct amdgpu_device *)
					       bo_va_entry->kgd_dev,
					       bo_va_entry, &sync);
			if (ret) {
				pr_err("%s: update PTE failed\n", __func__);
				/* make sure this gets validated again */
				atomic_inc(&mem->invalid);
				goto unreserve_out;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync);

unreserve_out:
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_bo_fence(peer_vm->root.base.bo,
				&process_info->eviction_fence->base, true);
	ttm_eu_backoff_reservation(&ticket, &resv_list);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);
out:
	kfree(pd_bo_list_entries);

	return ret;
}

/* Worker callback to restore evicted userptr BOs
 *
 * Tries to update and validate all userptr BOs. If successful and no
 * concurrent evictions happened, the queues are restarted. Otherwise,
 * reschedule for another attempt later.
 */
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct amdkfd_process_info *process_info =
		container_of(dwork, struct amdkfd_process_info,
			     restore_userptr_work);
	struct task_struct *usertask;
	struct mm_struct *mm;
	int evicted_bos;

	evicted_bos = atomic_read(&process_info->evicted_bos);
	if (!evicted_bos)
		return;

	/* Reference task and mm in case of concurrent process termination */
	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
	if (!usertask)
		return;
	mm = get_task_mm(usertask);
	if (!mm) {
		put_task_struct(usertask);
		return;
	}

	mutex_lock(&process_info->lock);

	if (update_invalid_user_pages(process_info, mm))
		goto unlock_out;
	/* userptr_inval_list can be empty if all evicted userptr BOs
	 * have been freed. In that case there is nothing to validate
	 * and we can just restart the queues.
	 */
	if (!list_empty(&process_info->userptr_inval_list)) {
		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
			goto unlock_out; /* Concurrent eviction, try again */

		if (validate_invalid_user_pages(process_info))
			goto unlock_out;
	}
	/* Final check for concurrent eviction and atomic update. If
	 * another eviction happens after a successful update, it will
	 * be a first eviction that calls quiesce_mm. The eviction
	 * reference counting inside KFD will handle this case.
	 */
	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
	    evicted_bos)
		goto unlock_out;
	evicted_bos = 0;
	if (kgd2kfd->resume_mm(mm)) {
		pr_err("%s: Failed to resume KFD\n", __func__);
		/* No recovery from this failure. Probably the CP is
		 * hanging. No point trying again.
		 */
	}
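
	/* Common exit path: drop the process lock and the mm/task
	 * references taken above; if anything failed, another restore
	 * attempt is scheduled below.
	 */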
unlock_out:
	mutex_unlock(&process_info->lock);
	mmput(mm);
	put_task_struct(usertask);

	/* If validation failed, reschedule another attempt */
	if (evicted_bos)
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 *   KFD process identified by process_info
 *
 * @process_info: amdkfd_process_info of the KFD process
 *
 * After memory eviction, the restore thread calls this function. The function
 * should be called while the process is still valid. BO restore involves:
 *
 * 1.  Release the old eviction fence and create a new one
 * 2.  Get two copies of the PD BO list from all the VMs. Keep one copy as
 *     pd_list.
 * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
 *     BOs that need to be reserved.
 * 4.  Reserve all the BOs
 * 5.  Validate the PD and PT BOs
 * 6.  Validate all KFD BOs using kfd_bo_list, map them and add a new fence
 * 7.  Add the fence to all PD and PT BOs
 * 8.  Unreserve all BOs
 */
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
	struct amdgpu_bo_list_entry *pd_bo_list;
	struct amdkfd_process_info *process_info = info;
	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem;
	struct bo_vm_reservation_context ctx;
	struct amdgpu_amdkfd_fence *new_fence;
	int ret = 0, i;
	struct list_head duplicate_save;
	struct amdgpu_sync sync_obj;

	INIT_LIST_HEAD(&duplicate_save);
	INIT_LIST_HEAD(&ctx.list);
	INIT_LIST_HEAD(&ctx.duplicates);

	pd_bo_list = kcalloc(process_info->n_vms,
			     sizeof(struct amdgpu_bo_list_entry),
			     GFP_KERNEL);
	if (!pd_bo_list)
		return -ENOMEM;

	i = 0;
	mutex_lock(&process_info->lock);
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);

	/* Reserve all BOs and page tables/directory. Add all BOs from
	 * kfd_bo_list to ctx.list
	 */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		list_add_tail(&mem->resv_list.head, &ctx.list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
	}

	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
				     false, &duplicate_save);
	if (ret) {
		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
		goto ttm_reserve_fail;
	}

	amdgpu_sync_create(&sync_obj);

	/* Validate PDs and PTs */
	ret = process_validate_vms(process_info);
	if (ret)
		goto validate_map_fail;

	ret = process_sync_pds_resv(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
		goto validate_map_fail;
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		struct amdgpu_bo *bo = mem->bo;
		uint32_t domain = mem->domain;
		struct kfd_bo_va_list *bo_va_entry;

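		/* For each KFD BO: revalidate it in its original domain,
		 * collect its move fence in sync_obj (waited on after the
		 * loop), then refresh the PTEs of every VM that maps it.
		 */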
		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
		if (ret) {
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}
		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
		if (ret) {
			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
			goto validate_map_fail;
		}
		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
					      bo_va_entry->kgd_dev,
					      bo_va_entry,
					      &sync_obj);
			if (ret) {
				pr_debug("Memory eviction: update PTE failed. Try again\n");
				goto validate_map_fail;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: update PDs failed. Try again\n");
		goto validate_map_fail;
	}

	/* Wait for validate and PT updates to finish */
	amdgpu_sync_wait(&sync_obj, false);

	/* Release the old eviction fence and create a new one: a fence only
	 * goes from unsignaled to signaled, so it cannot be reused.
	 * Use the context and mm from the old fence.
	 */
	new_fence = amdgpu_amdkfd_fence_create(
				process_info->eviction_fence->base.context,
				process_info->eviction_fence->mm);
	if (!new_fence) {
		pr_err("Failed to create eviction fence\n");
		ret = -ENOMEM;
		goto validate_map_fail;
	}
	dma_fence_put(&process_info->eviction_fence->base);
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Attach the new eviction fence to all BOs */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head)
		amdgpu_bo_fence(mem->bo,
				&process_info->eviction_fence->base, true);

	/* Attach the eviction fence to PD / PT BOs */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->root.base.bo;

		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
	}

validate_map_fail:
	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
	amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
	mutex_unlock(&process_info->lock);
	kfree(pd_bo_list);
	return ret;
}