/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "kfd2kgd: " fmt

#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>

#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"

/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)

/* Userptr restore delay, just long enough to allow consecutive VM
 * changes to accumulate
 */
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1

/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	uint64_t max_ttm_mem_limit;
	int64_t system_mem_used;
	int64_t ttm_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

/* Struct used for amdgpu_amdkfd_bo_validate */
struct amdgpu_vm_parser {
	uint32_t	domain;
	bool		wait;
};

static const char * const domain_bit_to_string[] = {
	"CPU",
	"GTT",
	"VRAM",
	"GDS",
	"GWS",
	"OA"
};

#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]

static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);


static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
		struct kgd_mem *mem)
{
	struct kfd_bo_va_list *entry;

	list_for_each_entry(entry, &mem->bo_va_list, bo_list)
		if (entry->bo_va->base.vm == avm)
			return false;

	return true;
}

/* Set memory usage limits. Currently, limits are
 *  System (TTM + userptr) memory - 3/4th System RAM
 *  TTM memory - 3/8th System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
	struct sysinfo si;
	uint64_t mem;

	si_meminfo(&si);
	mem = si.totalram - si.totalhigh;
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
}

static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
	uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
	int ret = 0;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	vram_needed = 0;
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		/* TTM GTT memory */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size + size;
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		/* Userptr */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
	} else {
		/* VRAM and SG */
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
			vram_needed = size;
	}

	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
	     kfd_mem_limit.max_system_mem_limit) ||
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
	     kfd_mem_limit.max_ttm_mem_limit) ||
	    (adev->kfd.vram_used + vram_needed >
	     adev->gmc.real_vram_size - reserved_for_pt)) {
		ret = -ENOMEM;
	} else {
		kfd_mem_limit.system_mem_used += system_mem_needed;
		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
		adev->kfd.vram_used += vram_needed;
	}

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}

static void unreserve_mem_limit(struct amdgpu_device *adev,
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
	} else {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
			adev->kfd.vram_used -= size;
			WARN_ONCE(adev->kfd.vram_used < 0,
				  "kfd VRAM memory accounting unbalanced");
		}
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}

void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	u32 domain = bo->preferred_domains;
	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);

	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		domain = AMDGPU_GEM_DOMAIN_CPU;
		sg = false;
	}

	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
}


/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
 *  reservation object.
 *
 * @bo: [IN] Remove eviction fence(s) from this BO
 * @ef: [IN] This eviction fence is removed if it
 *  is present in the shared list.
 *
 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
 */
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
					struct amdgpu_amdkfd_fence *ef)
{
	struct dma_resv *resv = bo->tbo.base.resv;
	struct dma_resv_list *old, *new;
	unsigned int i, j, k;

	if (!ef)
		return -EINVAL;

	old = dma_resv_get_list(resv);
	if (!old)
		return 0;

	new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]),
		      GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	/* Go through all the shared fences in the reservation object and sort
	 * the interesting ones to the end of the list.
	 */
	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(old->shared[i],
					      dma_resv_held(resv));

		if (f->context == ef->base.context)
			RCU_INIT_POINTER(new->shared[--j], f);
		else
			RCU_INIT_POINTER(new->shared[k++], f);
	}
	new->shared_max = old->shared_max;
	new->shared_count = k;

	/* Install the new fence list, seqcount provides the barriers */
	preempt_disable();
	write_seqcount_begin(&resv->seq);
	RCU_INIT_POINTER(resv->fence, new);
	write_seqcount_end(&resv->seq);
	preempt_enable();

	/* Drop the references to the removed fences or move them to ef_list */
	for (i = j, k = 0; i < old->shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(new->shared[i],
					      dma_resv_held(resv));
		dma_fence_put(f);
	}
	kfree_rcu(old, rcu);

	return 0;
}

static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
				     bool wait)
{
	struct ttm_operation_ctx ctx = { false, false };
	int ret;

	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
		 "Called with userptr BO"))
		return -EINVAL;

	amdgpu_bo_placement_from_domain(bo, domain);

	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		goto validate_fail;
	if (wait)
		amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);

validate_fail:
	return ret;
}

static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_vm_parser *p = param;

	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
}

/* vm_validate_pt_pd_bos - Validate page table and directory BOs
 *
 * Page directories are not updated here because huge page handling
 * during page table updates can invalidate page directory entries
 * again. Page directories are only updated after updating page
 * tables.
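 *
 * Note: validation below uses AMDGPU_GEM_DOMAIN_VRAM with wait == false;
 * callers that need to wait for the resulting moving fences are expected
 * to sync on them separately.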
 */
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	struct amdgpu_vm_parser param;
	int ret;

	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
	param.wait = false;

	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
					&param);
	if (ret) {
		pr_err("amdgpu: failed to validate PT BOs\n");
		return ret;
	}

	ret = amdgpu_amdkfd_validate(&param, pd);
	if (ret) {
		pr_err("amdgpu: failed to validate PD\n");
		return ret;
	}

	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);

	if (vm->use_cpu_for_update) {
		ret = amdgpu_bo_kmap(pd, NULL);
		if (ret) {
			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	int ret;

	ret = amdgpu_vm_update_pdes(adev, vm, false);
	if (ret)
		return ret;

	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
}

static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
	bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
	uint32_t mapping_flags;

	mapping_flags = AMDGPU_VM_PAGE_READABLE;
	if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
	if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;

	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
			if (bo_adev == adev)
				mapping_flags |= coherent ?
					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
			else
				mapping_flags |= AMDGPU_VM_MTYPE_UC;
		} else {
			mapping_flags |= coherent ?
				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
		}
		break;
	default:
		mapping_flags |= coherent ?
			AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
	}

	return amdgpu_gem_va_map_flags(adev, mapping_flags);
}

/* add_bo_to_vm - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a. Validate new page tables and directories
 */
static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
		struct amdgpu_vm *vm, bool is_aql,
		struct kfd_bo_va_list **p_bo_va_entry)
{
	int ret;
	struct kfd_bo_va_list *bo_va_entry;
	struct amdgpu_bo *bo = mem->bo;
	uint64_t va = mem->va;
	struct list_head *list_bo_va = &mem->bo_va_list;
	unsigned long bo_size = bo->tbo.mem.size;

	if (!va) {
		pr_err("Invalid VA when adding BO to VM\n");
		return -EINVAL;
	}

	if (is_aql)
		va += bo_size;

	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
	if (!bo_va_entry)
		return -ENOMEM;

	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
			va + bo_size, vm);

	/* Add BO to VM internal data structures*/
	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
	if (!bo_va_entry->bo_va) {
		ret = -EINVAL;
		pr_err("Failed to add BO object to VM. ret == %d\n",
				ret);
		goto err_vmadd;
	}

	bo_va_entry->va = va;
	bo_va_entry->pte_flags = get_pte_flags(adev, mem);
	bo_va_entry->kgd_dev = (void *)adev;
	list_add(&bo_va_entry->bo_list, list_bo_va);

	if (p_bo_va_entry)
		*p_bo_va_entry = bo_va_entry;

	/* Allocate validate page tables if needed */
	ret = vm_validate_pt_pd_bos(vm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto err_alloc_pts;
	}

	return 0;

err_alloc_pts:
	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
	list_del(&bo_va_entry->bo_list);
err_vmadd:
	kfree(bo_va_entry);
	return ret;
}

static void remove_bo_from_vm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, unsigned long size)
{
	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
			entry->va,
			entry->va + size, entry);
	amdgpu_vm_bo_rmv(adev, entry->bo_va);
	list_del(&entry->bo_list);
	kfree(entry);
}

static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
				struct amdkfd_process_info *process_info,
				bool userptr)
{
	struct ttm_validate_buffer *entry = &mem->validate_list;
	struct amdgpu_bo *bo = mem->bo;

	INIT_LIST_HEAD(&entry->head);
	entry->num_shared = 1;
	entry->bo = &bo->tbo;
	mutex_lock(&process_info->lock);
	if (userptr)
		list_add_tail(&entry->head, &process_info->userptr_valid_list);
	else
		list_add_tail(&entry->head, &process_info->kfd_bo_list);
	mutex_unlock(&process_info->lock);
}

static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
		struct amdkfd_process_info *process_info)
{
	struct ttm_validate_buffer *bo_list_entry;

	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);
}

/* Initializes user pages. It registers the MMU notifier and validates
 * the userptr BO in the GTT domain.
 *
 * The BO must already be on the userptr_valid_list. Otherwise an
 * eviction and restore may happen that leaves the new BO unmapped
 * with the user mode queues running.
 *
 * Takes the process_info->lock to protect against concurrent restore
 * workers.
 *
 * Returns 0 for success, negative errno for errors.
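 *
 * The sequence implemented below is, roughly: set the userptr on the ttm,
 * register the MMU notifier, look up the user pages, then reserve and
 * validate the BO in mem->domain; on failure the MMU notifier is
 * unregistered again.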
 */
static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
			   uint64_t user_addr)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	struct amdgpu_bo *bo = mem->bo;
	struct ttm_operation_ctx ctx = { true, false };
	int ret = 0;

	mutex_lock(&process_info->lock);

	ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
	if (ret) {
		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
		goto out;
	}

	ret = amdgpu_mn_register(bo, user_addr);
	if (ret) {
		pr_err("%s: Failed to register MMU notifier: %d\n",
		       __func__, ret);
		goto out;
	}

	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
	if (ret) {
		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
		goto unregister_out;
	}

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("%s: Failed to reserve BO\n", __func__);
		goto release_out;
	}
	amdgpu_bo_placement_from_domain(bo, mem->domain);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		pr_err("%s: failed to validate BO\n", __func__);
	amdgpu_bo_unreserve(bo);

release_out:
	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
	if (ret)
		amdgpu_mn_unregister(bo);
out:
	mutex_unlock(&process_info->lock);
	return ret;
}

/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 */
struct bo_vm_reservation_context {
	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
	unsigned int n_vms;		    /* Number of VMs reserved	    */
	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
	struct ww_acquire_ctx ticket;	    /* Reservation ticket	    */
	struct list_head list, duplicates;  /* BO lists			    */
	struct amdgpu_sync *sync;	    /* Pointer to sync object	    */
	bool reserved;			    /* Whether BOs are reserved	    */
};

enum bo_vm_match {
	BO_VM_NOT_MAPPED = 0,	/* Match VMs where a BO is not mapped */
	BO_VM_MAPPED,		/* Match VMs where a BO is mapped     */
	BO_VM_ALL,		/* Match all VMs a BO was added to    */
};

/**
 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
 * @mem: KFD BO structure.
 * @vm: the VM to reserve.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
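 *
 * Illustrative usage (a sketch of the pattern used by the callers in this
 * file, e.g. amdgpu_amdkfd_gpuvm_map_memory_to_gpu()):
 *
 *	struct bo_vm_reservation_context ctx;
 *
 *	ret = reserve_bo_and_vm(mem, vm, &ctx);
 *	if (!ret) {
 *		... operate on mem->bo and the VM page tables ...
 *		unreserve_bo_and_vms(&ctx, false, false);
 *	}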
 */
static int reserve_bo_and_vm(struct kgd_mem *mem,
			      struct amdgpu_vm *vm,
			      struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	int ret;

	WARN_ON(!vm);

	ctx->reserved = false;
	ctx->n_vms = 1;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
	if (!ctx->vm_pd)
		return -ENOMEM;

	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates, true);
	if (!ret)
		ctx->reserved = true;
	else {
		pr_err("Failed to reserve buffers in ttm\n");
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
	}

	return ret;
}

/**
 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
 * @mem: KFD BO structure.
 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
 * are used. Otherwise, a single VM associated with the BO.
 * @map_type: the mapping status that will be used to filter the VMs.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 *
 * Returns 0 for success, negative for failure.
 */
static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
				struct amdgpu_vm *vm, enum bo_vm_match map_type,
				struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	struct kfd_bo_va_list *entry;
	unsigned int i;
	int ret;

	ctx->reserved = false;
	ctx->n_vms = 0;
	ctx->vm_pd = NULL;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		ctx->n_vms++;
	}

	if (ctx->n_vms != 0) {
		ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
				     GFP_KERNEL);
		if (!ctx->vm_pd)
			return -ENOMEM;
	}

	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	i = 0;
	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
				    &ctx->vm_pd[i]);
		i++;
	}

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates, true);
	if (!ret)
		ctx->reserved = true;
	else
		pr_err("Failed to reserve buffers in ttm.\n");

	if (ret) {
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
	}

	return ret;
}

/**
 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
 * @ctx: Reservation context to unreserve
 * @wait: Optionally wait for a sync object representing pending VM updates
 * @intr: Whether the wait is interruptible
 *
 * Also frees any resources allocated in
 * reserve_bo_and_(cond_)vm(s). Returns the status from
 * amdgpu_sync_wait.
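 *
 * Note: when @wait is false the function always returns 0, so callers that
 * only need to back off the reservation can ignore the return value.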
 */
static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
				 bool wait, bool intr)
{
	int ret = 0;

	if (wait)
		ret = amdgpu_sync_wait(ctx->sync, intr);

	if (ctx->reserved)
		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
	kfree(ctx->vm_pd);

	ctx->sync = NULL;

	ctx->reserved = false;
	ctx->vm_pd = NULL;

	return ret;
}

static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
				struct kfd_bo_va_list *entry,
				struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = entry->bo_va;
	struct amdgpu_vm *vm = bo_va->base.vm;

	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);

	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);

	return 0;
}

static int update_gpuvm_pte(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry,
		struct amdgpu_sync *sync)
{
	int ret;
	struct amdgpu_bo_va *bo_va = entry->bo_va;

	/* Update the page tables */
	ret = amdgpu_vm_bo_update(adev, bo_va, false);
	if (ret) {
		pr_err("amdgpu_vm_bo_update failed\n");
		return ret;
	}

	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
}

static int map_bo_to_gpuvm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
		bool no_update_pte)
{
	int ret;

	/* Set virtual address for the allocation */
	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
			       amdgpu_bo_size(entry->bo_va->base.bo),
			       entry->pte_flags);
	if (ret) {
		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
				entry->va, ret);
		return ret;
	}

	if (no_update_pte)
		return 0;

	ret = update_gpuvm_pte(adev, entry, sync);
	if (ret) {
		pr_err("update_gpuvm_pte() failed\n");
		goto update_gpuvm_pte_failed;
	}

	return 0;

update_gpuvm_pte_failed:
	unmap_bo_from_gpuvm(adev, entry, sync);
	return ret;
}

static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
{
	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);

	if (!sg)
		return NULL;
	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
		kfree(sg);
		return NULL;
	}
	sg->sgl->dma_address = addr;
	sg->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	sg->sgl->dma_length = size;
#endif
	return sg;
}

static int process_validate_vms(struct amdkfd_process_info *process_info)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_validate_pt_pd_bos(peer_vm);
		if (ret)
			return ret;
	}

	return 0;
}

static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
				 struct amdgpu_sync *sync)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *pd = peer_vm->root.base.bo;

		ret = amdgpu_sync_resv(NULL,
					sync, pd->tbo.base.resv,
					AMDGPU_FENCE_OWNER_KFD, false);
		if (ret)
			return ret;
	}

	return 0;
}

static int process_update_pds(struct amdkfd_process_info *process_info,
			      struct amdgpu_sync *sync)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_update_pds(peer_vm, sync);
		if (ret)
			return ret;
	}

	return 0;
}

static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
		       struct dma_fence **ef)
{
	struct amdkfd_process_info *info = NULL;
	int ret;

	if (!*process_info) {
		info = kzalloc(sizeof(*info), GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		mutex_init(&info->lock);
		INIT_LIST_HEAD(&info->vm_list_head);
		INIT_LIST_HEAD(&info->kfd_bo_list);
		INIT_LIST_HEAD(&info->userptr_valid_list);
		INIT_LIST_HEAD(&info->userptr_inval_list);

		info->eviction_fence =
			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
						   current->mm);
		if (!info->eviction_fence) {
			pr_err("Failed to create eviction fence\n");
			ret = -ENOMEM;
			goto create_evict_fence_fail;
		}

		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
		atomic_set(&info->evicted_bos, 0);
		INIT_DELAYED_WORK(&info->restore_userptr_work,
				  amdgpu_amdkfd_restore_userptr_worker);

		*process_info = info;
		*ef = dma_fence_get(&info->eviction_fence->base);
	}

	vm->process_info = *process_info;

	/* Validate page directory and attach eviction fence */
	ret = amdgpu_bo_reserve(vm->root.base.bo, true);
	if (ret)
		goto reserve_pd_fail;
	ret = vm_validate_pt_pd_bos(vm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto validate_pd_fail;
	}
	ret = amdgpu_bo_sync_wait(vm->root.base.bo,
				  AMDGPU_FENCE_OWNER_KFD, false);
	if (ret)
		goto wait_pd_fail;
	ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
	if (ret)
		goto reserve_shared_fail;
	amdgpu_bo_fence(vm->root.base.bo,
			&vm->process_info->eviction_fence->base, true);
	amdgpu_bo_unreserve(vm->root.base.bo);

	/* Update process info */
	mutex_lock(&vm->process_info->lock);
	list_add_tail(&vm->vm_list_node,
			&(vm->process_info->vm_list_head));
	vm->process_info->n_vms++;
	mutex_unlock(&vm->process_info->lock);

	return 0;

reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
	amdgpu_bo_unreserve(vm->root.base.bo);
reserve_pd_fail:
	vm->process_info = NULL;
	if (info) {
		/* Two fence references: one in info and one in *ef */
		dma_fence_put(&info->eviction_fence->base);
		dma_fence_put(*ef);
		*ef = NULL;
		*process_info = NULL;
		put_pid(info->pid);
create_evict_fence_fail:
		mutex_destroy(&info->lock);
		kfree(info);
	}
	return ret;
}

int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
					  void **vm, void **process_info,
					  struct dma_fence **ef)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *new_vm;
	int ret;

	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
	if (!new_vm)
		return -ENOMEM;

	/* Initialize AMDGPU part of the VM */
	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
	if (ret) {
		pr_err("Failed init vm ret %d\n", ret);
		goto amdgpu_vm_init_fail;
	}

	/* Initialize KFD part of the VM and process info */
	ret = init_kfd_vm(new_vm, process_info, ef);
	if (ret)
		goto init_kfd_vm_fail;

	*vm = (void *) new_vm;

	return 0;

init_kfd_vm_fail:
	amdgpu_vm_fini(adev, new_vm);
amdgpu_vm_init_fail:
	kfree(new_vm);
	return ret;
}

int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
					   struct file *filp, unsigned int pasid,
					   void **vm, void **process_info,
					   struct dma_fence **ef)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct drm_file *drm_priv = filp->private_data;
	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
	struct amdgpu_vm *avm = &drv_priv->vm;
	int ret;

	/* Already a compute VM? */
	if (avm->process_info)
		return -EINVAL;

	/* Convert VM into a compute VM */
	ret = amdgpu_vm_make_compute(adev, avm, pasid);
	if (ret)
		return ret;

	/* Initialize KFD part of the VM and process info */
	ret = init_kfd_vm(avm, process_info, ef);
	if (ret)
		return ret;

	*vm = (void *)avm;

	return 0;
}

void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
	struct amdkfd_process_info *process_info = vm->process_info;
	struct amdgpu_bo *pd = vm->root.base.bo;

	if (!process_info)
		return;

	/* Release eviction fence from PD */
	amdgpu_bo_reserve(pd, false);
	amdgpu_bo_fence(pd, NULL, false);
	amdgpu_bo_unreserve(pd);

	/* Update process info */
	mutex_lock(&process_info->lock);
	process_info->n_vms--;
	list_del(&vm->vm_list_node);
	mutex_unlock(&process_info->lock);

	/* Release per-process resources when last compute VM is destroyed */
	if (!process_info->n_vms) {
		WARN_ON(!list_empty(&process_info->kfd_bo_list));
		WARN_ON(!list_empty(&process_info->userptr_valid_list));
		WARN_ON(!list_empty(&process_info->userptr_inval_list));

		dma_fence_put(&process_info->eviction_fence->base);
		cancel_delayed_work_sync(&process_info->restore_userptr_work);
		put_pid(process_info->pid);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}
}

void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Destroying process vm %p\n", vm);

	/* Release the VM context */
	amdgpu_vm_fini(adev, avm);
	kfree(vm);
}

void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Releasing process vm %p\n", vm);

	/* The original pasid of amdgpu vm has already been
	 * released during making a amdgpu vm to a compute vm
	 * The current pasid is managed by kfd and will be
	 * released on kfd process destroy. Set amdgpu pasid
	 * to 0 to avoid duplicate release.
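	 *
	 * amdgpu_vm_release_compute() below undoes amdgpu_vm_make_compute();
	 * the shared process_info bookkeeping is torn down separately in
	 * amdgpu_amdkfd_gpuvm_destroy_cb() above.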
	 */
	amdgpu_vm_release_compute(adev, avm);
}

uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	struct amdgpu_bo *pd = avm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);

	if (adev->asic_type < CHIP_VEGA10)
		return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
	return avm->pd_phys_addr;
}

int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct kgd_dev *kgd, uint64_t va, uint64_t size,
		void *vm, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	enum ttm_bo_type bo_type = ttm_bo_type_device;
	struct sg_table *sg = NULL;
	uint64_t user_addr = 0;
	struct amdgpu_bo *bo;
	struct amdgpu_bo_param bp;
	u32 domain, alloc_domain;
	u64 alloc_flags;
	int ret;

	/*
	 * Check on which domain to allocate BO
	 */
	if (flags & ALLOC_MEM_FLAGS_VRAM) {
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
		alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_flags = 0;
	} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
		domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
		alloc_flags = 0;
		if (!offset || !*offset)
			return -EINVAL;
		user_addr = *offset;
	} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
			ALLOC_MEM_FLAGS_MMIO_REMAP)) {
		domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
		bo_type = ttm_bo_type_sg;
		alloc_flags = 0;
		if (size > UINT_MAX)
			return -EINVAL;
		sg = create_doorbell_sg(*offset, size);
		if (!sg)
			return -ENOMEM;
	} else {
		return -EINVAL;
	}

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem) {
		ret = -ENOMEM;
		goto err;
	}
	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	mutex_init(&(*mem)->lock);
	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);

	/* Workaround for AQL queue wraparound bug. Map the same
	 * memory twice. That means we only actually allocate half
	 * the memory.
	 */
	if ((*mem)->aql_queue)
		size = size >> 1;

	(*mem)->alloc_flags = flags;

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
	if (ret) {
		pr_debug("Insufficient system memory\n");
		goto err_reserve_limit;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
			va, size, domain_string(alloc_domain));

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = alloc_domain;
	bp.flags = alloc_flags;
	bp.type = bo_type;
	bp.resv = NULL;
	ret = amdgpu_bo_create(adev, &bp, &bo);
	if (ret) {
		pr_debug("Failed to create BO on domain %s. ret %d\n",
				domain_string(alloc_domain), ret);
		goto err_bo_create;
	}
	if (bo_type == ttm_bo_type_sg) {
		bo->tbo.sg = sg;
		bo->tbo.ttm->sg = sg;
	}
	bo->kfd_bo = *mem;
	(*mem)->bo = bo;
	if (user_addr)
		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;

	(*mem)->va = va;
	(*mem)->domain = domain;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = avm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);

	if (user_addr) {
		ret = init_user_pages(*mem, current->mm, user_addr);
		if (ret)
			goto allocate_init_user_pages_failed;
	}

	if (offset)
		*offset = amdgpu_bo_mmap_offset(bo);

	return 0;

allocate_init_user_pages_failed:
	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
	amdgpu_bo_unref(&bo);
	/* Don't unreserve system mem limit twice */
	goto err_reserve_limit;
err_bo_create:
	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
err_reserve_limit:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
err:
	if (sg) {
		sg_free_table(sg);
		kfree(sg);
	}
	return ret;
}

int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	unsigned long bo_size = mem->bo->tbo.mem.size;
	struct kfd_bo_va_list *entry, *tmp;
	struct bo_vm_reservation_context ctx;
	struct ttm_validate_buffer *bo_list_entry;
	int ret;

	mutex_lock(&mem->lock);

	if (mem->mapped_to_gpu_memory > 0) {
		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
				mem->va, bo_size);
		mutex_unlock(&mem->lock);
		return -EBUSY;
	}

	mutex_unlock(&mem->lock);
	/* lock is not needed after this, since mem is unused and will
	 * be freed anyway
	 */

	/* No more MMU notifiers */
	amdgpu_mn_unregister(mem->bo);

	/* Make sure restore workers don't access the BO any more */
	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);

	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					process_info->eviction_fence);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
		mem->va + bo_size * (1 + mem->aql_queue));

	/* Remove from VM internal data structures */
	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
				entry, bo_size);

	ret = unreserve_bo_and_vms(&ctx, false, false);

	/* Free the sync object */
	amdgpu_sync_free(&mem->sync);

	/* If the SG is not NULL, it's one we created for a doorbell or mmio
	 * remap BO. We need to free it.
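	 * Only the doorbell/MMIO-remap path in
	 * amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu() creates such an SG table
	 * (via create_doorbell_sg()), so this does not fire for ordinary
	 * VRAM, GTT and userptr BOs.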
	 */
	if (mem->bo->tbo.sg) {
		sg_free_table(mem->bo->tbo.sg);
		kfree(mem->bo->tbo.sg);
	}

	/* Free the BO*/
	amdgpu_bo_unref(&mem->bo);
	mutex_destroy(&mem->lock);
	kfree(mem);

	return ret;
}

int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	int ret;
	struct amdgpu_bo *bo;
	uint32_t domain;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	struct kfd_bo_va_list *bo_va_entry = NULL;
	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
	unsigned long bo_size;
	bool is_invalid_userptr = false;

	bo = mem->bo;
	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		return -EINVAL;
	}

	/* Make sure restore is not running concurrently. Since we
	 * don't map invalid userptr BOs, we rely on the next restore
	 * worker to do the mapping
	 */
	mutex_lock(&mem->process_info->lock);

	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
	 * sure that the MMU notifier is no longer running
	 * concurrently and the queues are actually stopped
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		down_write(&current->mm->mmap_sem);
		is_invalid_userptr = atomic_read(&mem->invalid);
		up_write(&current->mm->mmap_sem);
	}

	mutex_lock(&mem->lock);

	domain = mem->domain;
	bo_size = bo->tbo.mem.size;

	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
			mem->va,
			mem->va + bo_size * (1 + mem->aql_queue),
			vm, domain_string(domain));

	ret = reserve_bo_and_vm(mem, vm, &ctx);
	if (unlikely(ret))
		goto out;

	/* Userptr can be marked as "not invalid", but not actually be
	 * validated yet (still in the system domain). In that case
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	if (check_if_add_bo_to_vm(avm, mem)) {
		ret = add_bo_to_vm(adev, mem, avm, false,
				&bo_va_entry);
		if (ret)
			goto add_bo_to_vm_failed;
		if (mem->aql_queue) {
			ret = add_bo_to_vm(adev, mem, avm,
					true, &bo_va_entry_aql);
			if (ret)
				goto add_bo_to_vm_failed_aql;
		}
	} else {
		ret = vm_validate_pt_pd_bos(avm);
		if (unlikely(ret))
			goto add_bo_to_vm_failed;
	}

	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
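		 * Userptr BOs are intentionally not validated here; they are
		 * handled by the restore worker via
		 * validate_invalid_user_pages() instead.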
		 */
		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret) {
			pr_debug("Validate failed\n");
			goto map_bo_to_gpuvm_failed;
		}
	}

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
					entry->va, entry->va + bo_size,
					entry);

			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
					      is_invalid_userptr);
			if (ret) {
				pr_err("Failed to map bo to gpuvm\n");
				goto map_bo_to_gpuvm_failed;
			}

			ret = vm_update_pds(vm, ctx.sync);
			if (ret) {
				pr_err("Failed to update page directories\n");
				goto map_bo_to_gpuvm_failed;
			}

			entry->is_mapped = true;
			mem->mapped_to_gpu_memory++;
			pr_debug("\t INC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
		amdgpu_bo_fence(bo,
				&avm->process_info->eviction_fence->base,
				true);
	ret = unreserve_bo_and_vms(&ctx, false, false);

	goto out;

map_bo_to_gpuvm_failed:
	if (bo_va_entry_aql)
		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
add_bo_to_vm_failed_aql:
	if (bo_va_entry)
		remove_bo_from_vm(adev, bo_va_entry, bo_size);
add_bo_to_vm_failed:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->process_info->lock);
	mutex_unlock(&mem->lock);
	return ret;
}

int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_process_info *process_info =
		((struct amdgpu_vm *)vm)->process_info;
	unsigned long bo_size = mem->bo->tbo.mem.size;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	int ret;

	mutex_lock(&mem->lock);

	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
	if (unlikely(ret))
		goto out;
	/* If no VMs were reserved, it means the BO wasn't actually mapped */
	if (ctx.n_vms == 0) {
		ret = -EINVAL;
		goto unreserve_out;
	}

	ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
	if (unlikely(ret))
		goto unreserve_out;

	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
		mem->va,
		mem->va + bo_size * (1 + mem->aql_queue),
		vm);

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
					entry->va,
					entry->va + bo_size,
					entry);

			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
			if (ret == 0) {
				entry->is_mapped = false;
			} else {
				pr_err("failed to unmap VA 0x%llx\n",
						mem->va);
				goto unreserve_out;
			}

			mem->mapped_to_gpu_memory--;
			pr_debug("\t DEC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
	 * required.
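	 * Pinned BOs keep their eviction fence handling separate (see
	 * amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel()), hence the pin_count
	 * check below.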
	 */
	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
						process_info->eviction_fence);

unreserve_out:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->lock);
	return ret;
}

int amdgpu_amdkfd_gpuvm_sync_memory(
		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
{
	struct amdgpu_sync sync;
	int ret;

	amdgpu_sync_create(&sync);

	mutex_lock(&mem->lock);
	amdgpu_sync_clone(&mem->sync, &sync);
	mutex_unlock(&mem->lock);

	ret = amdgpu_sync_wait(&sync, intr);
	amdgpu_sync_free(&sync);
	return ret;
}

int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
		struct kgd_mem *mem, void **kptr, uint64_t *size)
{
	int ret;
	struct amdgpu_bo *bo = mem->bo;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		pr_err("userptr can't be mapped to kernel\n");
		return -EINVAL;
	}

	/* delete kgd_mem from kfd_bo_list to avoid re-validating
	 * this BO in BO's restoring after eviction.
	 */
	mutex_lock(&mem->process_info->lock);

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("Failed to reserve bo. ret %d\n", ret);
		goto bo_reserve_failed;
	}

	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (ret) {
		pr_err("Failed to pin bo. ret %d\n", ret);
		goto pin_failed;
	}

	ret = amdgpu_bo_kmap(bo, kptr);
	if (ret) {
		pr_err("Failed to map bo to kernel. ret %d\n", ret);
		goto kmap_failed;
	}

	amdgpu_amdkfd_remove_eviction_fence(
		bo, mem->process_info->eviction_fence);
	list_del_init(&mem->validate_list.head);

	if (size)
		*size = amdgpu_bo_size(bo);

	amdgpu_bo_unreserve(bo);

	mutex_unlock(&mem->process_info->lock);
	return 0;

kmap_failed:
	amdgpu_bo_unpin(bo);
pin_failed:
	amdgpu_bo_unreserve(bo);
bo_reserve_failed:
	mutex_unlock(&mem->process_info->lock);

	return ret;
}

int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
					      struct kfd_vm_fault_info *mem)
{
	struct amdgpu_device *adev;

	adev = (struct amdgpu_device *)kgd;
	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
		*mem = *adev->gmc.vm_fault_info;
		mb();
		atomic_set(&adev->gmc.vm_fault_info_updated, 0);
	}
	return 0;
}

int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
				      struct dma_buf *dma_buf,
				      uint64_t va, void *vm,
				      struct kgd_mem **mem, uint64_t *size,
				      uint64_t *mmap_offset)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		return -EINVAL;

	obj = dma_buf->priv;
	if (obj->dev->dev_private != adev)
		/* Can't handle buffers from other devices */
		return -EINVAL;

	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		return -EINVAL;

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem)
		return -ENOMEM;

	if (size)
		*size = amdgpu_bo_size(bo);

	if (mmap_offset)
		*mmap_offset = amdgpu_bo_mmap_offset(bo);

	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	mutex_init(&(*mem)->lock);
	(*mem)->alloc_flags =
		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
		ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) |
		ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE;

	(*mem)->bo = amdgpu_bo_ref(bo);
	(*mem)->va = va;
	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = avm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
	amdgpu_sync_create(&(*mem)->sync);

	return 0;
}

/* Evict a userptr BO by stopping the queues if necessary
 *
 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
 * cannot do any memory allocations, and cannot take any locks that
 * are held elsewhere while allocating memory. Therefore this is as
 * simple as possible, using atomic counters.
 *
 * It doesn't do anything to the BO itself. The real work happens in
 * restore, where we get updated page addresses. This function only
 * ensures that GPU access to the BO is stopped.
 */
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
				struct mm_struct *mm)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	int invalid, evicted_bos;
	int r = 0;

	invalid = atomic_inc_return(&mem->invalid);
	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
	if (evicted_bos == 1) {
		/* First eviction, stop the queues */
		r = kgd2kfd_quiesce_mm(mm);
		if (r)
			pr_err("Failed to quiesce KFD\n");
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
	}

	return r;
}

/* Update invalid userptr BOs
 *
 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
 * userptr_inval_list and updates user pages for all BOs that have
 * been invalidated since their last update.
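 *
 * Returns 0 on success, -EAGAIN if a BO could not be reserved or was
 * invalidated again concurrently, or the error from
 * amdgpu_ttm_tt_get_user_pages() (e.g. -EBUSY or -ENOMEM) so that the
 * caller can retry the restore later.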
 */
static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
				     struct mm_struct *mm)
{
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int invalid, ret;

	/* Move all invalidated BOs to the userptr_inval_list and
	 * release their user pages by migration to the CPU domain
	 */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_valid_list,
				 validate_list.head) {
		if (!atomic_read(&mem->invalid))
			continue; /* BO is still valid */

		bo = mem->bo;

		if (amdgpu_bo_reserve(bo, true))
			return -EAGAIN;
		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		amdgpu_bo_unreserve(bo);
		if (ret) {
			pr_err("%s: Failed to invalidate userptr BO\n",
			       __func__);
			return -EAGAIN;
		}

		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_inval_list);
	}

	if (list_empty(&process_info->userptr_inval_list))
		return 0; /* All evicted userptr BOs were freed */

	/* Go through userptr_inval_list and update any invalid user_pages */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head) {
		invalid = atomic_read(&mem->invalid);
		if (!invalid)
			/* BO hasn't been invalidated since the last
			 * revalidation attempt. Keep its BO list.
			 */
			continue;

		bo = mem->bo;

		/* Get updated user pages */
		ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
		if (ret) {
			pr_debug("%s: Failed to get user pages: %d\n",
				__func__, ret);

			/* Return error -EBUSY or -ENOMEM, retry restore */
			return ret;
		}

		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

		/* Mark the BO as valid unless it was invalidated
		 * again concurrently.
		 */
		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
			return -EAGAIN;
	}

	return 0;
}

/* Validate invalid userptr BOs
 *
 * Validates BOs on the userptr_inval_list, and moves them back to the
 * userptr_valid_list. Also updates GPUVM page tables with new page
 * addresses and waits for the page table updates to complete.
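 *
 * Called by the restore worker below with process_info->lock held; the
 * amdgpu_sync object created here is waited on in the unreserve path
 * before returning, so the page table updates are complete when the
 * queues are restarted.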
 */
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
	struct amdgpu_bo_list_entry *pd_bo_list_entries;
	struct list_head resv_list, duplicates;
	struct ww_acquire_ctx ticket;
	struct amdgpu_sync sync;

	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int i, ret;

	pd_bo_list_entries = kcalloc(process_info->n_vms,
				     sizeof(struct amdgpu_bo_list_entry),
				     GFP_KERNEL);
	if (!pd_bo_list_entries) {
		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
		ret = -ENOMEM;
		goto out_no_mem;
	}

	INIT_LIST_HEAD(&resv_list);
	INIT_LIST_HEAD(&duplicates);

	/* Get all the page directory BOs that need to be reserved */
	i = 0;
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
				    &pd_bo_list_entries[i++]);
	/* Add the userptr_inval_list entries to resv_list */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head) {
		list_add_tail(&mem->resv_list.head, &resv_list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
	}

	/* Reserve all BOs and page tables for validation */
	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
				     true);
	WARN(!list_empty(&duplicates), "Duplicates should be empty");
	if (ret)
		goto out_free;

	amdgpu_sync_create(&sync);

	ret = process_validate_vms(process_info);
	if (ret)
		goto unreserve_out;

	/* Validate BOs and update GPUVM page tables */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
		struct kfd_bo_va_list *bo_va_entry;

		bo = mem->bo;

		/* Validate the BO if we got user pages */
		if (bo->tbo.ttm->pages[0]) {
			amdgpu_bo_placement_from_domain(bo, mem->domain);
			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (ret) {
				pr_err("%s: failed to validate BO\n", __func__);
				goto unreserve_out;
			}
		}

		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_valid_list);

		/* Update mapping. If the BO was not validated
		 * (because we couldn't get user pages), this will
		 * clear the page table entries, which will result in
		 * VM faults if the GPU tries to access the invalid
		 * memory.
		 */
		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
			if (!bo_va_entry->is_mapped)
				continue;

			ret = update_gpuvm_pte((struct amdgpu_device *)
					       bo_va_entry->kgd_dev,
					       bo_va_entry, &sync);
			if (ret) {
				pr_err("%s: update PTE failed\n", __func__);
				/* make sure this gets validated again */
				atomic_inc(&mem->invalid);
				goto unreserve_out;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync);

unreserve_out:
	ttm_eu_backoff_reservation(&ticket, &resv_list);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);
out_free:
	kfree(pd_bo_list_entries);
out_no_mem:

	return ret;
}

/* Worker callback to restore evicted userptr BOs
 *
 * Tries to update and validate all userptr BOs. If successful and no
 * concurrent evictions happened, the queues are restarted. Otherwise,
 * reschedule for another attempt later.
 */
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct amdkfd_process_info *process_info =
		container_of(dwork, struct amdkfd_process_info,
			     restore_userptr_work);
	struct task_struct *usertask;
	struct mm_struct *mm;
	int evicted_bos;

	evicted_bos = atomic_read(&process_info->evicted_bos);
	if (!evicted_bos)
		return;

	/* Reference task and mm in case of concurrent process termination */
	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
	if (!usertask)
		return;
	mm = get_task_mm(usertask);
	if (!mm) {
		put_task_struct(usertask);
		return;
	}

	mutex_lock(&process_info->lock);

	if (update_invalid_user_pages(process_info, mm))
		goto unlock_out;
	/* userptr_inval_list can be empty if all evicted userptr BOs
	 * have been freed. In that case there is nothing to validate
	 * and we can just restart the queues.
	 */
	if (!list_empty(&process_info->userptr_inval_list)) {
		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
			goto unlock_out; /* Concurrent eviction, try again */

		if (validate_invalid_user_pages(process_info))
			goto unlock_out;
	}
	/* Final check for concurrent eviction and atomic update. If
	 * another eviction happens after successful update, it will
	 * be a first eviction that calls quiesce_mm. The eviction
	 * reference counting inside KFD will handle this case.
	 */
	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
	    evicted_bos)
		goto unlock_out;
	evicted_bos = 0;
	if (kgd2kfd_resume_mm(mm)) {
		pr_err("%s: Failed to resume KFD\n", __func__);
		/* No recovery from this failure. Probably the CP is
		 * hanging. No point trying again.
		 */
	}

unlock_out:
	mutex_unlock(&process_info->lock);
	mmput(mm);
	put_task_struct(usertask);

	/* If validation failed, reschedule another attempt */
	if (evicted_bos)
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 *   KFD process identified by process_info
 *
 * @process_info: amdkfd_process_info of the KFD process
 *
 * After memory eviction, restore thread calls this function. The function
 * should be called when the Process is still valid. BO restore involves -
 *
 * 1.  Release old eviction fence and create new one
 * 2.  Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
 * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
 *     BOs that need to be reserved.
 * 4.  Reserve all the BOs
 * 5.  Validate PD and PT BOs.
 * 6.  Validate all KFD BOs using kfd_bo_list and Map them and add new fence
 * 7.  Add fence to all PD and PT BOs.
 * 8.  Unreserve all BOs

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 *   KFD process identified by process_info
 *
 * @process_info: amdkfd_process_info of the KFD process
 * @ef: set to a new reference to the newly created eviction fence
 *
 * After memory eviction, the restore thread calls this function. The function
 * should only be called while the process is still valid. BO restore involves:
 *
 * 1.  Release old eviction fence and create new one
 * 2.  Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
 * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
 *     BOs that need to be reserved.
 * 4.  Reserve all the BOs
 * 5.  Validate PD and PT BOs.
 * 6.  Validate all KFD BOs using kfd_bo_list, map them and add a new fence
 * 7.  Add fence to all PD and PT BOs.
 * 8.  Unreserve all BOs
 */
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
	struct amdgpu_bo_list_entry *pd_bo_list;
	struct amdkfd_process_info *process_info = info;
	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem;
	struct bo_vm_reservation_context ctx;
	struct amdgpu_amdkfd_fence *new_fence;
	int ret = 0, i;
	struct list_head duplicate_save;
	struct amdgpu_sync sync_obj;

	INIT_LIST_HEAD(&duplicate_save);
	INIT_LIST_HEAD(&ctx.list);
	INIT_LIST_HEAD(&ctx.duplicates);

	pd_bo_list = kcalloc(process_info->n_vms,
			     sizeof(struct amdgpu_bo_list_entry),
			     GFP_KERNEL);
	if (!pd_bo_list)
		return -ENOMEM;

	i = 0;
	mutex_lock(&process_info->lock);
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);

	/* Reserve all BOs and page tables/directory. Add all BOs from
	 * kfd_bo_list to ctx.list
	 */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		list_add_tail(&mem->resv_list.head, &ctx.list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
	}

	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
				     false, &duplicate_save, true);
	if (ret) {
		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
		goto ttm_reserve_fail;
	}

	amdgpu_sync_create(&sync_obj);

	/* Validate PDs and PTs */
	ret = process_validate_vms(process_info);
	if (ret)
		goto validate_map_fail;

	ret = process_sync_pds_resv(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
		goto validate_map_fail;
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		struct amdgpu_bo *bo = mem->bo;
		uint32_t domain = mem->domain;
		struct kfd_bo_va_list *bo_va_entry;

		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
		if (ret) {
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}
		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
		if (ret) {
			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
			goto validate_map_fail;
		}
		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
					       bo_va_entry->kgd_dev,
					       bo_va_entry,
					       &sync_obj);
			if (ret) {
				pr_debug("Memory eviction: update PTE failed. Try again\n");
				goto validate_map_fail;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: update PDs failed. Try again\n");
		goto validate_map_fail;
	}

	/* Wait for validate and PT updates to finish */
	amdgpu_sync_wait(&sync_obj, false);

	/* Release the old eviction fence and create a new one. A fence only
	 * goes from unsignaled to signaled, so it cannot be reused.
	 * Use the context and mm from the old fence.
	 */
	new_fence = amdgpu_amdkfd_fence_create(
				process_info->eviction_fence->base.context,
				process_info->eviction_fence->mm);
	if (!new_fence) {
		pr_err("Failed to create eviction fence\n");
		ret = -ENOMEM;
		goto validate_map_fail;
	}
	dma_fence_put(&process_info->eviction_fence->base);
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Attach new eviction fence to all BOs */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head)
		amdgpu_bo_fence(mem->bo,
				&process_info->eviction_fence->base, true);

	/* Attach eviction fence to PD / PT BOs */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->root.base.bo;

		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
	}

validate_map_fail:
	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
	amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
	mutex_unlock(&process_info->lock);
	kfree(pd_bo_list);
	return ret;
}
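
/*
 * Hypothetical caller-side sketch (illustration only, excluded from the
 * build): on success, amdgpu_amdkfd_gpuvm_restore_process_bos() returns a
 * new reference to the freshly created eviction fence through @ef, so the
 * caller drops whatever fence reference it was holding and keeps the new
 * one. "struct my_kfd_process" and "example_restore" are illustrative
 * placeholders, not structures or functions defined by this driver.
 */
#if 0
struct my_kfd_process {		/* hypothetical caller-side bookkeeping */
	void *process_info;	/* amdkfd_process_info pointer */
	struct dma_fence *ef;	/* eviction fence reference held by caller */
};

static int example_restore(struct my_kfd_process *kfd_proc)
{
	struct dma_fence *new_ef;
	int r;

	r = amdgpu_amdkfd_gpuvm_restore_process_bos(kfd_proc->process_info,
						    &new_ef);
	if (r)
		return r;

	dma_fence_put(kfd_proc->ef);	/* drop the old fence reference */
	kfd_proc->ef = new_ef;		/* keep the one returned above */
	return 0;
}
#endif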

int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
{
	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
	int ret;

	if (!info || !gws)
		return -EINVAL;

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem)
		return -ENOMEM;

	mutex_init(&(*mem)->lock);
	(*mem)->bo = amdgpu_bo_ref(gws_bo);
	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
	(*mem)->process_info = process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
	amdgpu_sync_create(&(*mem)->sync);

	/* Validate the GWS BO the first time it is added to the process */
	mutex_lock(&(*mem)->process_info->lock);
	ret = amdgpu_bo_reserve(gws_bo, false);
	if (unlikely(ret)) {
		pr_err("Reserve gws bo failed %d\n", ret);
		goto bo_reservation_failure;
	}

	ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
	if (ret) {
		pr_err("GWS BO validate failed %d\n", ret);
		goto bo_validation_failure;
	}
	/* The GWS resource is shared between amdgpu and amdkfd.
	 * Add the process eviction fence to the BO so they can
	 * evict each other.
	 */
	ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
	if (ret)
		goto reserve_shared_fail;
	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
	amdgpu_bo_unreserve(gws_bo);
	mutex_unlock(&(*mem)->process_info->lock);

	return ret;

reserve_shared_fail:
bo_validation_failure:
	amdgpu_bo_unreserve(gws_bo);
bo_reservation_failure:
	mutex_unlock(&(*mem)->process_info->lock);
	amdgpu_sync_free(&(*mem)->sync);
	remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
	amdgpu_bo_unref(&gws_bo);
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	*mem = NULL;
	return ret;
}

int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
{
	int ret;
	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
	struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
	struct amdgpu_bo *gws_bo = kgd_mem->bo;

	/* Remove the BO from the process's validate list so the restore
	 * worker won't touch it anymore.
	 */
	remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);

	ret = amdgpu_bo_reserve(gws_bo, false);
	if (unlikely(ret)) {
		pr_err("Reserve gws bo failed %d\n", ret);
		/* TODO: add BO back to validate_list? */
		return ret;
	}
	amdgpu_amdkfd_remove_eviction_fence(gws_bo,
					    process_info->eviction_fence);
	amdgpu_bo_unreserve(gws_bo);
	amdgpu_sync_free(&kgd_mem->sync);
	amdgpu_bo_unref(&gws_bo);
	mutex_destroy(&kgd_mem->lock);
	kfree(mem);
	return 0;
}
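
/*
 * Hypothetical usage sketch of the two GWS helpers above (illustration only,
 * excluded from the build). "pinfo" and "gws_bo" stand in for the process
 * info pointer and the driver-owned GWS buffer object that real KFD code
 * would pass in; "example_gws_usage" is not a function in this driver.
 */
#if 0
static int example_gws_usage(void *pinfo, void *gws_bo)
{
	struct kgd_mem *gws_mem;
	int r;

	/* Wrap the GWS BO for this process and validate it once */
	r = amdgpu_amdkfd_add_gws_to_process(pinfo, gws_bo, &gws_mem);
	if (r)
		return r;

	/* ... queues that use GWS would run here ... */

	/* Drop the per-process wrapper when it is no longer needed */
	return amdgpu_amdkfd_remove_gws_from_process(pinfo, gws_mem);
}
#endif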