/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>

#include "amdgpu_object.h"
#include "amdgpu_gem.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"

/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)

/* Userptr restore delay, just long enough to allow consecutive VM
 * changes to accumulate
 */
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1

/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	uint64_t max_ttm_mem_limit;
	int64_t system_mem_used;
	int64_t ttm_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

/* Struct used for amdgpu_amdkfd_bo_validate */
struct amdgpu_vm_parser {
	uint32_t domain;
	bool wait;
};

static const char * const domain_bit_to_string[] = {
	"CPU",
	"GTT",
	"VRAM",
	"GDS",
	"GWS",
	"OA"
};

#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]

static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);


static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
		struct kgd_mem *mem)
{
	struct kfd_bo_va_list *entry;

	list_for_each_entry(entry, &mem->bo_va_list, bo_list)
		if (entry->bo_va->base.vm == avm)
			return false;

	return true;
}
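/* Rough illustration of the limits set below (example numbers, not from
 * the original source): assuming about 64 GiB of usable memory when
 * amdgpu_amdkfd_gpuvm_init_mem_limits() runs, the 15/16 fraction gives a
 * system memory limit of roughly 60 GiB and the 3/8 fraction a TTM memory
 * limit of roughly 24 GiB for KFD allocations.
 */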
/* Set memory usage limits. Currently, limits are
 *  System (TTM + userptr) memory - 15/16th System RAM
 *  TTM memory - 3/8th System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
	struct sysinfo si;
	uint64_t mem;

	si_meminfo(&si);
	mem = si.freeram - si.freehigh;
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
}

/* Estimate page table size needed to represent a given memory size
 *
 * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
 * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
 * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
 * for 2MB pages for TLB efficiency. However, small allocations and
 * fragmented system memory still need some 4KB pages. We choose a
 * compromise that should work in most cases without reserving too
 * much memory for page tables unnecessarily (factor 16K, >> 14).
 */
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)

static size_t amdgpu_amdkfd_acc_size(uint64_t size)
{
	size >>= PAGE_SHIFT;
	size *= sizeof(dma_addr_t) + sizeof(void *);

	return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) +
		__roundup_pow_of_two(sizeof(struct ttm_tt)) +
		PAGE_ALIGN(size);
}

static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
		uint64_t size, u32 domain, bool sg)
{
	uint64_t reserved_for_pt =
		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
	int ret = 0;

	acc_size = amdgpu_amdkfd_acc_size(size);

	vram_needed = 0;
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		/* TTM GTT memory */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size + size;
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		/* Userptr */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
	} else {
		/* VRAM and SG */
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
			vram_needed = size;
	}

	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (kfd_mem_limit.system_mem_used + system_mem_needed >
	    kfd_mem_limit.max_system_mem_limit)
		pr_debug("Set no_system_mem_limit=1 if using shared memory\n");

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
	     kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
	     kfd_mem_limit.max_ttm_mem_limit) ||
	    (adev->kfd.vram_used + vram_needed >
	     adev->gmc.real_vram_size - reserved_for_pt)) {
		ret = -ENOMEM;
	} else {
		kfd_mem_limit.system_mem_used += system_mem_needed;
		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
		adev->kfd.vram_used += vram_needed;
	}

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}
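/* Rough illustration of ESTIMATE_PT_SIZE() above (example numbers, not
 * from the original source): 16 GiB of memory >> 14 reserves 1 MiB for
 * page tables and 1 TiB reserves 64 MiB, i.e. a compromise between the
 * pure 2MB-page factor (>> 18) and the pure 4KB-page factor (>> 9)
 * described in the comment above.
 */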
static void unreserve_mem_limit(struct amdgpu_device *adev,
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;

	acc_size = amdgpu_amdkfd_acc_size(size);

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
	} else {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
			adev->kfd.vram_used -= size;
			WARN_ONCE(adev->kfd.vram_used < 0,
				  "kfd VRAM memory accounting unbalanced");
		}
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}

void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	u32 domain = bo->preferred_domains;
	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);

	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		domain = AMDGPU_GEM_DOMAIN_CPU;
		sg = false;
	}

	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
}

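/* The next two functions strip the KFD eviction fence from a BO's
 * reservation object. A rough sketch of why a new fence list is built
 * instead of editing in place (based on the code below, not on separate
 * documentation): the shared fence list is RCU protected, so concurrent
 * readers may still be walking the old array; the old list is therefore
 * only released with kfree_rcu() after the new one has been published
 * under the seqcount.
 */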
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
 *  reservation object.
 *
 * @bo: [IN] Remove eviction fence(s) from this BO
 * @ef: [IN] This eviction fence is removed if it
 *  is present in the shared list.
 *
 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
 */
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
					struct amdgpu_amdkfd_fence *ef)
{
	struct dma_resv *resv = bo->tbo.base.resv;
	struct dma_resv_list *old, *new;
	unsigned int i, j, k;

	if (!ef)
		return -EINVAL;

	old = dma_resv_get_list(resv);
	if (!old)
		return 0;

	new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	/* Go through all the shared fences in the reservation object and sort
	 * the interesting ones to the end of the list.
	 */
	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(old->shared[i],
					      dma_resv_held(resv));

		if (f->context == ef->base.context)
			RCU_INIT_POINTER(new->shared[--j], f);
		else
			RCU_INIT_POINTER(new->shared[k++], f);
	}
	new->shared_max = old->shared_max;
	new->shared_count = k;

	/* Install the new fence list, seqcount provides the barriers */
	write_seqcount_begin(&resv->seq);
	RCU_INIT_POINTER(resv->fence, new);
	write_seqcount_end(&resv->seq);

	/* Drop the references to the removed fences or move them to ef_list */
	for (i = j, k = 0; i < old->shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(new->shared[i],
					      dma_resv_held(resv));
		dma_fence_put(f);
	}
	kfree_rcu(old, rcu);

	return 0;
}

int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
{
	struct amdgpu_bo *root = bo;
	struct amdgpu_vm_bo_base *vm_bo;
	struct amdgpu_vm *vm;
	struct amdkfd_process_info *info;
	struct amdgpu_amdkfd_fence *ef;
	int ret;

	/* we can always get vm_bo from root PD bo.*/
	while (root->parent)
		root = root->parent;

	vm_bo = root->vm_bo;
	if (!vm_bo)
		return 0;

	vm = vm_bo->vm;
	if (!vm)
		return 0;

	info = vm->process_info;
	if (!info || !info->eviction_fence)
		return 0;

	ef = container_of(dma_fence_get(&info->eviction_fence->base),
			struct amdgpu_amdkfd_fence, base);

	BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
	ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
	dma_resv_unlock(bo->tbo.base.resv);

	dma_fence_put(&ef->base);
	return ret;
}

static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
				     bool wait)
{
	struct ttm_operation_ctx ctx = { false, false };
	int ret;

	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
		 "Called with userptr BO"))
		return -EINVAL;

	amdgpu_bo_placement_from_domain(bo, domain);

	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		goto validate_fail;
	if (wait)
		amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);

validate_fail:
	return ret;
}

static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_vm_parser *p = param;

	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
}

/* vm_validate_pt_pd_bos - Validate page table and directory BOs
 *
 * Page directories are not updated here because huge page handling
 * during page table updates can invalidate page directory entries
 * again. Page directories are only updated after updating page
 * tables.
 */
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	struct amdgpu_vm_parser param;
	int ret;

	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
	param.wait = false;

	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
					&param);
	if (ret) {
		pr_err("failed to validate PT BOs\n");
		return ret;
	}

	ret = amdgpu_amdkfd_validate(&param, pd);
	if (ret) {
		pr_err("failed to validate PD\n");
		return ret;
	}

	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);

	if (vm->use_cpu_for_update) {
		ret = amdgpu_bo_kmap(pd, NULL);
		if (ret) {
			pr_err("failed to kmap PD, ret=%d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	int ret;

	ret = amdgpu_vm_update_pdes(adev, vm, false);
	if (ret)
		return ret;

	return amdgpu_sync_fence(sync, vm->last_update);
}

static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
	bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
	uint32_t mapping_flags;
	uint64_t pte_flags;
	bool snoop = false;

	mapping_flags = AMDGPU_VM_PAGE_READABLE;
	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;

	switch (adev->asic_type) {
	case CHIP_ARCTURUS:
		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
			if (bo_adev == adev)
				mapping_flags |= coherent ?
					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
			else
				mapping_flags |= AMDGPU_VM_MTYPE_UC;
		} else {
			mapping_flags |= coherent ?
				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
		}
		break;
	case CHIP_ALDEBARAN:
		if (coherent && uncached) {
			if (adev->gmc.xgmi.connected_to_cpu ||
				!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
				snoop = true;
			mapping_flags |= AMDGPU_VM_MTYPE_UC;
		} else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
			if (bo_adev == adev) {
				mapping_flags |= coherent ?
					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
				if (adev->gmc.xgmi.connected_to_cpu)
					snoop = true;
			} else {
				mapping_flags |= AMDGPU_VM_MTYPE_UC;
				if (amdgpu_xgmi_same_hive(adev, bo_adev))
					snoop = true;
			}
		} else {
			snoop = true;
			mapping_flags |= coherent ?
				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
		}
		break;
	default:
		mapping_flags |= coherent ?
			AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
	}

	pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;

	return pte_flags;
}

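/* Best-effort summary of get_pte_flags() above (derived from the switch
 * statement, not from hardware documentation): on Arcturus and Aldebaran,
 * local VRAM is mapped CC when coherence is requested and RW otherwise,
 * remote VRAM is mapped UC, and system memory is mapped UC (coherent) or
 * NC, with SNOOPED set on Aldebaran for cache-coherent paths (e.g. XGMI
 * to the CPU); other ASICs simply use UC (coherent) or NC.
 */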
/* add_bo_to_vm - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a. Validate new page tables and directories
 */
static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
		struct amdgpu_vm *vm, bool is_aql,
		struct kfd_bo_va_list **p_bo_va_entry)
{
	int ret;
	struct kfd_bo_va_list *bo_va_entry;
	struct amdgpu_bo *bo = mem->bo;
	uint64_t va = mem->va;
	struct list_head *list_bo_va = &mem->bo_va_list;
	unsigned long bo_size = bo->tbo.base.size;

	if (!va) {
		pr_err("Invalid VA when adding BO to VM\n");
		return -EINVAL;
	}

	if (is_aql)
		va += bo_size;

	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
	if (!bo_va_entry)
		return -ENOMEM;

	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
			va + bo_size, vm);

	/* Add BO to VM internal data structures*/
	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
	if (!bo_va_entry->bo_va) {
		ret = -EINVAL;
		pr_err("Failed to add BO object to VM. ret == %d\n",
				ret);
		goto err_vmadd;
	}

	bo_va_entry->va = va;
	bo_va_entry->pte_flags = get_pte_flags(adev, mem);
	bo_va_entry->kgd_dev = (void *)adev;
	list_add(&bo_va_entry->bo_list, list_bo_va);

	if (p_bo_va_entry)
		*p_bo_va_entry = bo_va_entry;

	/* Allocate and validate page tables if needed */
	ret = vm_validate_pt_pd_bos(vm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto err_alloc_pts;
	}

	return 0;

err_alloc_pts:
	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
	list_del(&bo_va_entry->bo_list);
err_vmadd:
	kfree(bo_va_entry);
	return ret;
}

static void remove_bo_from_vm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, unsigned long size)
{
	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
			entry->va,
			entry->va + size, entry);
	amdgpu_vm_bo_rmv(adev, entry->bo_va);
	list_del(&entry->bo_list);
	kfree(entry);
}

static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
				struct amdkfd_process_info *process_info,
				bool userptr)
{
	struct ttm_validate_buffer *entry = &mem->validate_list;
	struct amdgpu_bo *bo = mem->bo;

	INIT_LIST_HEAD(&entry->head);
	entry->num_shared = 1;
	entry->bo = &bo->tbo;
	mutex_lock(&process_info->lock);
	if (userptr)
		list_add_tail(&entry->head, &process_info->userptr_valid_list);
	else
		list_add_tail(&entry->head, &process_info->kfd_bo_list);
	mutex_unlock(&process_info->lock);
}

static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
		struct amdkfd_process_info *process_info)
{
	struct ttm_validate_buffer *bo_list_entry;

	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);
}

/* Initializes user pages. It registers the MMU notifier and validates
 * the userptr BO in the GTT domain.
 *
 * The BO must already be on the userptr_valid_list. Otherwise an
 * eviction and restore may happen that leaves the new BO unmapped
 * with the user mode queues running.
 *
 * Takes the process_info->lock to protect against concurrent restore
 * workers.
 *
 * Returns 0 for success, negative errno for errors.
 */
static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	struct amdgpu_bo *bo = mem->bo;
	struct ttm_operation_ctx ctx = { true, false };
	int ret = 0;

	mutex_lock(&process_info->lock);

	ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
	if (ret) {
		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
		goto out;
	}

	ret = amdgpu_mn_register(bo, user_addr);
	if (ret) {
		pr_err("%s: Failed to register MMU notifier: %d\n",
		       __func__, ret);
		goto out;
	}

	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
	if (ret) {
		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
		goto unregister_out;
	}

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("%s: Failed to reserve BO\n", __func__);
		goto release_out;
	}
	amdgpu_bo_placement_from_domain(bo, mem->domain);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		pr_err("%s: failed to validate BO\n", __func__);
	amdgpu_bo_unreserve(bo);

release_out:
	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
	if (ret)
		amdgpu_mn_unregister(bo);
out:
	mutex_unlock(&process_info->lock);
	return ret;
}

/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 */
struct bo_vm_reservation_context {
	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
	unsigned int n_vms;		    /* Number of VMs reserved	    */
	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
	struct ww_acquire_ctx ticket;	    /* Reservation ticket	    */
	struct list_head list, duplicates;  /* BO lists			    */
	struct amdgpu_sync *sync;	    /* Pointer to sync object	    */
	bool reserved;			    /* Whether BOs are reserved	    */
};

enum bo_vm_match {
	BO_VM_NOT_MAPPED = 0,	/* Match VMs where a BO is not mapped */
	BO_VM_MAPPED,		/* Match VMs where a BO is mapped     */
	BO_VM_ALL,		/* Match all VMs a BO was added to    */
};

/**
 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
 * @mem: KFD BO structure.
 * @vm: the VM to reserve.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 */
static int reserve_bo_and_vm(struct kgd_mem *mem,
			      struct amdgpu_vm *vm,
			      struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	int ret;

	WARN_ON(!vm);

	ctx->reserved = false;
	ctx->n_vms = 1;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
	if (!ctx->vm_pd)
		return -ENOMEM;

	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates);
	if (ret) {
		pr_err("Failed to reserve buffers in ttm.\n");
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
		return ret;
	}

	ctx->reserved = true;
	return 0;
}

/**
 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
 * @mem: KFD BO structure.
 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
 * are used. Otherwise, a single VM associated with the BO.
 * @map_type: the mapping status that will be used to filter the VMs.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 *
 * Returns 0 for success, negative for failure.
 */
static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
				struct amdgpu_vm *vm, enum bo_vm_match map_type,
				struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	struct kfd_bo_va_list *entry;
	unsigned int i;
	int ret;

	ctx->reserved = false;
	ctx->n_vms = 0;
	ctx->vm_pd = NULL;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		ctx->n_vms++;
	}

	if (ctx->n_vms != 0) {
		ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
				     GFP_KERNEL);
		if (!ctx->vm_pd)
			return -ENOMEM;
	}

	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	i = 0;
	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
				&ctx->vm_pd[i]);
		i++;
	}

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates);
	if (ret) {
		pr_err("Failed to reserve buffers in ttm.\n");
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
		return ret;
	}

	ctx->reserved = true;
	return 0;
}

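/* Illustrative pairing of the reservation helpers above (a sketch based
 * on how they are used later in this file, not an additional API
 * contract):
 *
 *	struct bo_vm_reservation_context ctx;
 *
 *	ret = reserve_bo_and_vm(mem, vm, &ctx);
 *	if (!ret) {
 *		... map or unmap, collecting fences in ctx.sync ...
 *		ret = unreserve_bo_and_vms(&ctx, true, false);
 *	}
 */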
/**
 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
 * @ctx: Reservation context to unreserve
 * @wait: Optionally wait for a sync object representing pending VM updates
 * @intr: Whether the wait is interruptible
 *
 * Also frees any resources allocated in
 * reserve_bo_and_(cond_)vm(s). Returns the status from
 * amdgpu_sync_wait.
 */
static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
				 bool wait, bool intr)
{
	int ret = 0;

	if (wait)
		ret = amdgpu_sync_wait(ctx->sync, intr);

	if (ctx->reserved)
		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
	kfree(ctx->vm_pd);

	ctx->sync = NULL;

	ctx->reserved = false;
	ctx->vm_pd = NULL;

	return ret;
}

static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
				struct kfd_bo_va_list *entry,
				struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = entry->bo_va;
	struct amdgpu_vm *vm = bo_va->base.vm;

	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);

	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	amdgpu_sync_fence(sync, bo_va->last_pt_update);

	return 0;
}

static int update_gpuvm_pte(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry,
		struct amdgpu_sync *sync)
{
	int ret;
	struct amdgpu_bo_va *bo_va = entry->bo_va;

	/* Update the page tables  */
	ret = amdgpu_vm_bo_update(adev, bo_va, false);
	if (ret) {
		pr_err("amdgpu_vm_bo_update failed\n");
		return ret;
	}

	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
}

static int map_bo_to_gpuvm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
		bool no_update_pte)
{
	int ret;

	/* Set virtual address for the allocation */
	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
			       amdgpu_bo_size(entry->bo_va->base.bo),
			       entry->pte_flags);
	if (ret) {
		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
				entry->va, ret);
		return ret;
	}

	if (no_update_pte)
		return 0;

	ret = update_gpuvm_pte(adev, entry, sync);
	if (ret) {
		pr_err("update_gpuvm_pte() failed\n");
		goto update_gpuvm_pte_failed;
	}

	return 0;

update_gpuvm_pte_failed:
	unmap_bo_from_gpuvm(adev, entry, sync);
	return ret;
}

static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
{
	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);

	if (!sg)
		return NULL;
	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
		kfree(sg);
		return NULL;
	}
	sg->sgl->dma_address = addr;
	sg->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	sg->sgl->dma_length = size;
#endif
	return sg;
}

static int process_validate_vms(struct amdkfd_process_info *process_info)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_validate_pt_pd_bos(peer_vm);
		if (ret)
			return ret;
	}

	return 0;
}

static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
				 struct amdgpu_sync *sync)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *pd = peer_vm->root.base.bo;

		ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
				       AMDGPU_SYNC_NE_OWNER,
				       AMDGPU_FENCE_OWNER_KFD);
		if (ret)
			return ret;
	}

	return 0;
}

static int process_update_pds(struct amdkfd_process_info *process_info,
			      struct amdgpu_sync *sync)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_update_pds(peer_vm, sync);
		if (ret)
			return ret;
	}

	return 0;
}

static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
		       struct dma_fence **ef)
{
	struct amdkfd_process_info *info = NULL;
	int ret;

	if (!*process_info) {
		info = kzalloc(sizeof(*info), GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		mutex_init(&info->lock);
		INIT_LIST_HEAD(&info->vm_list_head);
		INIT_LIST_HEAD(&info->kfd_bo_list);
		INIT_LIST_HEAD(&info->userptr_valid_list);
		INIT_LIST_HEAD(&info->userptr_inval_list);

		info->eviction_fence =
			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
						   current->mm);
		if (!info->eviction_fence) {
			pr_err("Failed to create eviction fence\n");
			ret = -ENOMEM;
			goto create_evict_fence_fail;
		}

		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
		atomic_set(&info->evicted_bos, 0);
		INIT_DELAYED_WORK(&info->restore_userptr_work,
				  amdgpu_amdkfd_restore_userptr_worker);

		*process_info = info;
		*ef = dma_fence_get(&info->eviction_fence->base);
	}

	vm->process_info = *process_info;

	/* Validate page directory and attach eviction fence */
	ret = amdgpu_bo_reserve(vm->root.base.bo, true);
	if (ret)
		goto reserve_pd_fail;
	ret = vm_validate_pt_pd_bos(vm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto validate_pd_fail;
	}
	ret = amdgpu_bo_sync_wait(vm->root.base.bo,
				  AMDGPU_FENCE_OWNER_KFD, false);
	if (ret)
		goto wait_pd_fail;
	ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
	if (ret)
		goto reserve_shared_fail;
	amdgpu_bo_fence(vm->root.base.bo,
			&vm->process_info->eviction_fence->base, true);
	amdgpu_bo_unreserve(vm->root.base.bo);

	/* Update process info */
	mutex_lock(&vm->process_info->lock);
	list_add_tail(&vm->vm_list_node,
			&(vm->process_info->vm_list_head));
	vm->process_info->n_vms++;
	mutex_unlock(&vm->process_info->lock);

	return 0;

reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
	amdgpu_bo_unreserve(vm->root.base.bo);
reserve_pd_fail:
	vm->process_info = NULL;
	if (info) {
		/* Two fence references: one in info and one in *ef */
		dma_fence_put(&info->eviction_fence->base);
		dma_fence_put(*ef);
		*ef = NULL;
		*process_info = NULL;
		put_pid(info->pid);
create_evict_fence_fail:
		mutex_destroy(&info->lock);
		kfree(info);
	}
	return ret;
}

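/* Typical per-process call sequence for the exported GPUVM interface in
 * this file, as a rough sketch inferred from the functions below (KFD is
 * the actual caller and may differ in detail):
 * amdgpu_amdkfd_gpuvm_acquire_process_vm() once per process and device,
 * then alloc_memory_of_gpu()/map_memory_to_gpu() per buffer, with
 * unmap_memory_from_gpu()/free_memory_of_gpu() and finally
 * release_process_vm() on teardown.
 */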
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
					   struct file *filp, u32 pasid,
					   void **vm, void **process_info,
					   struct dma_fence **ef)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct drm_file *drm_priv = filp->private_data;
	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
	struct amdgpu_vm *avm = &drv_priv->vm;
	int ret;

	/* Already a compute VM? */
	if (avm->process_info)
		return -EINVAL;

	/* Convert VM into a compute VM */
	ret = amdgpu_vm_make_compute(adev, avm, pasid);
	if (ret)
		return ret;

	/* Initialize KFD part of the VM and process info */
	ret = init_kfd_vm(avm, process_info, ef);
	if (ret)
		return ret;

	*vm = (void *)avm;

	return 0;
}

void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
	struct amdkfd_process_info *process_info = vm->process_info;
	struct amdgpu_bo *pd = vm->root.base.bo;

	if (!process_info)
		return;

	/* Release eviction fence from PD */
	amdgpu_bo_reserve(pd, false);
	amdgpu_bo_fence(pd, NULL, false);
	amdgpu_bo_unreserve(pd);

	/* Update process info */
	mutex_lock(&process_info->lock);
	process_info->n_vms--;
	list_del(&vm->vm_list_node);
	mutex_unlock(&process_info->lock);

	vm->process_info = NULL;

	/* Release per-process resources when last compute VM is destroyed */
	if (!process_info->n_vms) {
		WARN_ON(!list_empty(&process_info->kfd_bo_list));
		WARN_ON(!list_empty(&process_info->userptr_valid_list));
		WARN_ON(!list_empty(&process_info->userptr_inval_list));

		dma_fence_put(&process_info->eviction_fence->base);
		cancel_delayed_work_sync(&process_info->restore_userptr_work);
		put_pid(process_info->pid);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}
}

void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Releasing process vm %p\n", vm);

	/* The original pasid of the amdgpu vm has already been
	 * released when the vm was converted to a compute vm.
	 * The current pasid is managed by kfd and will be
	 * released on kfd process destroy. Set amdgpu pasid
	 * to 0 to avoid duplicate release.
	 */
	amdgpu_vm_release_compute(adev, avm);
}

uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	struct amdgpu_bo *pd = avm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);

	if (adev->asic_type < CHIP_VEGA10)
		return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
	return avm->pd_phys_addr;
}

int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct kgd_dev *kgd, uint64_t va, uint64_t size,
		void *vm, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	enum ttm_bo_type bo_type = ttm_bo_type_device;
	struct sg_table *sg = NULL;
	uint64_t user_addr = 0;
	struct amdgpu_bo *bo;
	struct drm_gem_object *gobj;
	u32 domain, alloc_domain;
	u64 alloc_flags;
	int ret;

	/*
	 * Check on which domain to allocate BO
	 */
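	/* Summary of the checks below: VRAM allocations stay in VRAM, GTT
	 * allocations are TTM system memory, USERPTR BOs are created in the
	 * CPU domain and later mapped through GTT, and DOORBELL/MMIO_REMAP
	 * BOs are SG BOs wrapping a single physical address.
	 */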
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
		alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
		alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_flags = 0;
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
		domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
		alloc_flags = 0;
		if (!offset || !*offset)
			return -EINVAL;
		user_addr = untagged_addr(*offset);
	} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
			KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
		domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
		bo_type = ttm_bo_type_sg;
		alloc_flags = 0;
		if (size > UINT_MAX)
			return -EINVAL;
		sg = create_doorbell_sg(*offset, size);
		if (!sg)
			return -ENOMEM;
	} else {
		return -EINVAL;
	}

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem) {
		ret = -ENOMEM;
		goto err;
	}
	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	mutex_init(&(*mem)->lock);
	(*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);

	/* Workaround for AQL queue wraparound bug. Map the same
	 * memory twice. That means we only actually allocate half
	 * the memory.
	 */
	if ((*mem)->aql_queue)
		size = size >> 1;

	(*mem)->alloc_flags = flags;

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
	if (ret) {
		pr_debug("Insufficient memory\n");
		goto err_reserve_limit;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
			va, size, domain_string(alloc_domain));

	ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
				       bo_type, NULL, &gobj);
	if (ret) {
		pr_debug("Failed to create BO on domain %s. ret %d\n",
			 domain_string(alloc_domain), ret);
		goto err_bo_create;
	}
	bo = gem_to_amdgpu_bo(gobj);
	if (bo_type == ttm_bo_type_sg) {
		bo->tbo.sg = sg;
		bo->tbo.ttm->sg = sg;
	}
	bo->kfd_bo = *mem;
	(*mem)->bo = bo;
	if (user_addr)
		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;

	(*mem)->va = va;
	(*mem)->domain = domain;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = avm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);

	if (user_addr) {
		ret = init_user_pages(*mem, user_addr);
		if (ret)
			goto allocate_init_user_pages_failed;
	}

	if (offset)
		*offset = amdgpu_bo_mmap_offset(bo);

	return 0;

allocate_init_user_pages_failed:
	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
	amdgpu_bo_unref(&bo);
	/* Don't unreserve system mem limit twice */
	goto err_reserve_limit;
err_bo_create:
	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
err_reserve_limit:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
err:
	if (sg) {
		sg_free_table(sg);
		kfree(sg);
	}
	return ret;
}

int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	unsigned long bo_size = mem->bo->tbo.base.size;
	struct kfd_bo_va_list *entry, *tmp;
	struct bo_vm_reservation_context ctx;
	struct ttm_validate_buffer *bo_list_entry;
	unsigned int mapped_to_gpu_memory;
	int ret;
	bool is_imported = false;

	mutex_lock(&mem->lock);
	mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
	is_imported = mem->is_imported;
	mutex_unlock(&mem->lock);
	/* lock is not needed after this, since mem is unused and will
	 * be freed anyway
	 */

	if (mapped_to_gpu_memory > 0) {
		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
				mem->va, bo_size);
		return -EBUSY;
	}

	/* Make sure restore workers don't access the BO any more */
	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);

	/* No more MMU notifiers */
	amdgpu_mn_unregister(mem->bo);

	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					process_info->eviction_fence);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
		mem->va + bo_size * (1 + mem->aql_queue));

	/* Remove from VM internal data structures */
	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
				entry, bo_size);

	ret = unreserve_bo_and_vms(&ctx, false, false);

	/* Free the sync object */
	amdgpu_sync_free(&mem->sync);

	/* If the SG is not NULL, it's one we created for a doorbell or mmio
	 * remap BO. We need to free it.
	 */
	if (mem->bo->tbo.sg) {
		sg_free_table(mem->bo->tbo.sg);
		kfree(mem->bo->tbo.sg);
	}

	/* Update the size of the BO being freed if it was allocated from
	 * VRAM and is not imported.
	 */
	if (size) {
		if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
		    (!is_imported))
			*size = bo_size;
		else
			*size = 0;
	}

	/* Free the BO */
	drm_gem_object_put(&mem->bo->tbo.base);
	mutex_destroy(&mem->lock);
	kfree(mem);

	return ret;
}

int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	int ret;
	struct amdgpu_bo *bo;
	uint32_t domain;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	struct kfd_bo_va_list *bo_va_entry = NULL;
	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
	unsigned long bo_size;
	bool is_invalid_userptr = false;

	bo = mem->bo;
	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		return -EINVAL;
	}

	/* Make sure restore is not running concurrently. Since we
	 * don't map invalid userptr BOs, we rely on the next restore
	 * worker to do the mapping
	 */
	mutex_lock(&mem->process_info->lock);

	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
	 * sure that the MMU notifier is no longer running
	 * concurrently and the queues are actually stopped
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		mmap_write_lock(current->mm);
		is_invalid_userptr = atomic_read(&mem->invalid);
		mmap_write_unlock(current->mm);
	}

	mutex_lock(&mem->lock);

	domain = mem->domain;
	bo_size = bo->tbo.base.size;

	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
			mem->va,
			mem->va + bo_size * (1 + mem->aql_queue),
			vm, domain_string(domain));

	ret = reserve_bo_and_vm(mem, vm, &ctx);
	if (unlikely(ret))
		goto out;

	/* Userptr can be marked as "not invalid", but not actually be
	 * validated yet (still in the system domain). In that case
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	if (check_if_add_bo_to_vm(avm, mem)) {
		ret = add_bo_to_vm(adev, mem, avm, false,
				&bo_va_entry);
		if (ret)
			goto add_bo_to_vm_failed;
		if (mem->aql_queue) {
			ret = add_bo_to_vm(adev, mem, avm,
					true, &bo_va_entry_aql);
			if (ret)
				goto add_bo_to_vm_failed_aql;
		}
	} else {
		ret = vm_validate_pt_pd_bos(avm);
		if (unlikely(ret))
			goto add_bo_to_vm_failed;
	}

	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
		 */
		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret) {
			pr_debug("Validate failed\n");
			goto map_bo_to_gpuvm_failed;
		}
	}

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
					entry->va, entry->va + bo_size,
					entry);

			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
					      is_invalid_userptr);
			if (ret) {
				pr_err("Failed to map bo to gpuvm\n");
				goto map_bo_to_gpuvm_failed;
			}

			ret = vm_update_pds(vm, ctx.sync);
			if (ret) {
				pr_err("Failed to update page directories\n");
				goto map_bo_to_gpuvm_failed;
			}

			entry->is_mapped = true;
			mem->mapped_to_gpu_memory++;
			pr_debug("\t INC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
		amdgpu_bo_fence(bo,
				&avm->process_info->eviction_fence->base,
				true);
	ret = unreserve_bo_and_vms(&ctx, false, false);

	goto out;

map_bo_to_gpuvm_failed:
	if (bo_va_entry_aql)
		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
add_bo_to_vm_failed_aql:
	if (bo_va_entry)
		remove_bo_from_vm(adev, bo_va_entry, bo_size);
add_bo_to_vm_failed:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->process_info->lock);
	mutex_unlock(&mem->lock);
	return ret;
}

int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_process_info *process_info =
		((struct amdgpu_vm *)vm)->process_info;
	unsigned long bo_size = mem->bo->tbo.base.size;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	int ret;

	mutex_lock(&mem->lock);

	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
	if (unlikely(ret))
		goto out;
	/* If no VMs were reserved, it means the BO wasn't actually mapped */
	if (ctx.n_vms == 0) {
		ret = -EINVAL;
		goto unreserve_out;
	}

	ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
	if (unlikely(ret))
		goto unreserve_out;

	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
		mem->va,
		mem->va + bo_size * (1 + mem->aql_queue),
		vm);

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
					entry->va,
					entry->va + bo_size,
					entry);

			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
			if (ret == 0) {
				entry->is_mapped = false;
			} else {
				pr_err("failed to unmap VA 0x%llx\n",
						mem->va);
				goto unreserve_out;
			}

			mem->mapped_to_gpu_memory--;
			pr_debug("\t DEC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
	 * required.
	 */
	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
	    !mem->bo->tbo.pin_count)
		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
						process_info->eviction_fence);

unreserve_out:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->lock);
	return ret;
}

int amdgpu_amdkfd_gpuvm_sync_memory(
		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
{
	struct amdgpu_sync sync;
	int ret;

	amdgpu_sync_create(&sync);

	mutex_lock(&mem->lock);
	amdgpu_sync_clone(&mem->sync, &sync);
	mutex_unlock(&mem->lock);

	ret = amdgpu_sync_wait(&sync, intr);
	amdgpu_sync_free(&sync);
	return ret;
}

int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
		struct kgd_mem *mem, void **kptr, uint64_t *size)
{
	int ret;
	struct amdgpu_bo *bo = mem->bo;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		pr_err("userptr can't be mapped to kernel\n");
		return -EINVAL;
	}

	/* delete kgd_mem from kfd_bo_list to avoid re-validating
	 * this BO when it is restored after an eviction.
	 */
	mutex_lock(&mem->process_info->lock);

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("Failed to reserve bo. ret %d\n", ret);
		goto bo_reserve_failed;
	}

	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (ret) {
		pr_err("Failed to pin bo. ret %d\n", ret);
		goto pin_failed;
	}

	ret = amdgpu_bo_kmap(bo, kptr);
	if (ret) {
		pr_err("Failed to map bo to kernel. ret %d\n", ret);
		goto kmap_failed;
	}

	amdgpu_amdkfd_remove_eviction_fence(
		bo, mem->process_info->eviction_fence);
	list_del_init(&mem->validate_list.head);

	if (size)
		*size = amdgpu_bo_size(bo);

	amdgpu_bo_unreserve(bo);

	mutex_unlock(&mem->process_info->lock);
	return 0;

kmap_failed:
	amdgpu_bo_unpin(bo);
pin_failed:
	amdgpu_bo_unreserve(bo);
bo_reserve_failed:
	mutex_unlock(&mem->process_info->lock);

	return ret;
}

int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
					      struct kfd_vm_fault_info *mem)
{
	struct amdgpu_device *adev;

	adev = (struct amdgpu_device *)kgd;
	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
		*mem = *adev->gmc.vm_fault_info;
		mb();
		atomic_set(&adev->gmc.vm_fault_info_updated, 0);
	}
	return 0;
}

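/* Note on the import path below (an observation about this function, not
 * a documented guarantee): BOs imported through a dma-buf reuse the
 * original amdgpu BO and are not charged against the KFD memory limits
 * reserved in amdgpu_amdkfd_reserve_mem_limit().
 */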
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
				      struct dma_buf *dma_buf,
				      uint64_t va, void *vm,
				      struct kgd_mem **mem, uint64_t *size,
				      uint64_t *mmap_offset)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		return -EINVAL;

	obj = dma_buf->priv;
	if (drm_to_adev(obj->dev) != adev)
		/* Can't handle buffers from other devices */
		return -EINVAL;

	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		return -EINVAL;

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem)
		return -ENOMEM;

	if (size)
		*size = amdgpu_bo_size(bo);

	if (mmap_offset)
		*mmap_offset = amdgpu_bo_mmap_offset(bo);

	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	mutex_init(&(*mem)->lock);

	(*mem)->alloc_flags =
		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;

	drm_gem_object_get(&bo->tbo.base);
	(*mem)->bo = bo;
	(*mem)->va = va;
	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = avm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
	amdgpu_sync_create(&(*mem)->sync);
	(*mem)->is_imported = true;

	return 0;
}

/* Evict a userptr BO by stopping the queues if necessary
 *
 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
 * cannot do any memory allocations, and cannot take any locks that
 * are held elsewhere while allocating memory. Therefore this is as
 * simple as possible, using atomic counters.
 *
 * It doesn't do anything to the BO itself. The real work happens in
 * restore, where we get updated page addresses. This function only
 * ensures that GPU access to the BO is stopped.
 */
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
				struct mm_struct *mm)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	int evicted_bos;
	int r = 0;

	atomic_inc(&mem->invalid);
	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
	if (evicted_bos == 1) {
		/* First eviction, stop the queues */
		r = kgd2kfd_quiesce_mm(mm);
		if (r)
			pr_err("Failed to quiesce KFD\n");
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
	}

	return r;
}

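/* Rough outline of the userptr eviction/restore protocol implemented by
 * the helpers below (pieced together from this file):
 * 1. The MMU notifier calls amdgpu_amdkfd_evict_userptr(), which bumps
 *    mem->invalid and process_info->evicted_bos and quiesces the user
 *    queues on the first eviction.
 * 2. The delayed worker re-reads the user pages
 *    (update_invalid_user_pages) and revalidates and remaps the BOs
 *    (validate_invalid_user_pages).
 * 3. If no new eviction raced with the restore, the queues are resumed;
 *    otherwise the work is rescheduled.
 */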
/* Update invalid userptr BOs
 *
 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
 * userptr_inval_list and updates user pages for all BOs that have
 * been invalidated since their last update.
 */
static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
				     struct mm_struct *mm)
{
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int invalid, ret;

	/* Move all invalidated BOs to the userptr_inval_list and
	 * release their user pages by migration to the CPU domain
	 */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_valid_list,
				 validate_list.head) {
		if (!atomic_read(&mem->invalid))
			continue; /* BO is still valid */

		bo = mem->bo;

		if (amdgpu_bo_reserve(bo, true))
			return -EAGAIN;
		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		amdgpu_bo_unreserve(bo);
		if (ret) {
			pr_err("%s: Failed to invalidate userptr BO\n",
			       __func__);
			return -EAGAIN;
		}

		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_inval_list);
	}

	if (list_empty(&process_info->userptr_inval_list))
		return 0; /* All evicted userptr BOs were freed */

	/* Go through userptr_inval_list and update any invalid user_pages */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head) {
		invalid = atomic_read(&mem->invalid);
		if (!invalid)
			/* BO hasn't been invalidated since the last
			 * revalidation attempt. Keep its BO list.
			 */
			continue;

		bo = mem->bo;

		/* Get updated user pages */
		ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
		if (ret) {
			pr_debug("%s: Failed to get user pages: %d\n",
				__func__, ret);

			/* Return error -EBUSY or -ENOMEM, retry restore */
			return ret;
		}

		/*
		 * FIXME: Cannot ignore the return code, must hold
		 * notifier_lock
		 */
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

		/* Mark the BO as valid unless it was invalidated
		 * again concurrently.
		 */
		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
			return -EAGAIN;
	}

	return 0;
}

/* Validate invalid userptr BOs
 *
 * Validates BOs on the userptr_inval_list, and moves them back to the
 * userptr_valid_list. Also updates GPUVM page tables with new page
 * addresses and waits for the page table updates to complete.
 */
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
	struct amdgpu_bo_list_entry *pd_bo_list_entries;
	struct list_head resv_list, duplicates;
	struct ww_acquire_ctx ticket;
	struct amdgpu_sync sync;

	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int i, ret;

	pd_bo_list_entries = kcalloc(process_info->n_vms,
				     sizeof(struct amdgpu_bo_list_entry),
				     GFP_KERNEL);
	if (!pd_bo_list_entries) {
		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
		ret = -ENOMEM;
		goto out_no_mem;
	}

	INIT_LIST_HEAD(&resv_list);
	INIT_LIST_HEAD(&duplicates);

	/* Get all the page directory BOs that need to be reserved */
	i = 0;
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
				    &pd_bo_list_entries[i++]);
	/* Add the userptr_inval_list entries to resv_list */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head) {
		list_add_tail(&mem->resv_list.head, &resv_list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
	}

	/* Reserve all BOs and page tables for validation */
	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
	WARN(!list_empty(&duplicates), "Duplicates should be empty");
	if (ret)
		goto out_free;

	amdgpu_sync_create(&sync);

	ret = process_validate_vms(process_info);
	if (ret)
		goto unreserve_out;

	/* Validate BOs and update GPUVM page tables */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
		struct kfd_bo_va_list *bo_va_entry;

		bo = mem->bo;

		/* Validate the BO if we got user pages */
		if (bo->tbo.ttm->pages[0]) {
			amdgpu_bo_placement_from_domain(bo, mem->domain);
			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (ret) {
				pr_err("%s: failed to validate BO\n", __func__);
				goto unreserve_out;
			}
		}

		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_valid_list);

		/* Update mapping. If the BO was not validated
		 * (because we couldn't get user pages), this will
		 * clear the page table entries, which will result in
		 * VM faults if the GPU tries to access the invalid
		 * memory.
		 */
		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
			if (!bo_va_entry->is_mapped)
				continue;

			ret = update_gpuvm_pte((struct amdgpu_device *)
					       bo_va_entry->kgd_dev,
					       bo_va_entry, &sync);
			if (ret) {
				pr_err("%s: update PTE failed\n", __func__);
				/* make sure this gets validated again */
				atomic_inc(&mem->invalid);
				goto unreserve_out;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync);

unreserve_out:
	ttm_eu_backoff_reservation(&ticket, &resv_list);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);
out_free:
	kfree(pd_bo_list_entries);
out_no_mem:

	return ret;
}

/* Worker callback to restore evicted userptr BOs
 *
 * Tries to update and validate all userptr BOs. If successful and no
 * concurrent evictions happened, the queues are restarted.

/* Worker callback to restore evicted userptr BOs
 *
 * Tries to update and validate all userptr BOs. If successful and no
 * concurrent evictions happened, the queues are restarted. Otherwise,
 * reschedule for another attempt later.
 */
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct amdkfd_process_info *process_info =
		container_of(dwork, struct amdkfd_process_info,
			     restore_userptr_work);
	struct task_struct *usertask;
	struct mm_struct *mm;
	int evicted_bos;

	evicted_bos = atomic_read(&process_info->evicted_bos);
	if (!evicted_bos)
		return;

	/* Reference task and mm in case of concurrent process termination */
	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
	if (!usertask)
		return;
	mm = get_task_mm(usertask);
	if (!mm) {
		put_task_struct(usertask);
		return;
	}

	mutex_lock(&process_info->lock);

	if (update_invalid_user_pages(process_info, mm))
		goto unlock_out;
	/* userptr_inval_list can be empty if all evicted userptr BOs
	 * have been freed. In that case there is nothing to validate
	 * and we can just restart the queues.
	 */
	if (!list_empty(&process_info->userptr_inval_list)) {
		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
			goto unlock_out; /* Concurrent eviction, try again */

		if (validate_invalid_user_pages(process_info))
			goto unlock_out;
	}
	/* Final check for concurrent eviction and atomic update. If
	 * another eviction happens after successful update, it will
	 * be a first eviction that calls quiesce_mm. The eviction
	 * reference counting inside KFD will handle this case.
	 */
	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
	    evicted_bos)
		goto unlock_out;
	evicted_bos = 0;
	if (kgd2kfd_resume_mm(mm)) {
		pr_err("%s: Failed to resume KFD\n", __func__);
		/* No recovery from this failure. Probably the CP is
		 * hanging. No point trying again.
		 */
	}

unlock_out:
	mutex_unlock(&process_info->lock);
	mmput(mm);
	put_task_struct(usertask);

	/* If validation failed, reschedule another attempt */
	if (evicted_bos)
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
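
/*
 * Simplified sketch (illustration only, not called anywhere): how an
 * eviction path pairs with the restore worker above. The driver's real
 * userptr eviction handler lives elsewhere in this file and also quiesces
 * the user mode queues; only the bookkeeping and delayed-work scheduling
 * pattern is shown here. "example_queue_userptr_restore" is hypothetical.
 */
static inline void example_queue_userptr_restore(
		struct amdkfd_process_info *process_info)
{
	/* Record that at least one userptr BO was invalidated ... */
	atomic_inc(&process_info->evicted_bos);

	/* ... and let the delayed worker batch up further invalidations
	 * before attempting the restore.
	 */
	schedule_delayed_work(&process_info->restore_userptr_work,
		msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}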

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 * KFD process identified by process_info
 *
 * @process_info: amdkfd_process_info of the KFD process
 *
 * After memory eviction, the restore thread calls this function. The function
 * should be called when the process is still valid. BO restore involves:
 *
 * 1. Release old eviction fence and create new one
 * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_bo_list.
 * 3. Use the second PD list and kfd_bo_list to create a list (ctx.list) of
 *    BOs that need to be reserved.
 * 4. Reserve all the BOs
 * 5. Validate PD and PT BOs.
 * 6. Validate all KFD BOs using kfd_bo_list, map them and add a new fence
 * 7. Add fence to all PD and PT BOs.
 * 8. Unreserve all BOs
 */
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
	struct amdgpu_bo_list_entry *pd_bo_list;
	struct amdkfd_process_info *process_info = info;
	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem;
	struct bo_vm_reservation_context ctx;
	struct amdgpu_amdkfd_fence *new_fence;
	int ret = 0, i;
	struct list_head duplicate_save;
	struct amdgpu_sync sync_obj;
	unsigned long failed_size = 0;
	unsigned long total_size = 0;

	INIT_LIST_HEAD(&duplicate_save);
	INIT_LIST_HEAD(&ctx.list);
	INIT_LIST_HEAD(&ctx.duplicates);

	pd_bo_list = kcalloc(process_info->n_vms,
			     sizeof(struct amdgpu_bo_list_entry),
			     GFP_KERNEL);
	if (!pd_bo_list)
		return -ENOMEM;

	i = 0;
	mutex_lock(&process_info->lock);
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);

	/* Reserve all BOs and page tables/directory. Add all BOs from
	 * kfd_bo_list to ctx.list
	 */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		list_add_tail(&mem->resv_list.head, &ctx.list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
	}

	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
				     false, &duplicate_save);
	if (ret) {
		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
		goto ttm_reserve_fail;
	}

	amdgpu_sync_create(&sync_obj);

	/* Validate PDs and PTs */
	ret = process_validate_vms(process_info);
	if (ret)
		goto validate_map_fail;

	ret = process_sync_pds_resv(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
		goto validate_map_fail;
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		struct amdgpu_bo *bo = mem->bo;
		uint32_t domain = mem->domain;
		struct kfd_bo_va_list *bo_va_entry;

		total_size += amdgpu_bo_size(bo);

		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
		if (ret) {
			pr_debug("Memory eviction: Validate BOs failed\n");
			failed_size += amdgpu_bo_size(bo);
			ret = amdgpu_amdkfd_bo_validate(bo,
						AMDGPU_GEM_DOMAIN_GTT, false);
			if (ret) {
				pr_debug("Memory eviction: Try again\n");
				goto validate_map_fail;
			}
		}
		ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
		if (ret) {
			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
			goto validate_map_fail;
		}
		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
					       bo_va_entry->kgd_dev,
					       bo_va_entry,
					       &sync_obj);
			if (ret) {
				pr_debug("Memory eviction: update PTE failed. Try again\n");
				goto validate_map_fail;
			}
		}
	}
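
	/* Note: if validation in the preferred domain failed above (e.g. not
	 * enough VRAM), the BO was retried in GTT so the restore can still
	 * make progress; failed_size accumulates how much memory ended up in
	 * the system domain for the debug print below.
	 */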

	if (failed_size)
		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);

	/* Update page directories */
	ret = process_update_pds(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: update PDs failed. Try again\n");
		goto validate_map_fail;
	}

	/* Wait for validate and PT updates to finish */
	amdgpu_sync_wait(&sync_obj, false);

	/* Release the old eviction fence and create a new one. Because a
	 * fence only goes from unsignaled to signaled, it cannot be reused.
	 * Use context and mm from the old fence.
	 */
	new_fence = amdgpu_amdkfd_fence_create(
				process_info->eviction_fence->base.context,
				process_info->eviction_fence->mm);
	if (!new_fence) {
		pr_err("Failed to create eviction fence\n");
		ret = -ENOMEM;
		goto validate_map_fail;
	}
	dma_fence_put(&process_info->eviction_fence->base);
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Attach new eviction fence to all BOs */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head)
		amdgpu_bo_fence(mem->bo,
			&process_info->eviction_fence->base, true);

	/* Attach eviction fence to PD / PT BOs */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->root.base.bo;

		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
	}

validate_map_fail:
	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
	amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
	mutex_unlock(&process_info->lock);
	kfree(pd_bo_list);
	return ret;
}

int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
{
	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
	int ret;

	if (!info || !gws)
		return -EINVAL;

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem)
		return -ENOMEM;

	mutex_init(&(*mem)->lock);
	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	(*mem)->bo = amdgpu_bo_ref(gws_bo);
	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
	(*mem)->process_info = process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
	amdgpu_sync_create(&(*mem)->sync);

	/* Validate gws bo the first time it is added to process */
	mutex_lock(&(*mem)->process_info->lock);
	ret = amdgpu_bo_reserve(gws_bo, false);
	if (unlikely(ret)) {
		pr_err("Reserve gws bo failed %d\n", ret);
		goto bo_reservation_failure;
	}

	ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
	if (ret) {
		pr_err("GWS BO validate failed %d\n", ret);
		goto bo_validation_failure;
	}
	/* The GWS resource is shared between amdgpu and amdkfd.
	 * Add the process eviction fence to the BO so they can
	 * evict each other.
	 */
	ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
	if (ret)
		goto reserve_shared_fail;
	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
	amdgpu_bo_unreserve(gws_bo);
	mutex_unlock(&(*mem)->process_info->lock);

	return ret;

reserve_shared_fail:
bo_validation_failure:
	amdgpu_bo_unreserve(gws_bo);
bo_reservation_failure:
	mutex_unlock(&(*mem)->process_info->lock);
	amdgpu_sync_free(&(*mem)->sync);
	remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
	amdgpu_bo_unref(&gws_bo);
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	*mem = NULL;
	return ret;
}
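
/*
 * Hypothetical caller sketch (illustration only, not part of the driver):
 * the GWS BO is registered with a process once via
 * amdgpu_amdkfd_add_gws_to_process() and released again with
 * amdgpu_amdkfd_remove_gws_from_process() on teardown.
 * "example_gws_add_remove", "process_info" and "gws_bo" are placeholders
 * for objects the real caller (KFD) already owns.
 */
static inline int example_gws_add_remove(struct amdkfd_process_info *process_info,
					 struct amdgpu_bo *gws_bo)
{
	struct kgd_mem *mem;
	int ret;

	ret = amdgpu_amdkfd_add_gws_to_process(process_info, gws_bo, &mem);
	if (ret)
		return ret;

	/* ... use the GWS allocation ... */

	return amdgpu_amdkfd_remove_gws_from_process(process_info, mem);
}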

int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
{
	int ret;
	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
	struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
	struct amdgpu_bo *gws_bo = kgd_mem->bo;

	/* Remove BO from process's validate list so restore worker won't touch
	 * it anymore
	 */
	remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);

	ret = amdgpu_bo_reserve(gws_bo, false);
	if (unlikely(ret)) {
		pr_err("Reserve gws bo failed %d\n", ret);
		//TODO add BO back to validate_list?
		return ret;
	}
	amdgpu_amdkfd_remove_eviction_fence(gws_bo,
			process_info->eviction_fence);
	amdgpu_bo_unreserve(gws_bo);
	amdgpu_sync_free(&kgd_mem->sync);
	amdgpu_bo_unref(&gws_bo);
	mutex_destroy(&kgd_mem->lock);
	kfree(mem);
	return 0;
}

/* Returns GPU-specific tiling mode information */
int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
				  struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;
	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	/* Those values are not set from GFX9 onwards */
	config->num_banks = adev->gfx.config.num_banks;
	config->num_ranks = adev->gfx.config.num_ranks;

	return 0;
}
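
/*
 * Illustration only (not used by the driver): a minimal consumer of
 * amdgpu_amdkfd_get_tile_config(). "example_read_gb_addr_config" is a
 * hypothetical helper; "kgd" is assumed to be a valid device handle owned
 * by the caller. The fields read here are the ones filled in above.
 */
static inline uint32_t example_read_gb_addr_config(struct kgd_dev *kgd)
{
	struct tile_config config = {};

	if (amdgpu_amdkfd_get_tile_config(kgd, &config))
		return 0;

	return config.gb_addr_config;
}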