1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2014-2018 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 #include <linux/dma-buf.h> 24 #include <linux/list.h> 25 #include <linux/pagemap.h> 26 #include <linux/sched/mm.h> 27 #include <linux/sched/task.h> 28 #include <drm/ttm/ttm_tt.h> 29 30 #include "amdgpu_object.h" 31 #include "amdgpu_gem.h" 32 #include "amdgpu_vm.h" 33 #include "amdgpu_hmm.h" 34 #include "amdgpu_amdkfd.h" 35 #include "amdgpu_dma_buf.h" 36 #include <uapi/linux/kfd_ioctl.h> 37 #include "amdgpu_xgmi.h" 38 #include "kfd_smi_events.h" 39 40 /* Userptr restore delay, just long enough to allow consecutive VM 41 * changes to accumulate 42 */ 43 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 44 45 /* 46 * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB 47 * BO chunk 48 */ 49 #define VRAM_AVAILABLITY_ALIGN (1 << 21) 50 51 /* Impose limit on how much memory KFD can use */ 52 static struct { 53 uint64_t max_system_mem_limit; 54 uint64_t max_ttm_mem_limit; 55 int64_t system_mem_used; 56 int64_t ttm_mem_used; 57 spinlock_t mem_limit_lock; 58 } kfd_mem_limit; 59 60 static const char * const domain_bit_to_string[] = { 61 "CPU", 62 "GTT", 63 "VRAM", 64 "GDS", 65 "GWS", 66 "OA" 67 }; 68 69 #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] 70 71 static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); 72 73 static bool kfd_mem_is_attached(struct amdgpu_vm *avm, 74 struct kgd_mem *mem) 75 { 76 struct kfd_mem_attachment *entry; 77 78 list_for_each_entry(entry, &mem->attachments, list) 79 if (entry->bo_va->base.vm == avm) 80 return true; 81 82 return false; 83 } 84 85 /* Set memory usage limits. 
Current, limits are 86 * System (TTM + userptr) memory - 15/16th System RAM 87 * TTM memory - 3/8th System RAM 88 */ 89 void amdgpu_amdkfd_gpuvm_init_mem_limits(void) 90 { 91 struct sysinfo si; 92 uint64_t mem; 93 94 si_meminfo(&si); 95 mem = si.freeram - si.freehigh; 96 mem *= si.mem_unit; 97 98 spin_lock_init(&kfd_mem_limit.mem_limit_lock); 99 kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); 100 kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); 101 pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", 102 (kfd_mem_limit.max_system_mem_limit >> 20), 103 (kfd_mem_limit.max_ttm_mem_limit >> 20)); 104 } 105 106 void amdgpu_amdkfd_reserve_system_mem(uint64_t size) 107 { 108 kfd_mem_limit.system_mem_used += size; 109 } 110 111 /* Estimate page table size needed to represent a given memory size 112 * 113 * With 4KB pages, we need one 8 byte PTE for each 4KB of memory 114 * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB 115 * of memory (factor 256K, >> 18). ROCm user mode tries to optimize 116 * for 2MB pages for TLB efficiency. However, small allocations and 117 * fragmented system memory still need some 4KB pages. We choose a 118 * compromise that should work in most cases without reserving too 119 * much memory for page tables unnecessarily (factor 16K, >> 14). 120 */ 121 122 #define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) 123 124 /** 125 * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size 126 * of buffer. 127 * 128 * @adev: Device to which allocated BO belongs to 129 * @size: Size of buffer, in bytes, encapsulated by B0. This should be 130 * equivalent to amdgpu_bo_size(BO) 131 * @alloc_flag: Flag used in allocating a BO as noted above 132 * 133 * Return: returns -ENOMEM in case of error, ZERO otherwise 134 */ 135 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, 136 uint64_t size, u32 alloc_flag) 137 { 138 uint64_t reserved_for_pt = 139 ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); 140 size_t system_mem_needed, ttm_mem_needed, vram_needed; 141 int ret = 0; 142 143 system_mem_needed = 0; 144 ttm_mem_needed = 0; 145 vram_needed = 0; 146 if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { 147 system_mem_needed = size; 148 ttm_mem_needed = size; 149 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 150 /* 151 * Conservatively round up the allocation requirement to 2 MB 152 * to avoid fragmentation caused by 4K allocations in the tail 153 * 2M BO chunk. 
154 */ 155 vram_needed = size; 156 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 157 system_mem_needed = size; 158 } else if (!(alloc_flag & 159 (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | 160 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { 161 pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); 162 return -ENOMEM; 163 } 164 165 spin_lock(&kfd_mem_limit.mem_limit_lock); 166 167 if (kfd_mem_limit.system_mem_used + system_mem_needed > 168 kfd_mem_limit.max_system_mem_limit) 169 pr_debug("Set no_system_mem_limit=1 if using shared memory\n"); 170 171 if ((kfd_mem_limit.system_mem_used + system_mem_needed > 172 kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || 173 (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > 174 kfd_mem_limit.max_ttm_mem_limit) || 175 (adev && adev->kfd.vram_used + vram_needed > 176 adev->gmc.real_vram_size - reserved_for_pt)) { 177 ret = -ENOMEM; 178 goto release; 179 } 180 181 /* Update memory accounting by decreasing available system 182 * memory, TTM memory and GPU memory as computed above 183 */ 184 WARN_ONCE(vram_needed && !adev, 185 "adev reference can't be null when vram is used"); 186 if (adev) { 187 adev->kfd.vram_used += vram_needed; 188 adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); 189 } 190 kfd_mem_limit.system_mem_used += system_mem_needed; 191 kfd_mem_limit.ttm_mem_used += ttm_mem_needed; 192 193 release: 194 spin_unlock(&kfd_mem_limit.mem_limit_lock); 195 return ret; 196 } 197 198 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, 199 uint64_t size, u32 alloc_flag) 200 { 201 spin_lock(&kfd_mem_limit.mem_limit_lock); 202 203 if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { 204 kfd_mem_limit.system_mem_used -= size; 205 kfd_mem_limit.ttm_mem_used -= size; 206 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 207 WARN_ONCE(!adev, 208 "adev reference can't be null when alloc mem flags vram is set"); 209 if (adev) { 210 adev->kfd.vram_used -= size; 211 adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); 212 } 213 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 214 kfd_mem_limit.system_mem_used -= size; 215 } else if (!(alloc_flag & 216 (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | 217 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { 218 pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); 219 goto release; 220 } 221 WARN_ONCE(adev && adev->kfd.vram_used < 0, 222 "KFD VRAM memory accounting unbalanced"); 223 WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, 224 "KFD TTM memory accounting unbalanced"); 225 WARN_ONCE(kfd_mem_limit.system_mem_used < 0, 226 "KFD system memory accounting unbalanced"); 227 228 release: 229 spin_unlock(&kfd_mem_limit.mem_limit_lock); 230 } 231 232 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) 233 { 234 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 235 u32 alloc_flags = bo->kfd_bo->alloc_flags; 236 u64 size = amdgpu_bo_size(bo); 237 238 amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); 239 240 kfree(bo->kfd_bo); 241 } 242 243 /** 244 * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information 245 * about USERPTR or DOOREBELL or MMIO BO. 246 * @adev: Device for which dmamap BO is being created 247 * @mem: BO of peer device that is being DMA mapped. 
Provides parameters 248 * in building the dmamap BO 249 * @bo_out: Output parameter updated with handle of dmamap BO 250 */ 251 static int 252 create_dmamap_sg_bo(struct amdgpu_device *adev, 253 struct kgd_mem *mem, struct amdgpu_bo **bo_out) 254 { 255 struct drm_gem_object *gem_obj; 256 int ret, align; 257 258 ret = amdgpu_bo_reserve(mem->bo, false); 259 if (ret) 260 return ret; 261 262 align = 1; 263 ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, 264 AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, 265 ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); 266 267 amdgpu_bo_unreserve(mem->bo); 268 269 if (ret) { 270 pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret); 271 return -EINVAL; 272 } 273 274 *bo_out = gem_to_amdgpu_bo(gem_obj); 275 (*bo_out)->parent = amdgpu_bo_ref(mem->bo); 276 return ret; 277 } 278 279 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's 280 * reservation object. 281 * 282 * @bo: [IN] Remove eviction fence(s) from this BO 283 * @ef: [IN] This eviction fence is removed if it 284 * is present in the shared list. 285 * 286 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. 287 */ 288 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, 289 struct amdgpu_amdkfd_fence *ef) 290 { 291 struct dma_fence *replacement; 292 293 if (!ef) 294 return -EINVAL; 295 296 /* TODO: Instead of block before we should use the fence of the page 297 * table update and TLB flush here directly. 298 */ 299 replacement = dma_fence_get_stub(); 300 dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, 301 replacement, DMA_RESV_USAGE_BOOKKEEP); 302 dma_fence_put(replacement); 303 return 0; 304 } 305 306 int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) 307 { 308 struct amdgpu_bo *root = bo; 309 struct amdgpu_vm_bo_base *vm_bo; 310 struct amdgpu_vm *vm; 311 struct amdkfd_process_info *info; 312 struct amdgpu_amdkfd_fence *ef; 313 int ret; 314 315 /* we can always get vm_bo from root PD bo.*/ 316 while (root->parent) 317 root = root->parent; 318 319 vm_bo = root->vm_bo; 320 if (!vm_bo) 321 return 0; 322 323 vm = vm_bo->vm; 324 if (!vm) 325 return 0; 326 327 info = vm->process_info; 328 if (!info || !info->eviction_fence) 329 return 0; 330 331 ef = container_of(dma_fence_get(&info->eviction_fence->base), 332 struct amdgpu_amdkfd_fence, base); 333 334 BUG_ON(!dma_resv_trylock(bo->tbo.base.resv)); 335 ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef); 336 dma_resv_unlock(bo->tbo.base.resv); 337 338 dma_fence_put(&ef->base); 339 return ret; 340 } 341 342 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, 343 bool wait) 344 { 345 struct ttm_operation_ctx ctx = { false, false }; 346 int ret; 347 348 if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), 349 "Called with userptr BO")) 350 return -EINVAL; 351 352 amdgpu_bo_placement_from_domain(bo, domain); 353 354 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 355 if (ret) 356 goto validate_fail; 357 if (wait) 358 amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); 359 360 validate_fail: 361 return ret; 362 } 363 364 static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) 365 { 366 return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); 367 } 368 369 /* vm_validate_pt_pd_bos - Validate page table and directory BOs 370 * 371 * Page directories are not updated here because huge page handling 372 * during page table updates can invalidate page directory entries 373 * again. 
Page directories are only updated after updating page 374 * tables. 375 */ 376 static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) 377 { 378 struct amdgpu_bo *pd = vm->root.bo; 379 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); 380 int ret; 381 382 ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL); 383 if (ret) { 384 pr_err("failed to validate PT BOs\n"); 385 return ret; 386 } 387 388 vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo); 389 390 return 0; 391 } 392 393 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) 394 { 395 struct amdgpu_bo *pd = vm->root.bo; 396 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); 397 int ret; 398 399 ret = amdgpu_vm_update_pdes(adev, vm, false); 400 if (ret) 401 return ret; 402 403 return amdgpu_sync_fence(sync, vm->last_update); 404 } 405 406 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) 407 { 408 uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE | 409 AMDGPU_VM_MTYPE_DEFAULT; 410 411 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE) 412 mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; 413 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) 414 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; 415 416 return amdgpu_gem_va_map_flags(adev, mapping_flags); 417 } 418 419 /** 420 * create_sg_table() - Create an sg_table for a contiguous DMA addr range 421 * @addr: The starting address to point to 422 * @size: Size of memory area in bytes being pointed to 423 * 424 * Allocates an instance of sg_table and initializes it to point to memory 425 * area specified by input parameters. The address used to build is assumed 426 * to be DMA mapped, if needed. 427 * 428 * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table 429 * because they are physically contiguous. 430 * 431 * Return: Initialized instance of SG Table or NULL 432 */ 433 static struct sg_table *create_sg_table(uint64_t addr, uint32_t size) 434 { 435 struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); 436 437 if (!sg) 438 return NULL; 439 if (sg_alloc_table(sg, 1, GFP_KERNEL)) { 440 kfree(sg); 441 return NULL; 442 } 443 sg_dma_address(sg->sgl) = addr; 444 sg->sgl->length = size; 445 #ifdef CONFIG_NEED_SG_DMA_LENGTH 446 sg->sgl->dma_length = size; 447 #endif 448 return sg; 449 } 450 451 static int 452 kfd_mem_dmamap_userptr(struct kgd_mem *mem, 453 struct kfd_mem_attachment *attachment) 454 { 455 enum dma_data_direction direction = 456 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
457 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 458 struct ttm_operation_ctx ctx = {.interruptible = true}; 459 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 460 struct amdgpu_device *adev = attachment->adev; 461 struct ttm_tt *src_ttm = mem->bo->tbo.ttm; 462 struct ttm_tt *ttm = bo->tbo.ttm; 463 int ret; 464 465 if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) 466 return -EINVAL; 467 468 ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL); 469 if (unlikely(!ttm->sg)) 470 return -ENOMEM; 471 472 /* Same sequence as in amdgpu_ttm_tt_pin_userptr */ 473 ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages, 474 ttm->num_pages, 0, 475 (u64)ttm->num_pages << PAGE_SHIFT, 476 GFP_KERNEL); 477 if (unlikely(ret)) 478 goto free_sg; 479 480 ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0); 481 if (unlikely(ret)) 482 goto release_sg; 483 484 drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address, 485 ttm->num_pages); 486 487 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); 488 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 489 if (ret) 490 goto unmap_sg; 491 492 return 0; 493 494 unmap_sg: 495 dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); 496 release_sg: 497 pr_err("DMA map userptr failed: %d\n", ret); 498 sg_free_table(ttm->sg); 499 free_sg: 500 kfree(ttm->sg); 501 ttm->sg = NULL; 502 return ret; 503 } 504 505 static int 506 kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) 507 { 508 struct ttm_operation_ctx ctx = {.interruptible = true}; 509 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 510 511 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); 512 return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 513 } 514 515 /** 516 * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO 517 * @mem: SG BO of the DOORBELL or MMIO resource on the owning device 518 * @attachment: Virtual address attachment of the BO on accessing device 519 * 520 * An access request from the device that owns DOORBELL does not require DMA mapping. 521 * This is because the request doesn't go through PCIe root complex i.e. it instead 522 * loops back. The need to DMA map arises only when accessing peer device's DOORBELL 523 * 524 * In contrast, all access requests for MMIO need to be DMA mapped without regard to 525 * device ownership. This is because access requests for MMIO go through PCIe root 526 * complex. 527 * 528 * This is accomplished in two steps: 529 * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used 530 * in updating requesting device's page table 531 * - Signal TTM to mark memory pointed to by requesting device's BO as GPU 532 * accessible. 
This allows an update of requesting device's page table 533 * with entries associated with DOOREBELL or MMIO memory 534 * 535 * This method is invoked in the following contexts: 536 * - Mapping of DOORBELL or MMIO BO of same or peer device 537 * - Validating an evicted DOOREBELL or MMIO BO on device seeking access 538 * 539 * Return: ZERO if successful, NON-ZERO otherwise 540 */ 541 static int 542 kfd_mem_dmamap_sg_bo(struct kgd_mem *mem, 543 struct kfd_mem_attachment *attachment) 544 { 545 struct ttm_operation_ctx ctx = {.interruptible = true}; 546 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 547 struct amdgpu_device *adev = attachment->adev; 548 struct ttm_tt *ttm = bo->tbo.ttm; 549 enum dma_data_direction dir; 550 dma_addr_t dma_addr; 551 bool mmio; 552 int ret; 553 554 /* Expect SG Table of dmapmap BO to be NULL */ 555 mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP); 556 if (unlikely(ttm->sg)) { 557 pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio); 558 return -EINVAL; 559 } 560 561 dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 562 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 563 dma_addr = mem->bo->tbo.sg->sgl->dma_address; 564 pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length); 565 pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr); 566 dma_addr = dma_map_resource(adev->dev, dma_addr, 567 mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); 568 ret = dma_mapping_error(adev->dev, dma_addr); 569 if (unlikely(ret)) 570 return ret; 571 pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr); 572 573 ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length); 574 if (unlikely(!ttm->sg)) { 575 ret = -ENOMEM; 576 goto unmap_sg; 577 } 578 579 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); 580 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 581 if (unlikely(ret)) 582 goto free_sg; 583 584 return ret; 585 586 free_sg: 587 sg_free_table(ttm->sg); 588 kfree(ttm->sg); 589 ttm->sg = NULL; 590 unmap_sg: 591 dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length, 592 dir, DMA_ATTR_SKIP_CPU_SYNC); 593 return ret; 594 } 595 596 static int 597 kfd_mem_dmamap_attachment(struct kgd_mem *mem, 598 struct kfd_mem_attachment *attachment) 599 { 600 switch (attachment->type) { 601 case KFD_MEM_ATT_SHARED: 602 return 0; 603 case KFD_MEM_ATT_USERPTR: 604 return kfd_mem_dmamap_userptr(mem, attachment); 605 case KFD_MEM_ATT_DMABUF: 606 return kfd_mem_dmamap_dmabuf(attachment); 607 case KFD_MEM_ATT_SG: 608 return kfd_mem_dmamap_sg_bo(mem, attachment); 609 default: 610 WARN_ON_ONCE(1); 611 } 612 return -EINVAL; 613 } 614 615 static void 616 kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, 617 struct kfd_mem_attachment *attachment) 618 { 619 enum dma_data_direction direction = 620 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
621 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 622 struct ttm_operation_ctx ctx = {.interruptible = false}; 623 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 624 struct amdgpu_device *adev = attachment->adev; 625 struct ttm_tt *ttm = bo->tbo.ttm; 626 627 if (unlikely(!ttm->sg)) 628 return; 629 630 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 631 ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 632 633 dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); 634 sg_free_table(ttm->sg); 635 kfree(ttm->sg); 636 ttm->sg = NULL; 637 } 638 639 static void 640 kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) 641 { 642 struct ttm_operation_ctx ctx = {.interruptible = true}; 643 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 644 645 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 646 ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 647 } 648 649 /** 650 * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO 651 * @mem: SG BO of the DOORBELL or MMIO resource on the owning device 652 * @attachment: Virtual address attachment of the BO on accessing device 653 * 654 * The method performs following steps: 655 * - Signal TTM to mark memory pointed to by BO as GPU inaccessible 656 * - Free SG Table that is used to encapsulate DMA mapped memory of 657 * peer device's DOORBELL or MMIO memory 658 * 659 * This method is invoked in the following contexts: 660 * UNMapping of DOORBELL or MMIO BO on a device having access to its memory 661 * Eviction of DOOREBELL or MMIO BO on device having access to its memory 662 * 663 * Return: void 664 */ 665 static void 666 kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, 667 struct kfd_mem_attachment *attachment) 668 { 669 struct ttm_operation_ctx ctx = {.interruptible = true}; 670 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 671 struct amdgpu_device *adev = attachment->adev; 672 struct ttm_tt *ttm = bo->tbo.ttm; 673 enum dma_data_direction dir; 674 675 if (unlikely(!ttm->sg)) { 676 pr_err("SG Table of BO is UNEXPECTEDLY NULL"); 677 return; 678 } 679 680 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 681 ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 682 683 dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 684 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 685 dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address, 686 ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); 687 sg_free_table(ttm->sg); 688 kfree(ttm->sg); 689 ttm->sg = NULL; 690 bo->tbo.sg = NULL; 691 } 692 693 static void 694 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, 695 struct kfd_mem_attachment *attachment) 696 { 697 switch (attachment->type) { 698 case KFD_MEM_ATT_SHARED: 699 break; 700 case KFD_MEM_ATT_USERPTR: 701 kfd_mem_dmaunmap_userptr(mem, attachment); 702 break; 703 case KFD_MEM_ATT_DMABUF: 704 kfd_mem_dmaunmap_dmabuf(attachment); 705 break; 706 case KFD_MEM_ATT_SG: 707 kfd_mem_dmaunmap_sg_bo(mem, attachment); 708 break; 709 default: 710 WARN_ON_ONCE(1); 711 } 712 } 713 714 static int 715 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, 716 struct amdgpu_bo **bo) 717 { 718 struct drm_gem_object *gobj; 719 int ret; 720 721 if (!mem->dmabuf) { 722 mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base, 723 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
724 DRM_RDWR : 0); 725 if (IS_ERR(mem->dmabuf)) { 726 ret = PTR_ERR(mem->dmabuf); 727 mem->dmabuf = NULL; 728 return ret; 729 } 730 } 731 732 gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf); 733 if (IS_ERR(gobj)) 734 return PTR_ERR(gobj); 735 736 *bo = gem_to_amdgpu_bo(gobj); 737 (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE; 738 739 return 0; 740 } 741 742 /* kfd_mem_attach - Add a BO to a VM 743 * 744 * Everything that needs to bo done only once when a BO is first added 745 * to a VM. It can later be mapped and unmapped many times without 746 * repeating these steps. 747 * 748 * 0. Create BO for DMA mapping, if needed 749 * 1. Allocate and initialize BO VA entry data structure 750 * 2. Add BO to the VM 751 * 3. Determine ASIC-specific PTE flags 752 * 4. Alloc page tables and directories if needed 753 * 4a. Validate new page tables and directories 754 */ 755 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, 756 struct amdgpu_vm *vm, bool is_aql) 757 { 758 struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); 759 unsigned long bo_size = mem->bo->tbo.base.size; 760 uint64_t va = mem->va; 761 struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; 762 struct amdgpu_bo *bo[2] = {NULL, NULL}; 763 bool same_hive = false; 764 int i, ret; 765 766 if (!va) { 767 pr_err("Invalid VA when adding BO to VM\n"); 768 return -EINVAL; 769 } 770 771 /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices 772 * 773 * The access path of MMIO and DOORBELL BOs of is always over PCIe. 774 * In contrast the access path of VRAM BOs depens upon the type of 775 * link that connects the peer device. Access over PCIe is allowed 776 * if peer device has large BAR. In contrast, access over xGMI is 777 * allowed for both small and large BAR configurations of peer device 778 */ 779 if ((adev != bo_adev) && 780 ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || 781 (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || 782 (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { 783 if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) 784 same_hive = amdgpu_xgmi_same_hive(adev, bo_adev); 785 if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev)) 786 return -EINVAL; 787 } 788 789 for (i = 0; i <= is_aql; i++) { 790 attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL); 791 if (unlikely(!attachment[i])) { 792 ret = -ENOMEM; 793 goto unwind; 794 } 795 796 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, 797 va + bo_size, vm); 798 799 if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || 800 (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || 801 same_hive) { 802 /* Mappings on the local GPU, or VRAM mappings in the 803 * local hive, or userptr mapping IOMMU direct map mode 804 * share the original BO 805 */ 806 attachment[i]->type = KFD_MEM_ATT_SHARED; 807 bo[i] = mem->bo; 808 drm_gem_object_get(&bo[i]->tbo.base); 809 } else if (i > 0) { 810 /* Multiple mappings on the same GPU share the BO */ 811 attachment[i]->type = KFD_MEM_ATT_SHARED; 812 bo[i] = bo[0]; 813 drm_gem_object_get(&bo[i]->tbo.base); 814 } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { 815 /* Create an SG BO to DMA-map userptrs on other GPUs */ 816 attachment[i]->type = KFD_MEM_ATT_USERPTR; 817 ret = create_dmamap_sg_bo(adev, mem, &bo[i]); 818 if (ret) 819 goto unwind; 820 /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */ 821 } else if (mem->bo->tbo.type == ttm_bo_type_sg) { 822 
WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL || 823 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP), 824 "Handing invalid SG BO in ATTACH request"); 825 attachment[i]->type = KFD_MEM_ATT_SG; 826 ret = create_dmamap_sg_bo(adev, mem, &bo[i]); 827 if (ret) 828 goto unwind; 829 /* Enable acces to GTT and VRAM BOs of peer devices */ 830 } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT || 831 mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { 832 attachment[i]->type = KFD_MEM_ATT_DMABUF; 833 ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); 834 if (ret) 835 goto unwind; 836 pr_debug("Employ DMABUF mechanism to enable peer GPU access\n"); 837 } else { 838 WARN_ONCE(true, "Handling invalid ATTACH request"); 839 ret = -EINVAL; 840 goto unwind; 841 } 842 843 /* Add BO to VM internal data structures */ 844 ret = amdgpu_bo_reserve(bo[i], false); 845 if (ret) { 846 pr_debug("Unable to reserve BO during memory attach"); 847 goto unwind; 848 } 849 attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); 850 amdgpu_bo_unreserve(bo[i]); 851 if (unlikely(!attachment[i]->bo_va)) { 852 ret = -ENOMEM; 853 pr_err("Failed to add BO object to VM. ret == %d\n", 854 ret); 855 goto unwind; 856 } 857 attachment[i]->va = va; 858 attachment[i]->pte_flags = get_pte_flags(adev, mem); 859 attachment[i]->adev = adev; 860 list_add(&attachment[i]->list, &mem->attachments); 861 862 va += bo_size; 863 } 864 865 return 0; 866 867 unwind: 868 for (; i >= 0; i--) { 869 if (!attachment[i]) 870 continue; 871 if (attachment[i]->bo_va) { 872 amdgpu_bo_reserve(bo[i], true); 873 amdgpu_vm_bo_del(adev, attachment[i]->bo_va); 874 amdgpu_bo_unreserve(bo[i]); 875 list_del(&attachment[i]->list); 876 } 877 if (bo[i]) 878 drm_gem_object_put(&bo[i]->tbo.base); 879 kfree(attachment[i]); 880 } 881 return ret; 882 } 883 884 static void kfd_mem_detach(struct kfd_mem_attachment *attachment) 885 { 886 struct amdgpu_bo *bo = attachment->bo_va->base.bo; 887 888 pr_debug("\t remove VA 0x%llx in entry %p\n", 889 attachment->va, attachment); 890 amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); 891 drm_gem_object_put(&bo->tbo.base); 892 list_del(&attachment->list); 893 kfree(attachment); 894 } 895 896 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, 897 struct amdkfd_process_info *process_info, 898 bool userptr) 899 { 900 struct ttm_validate_buffer *entry = &mem->validate_list; 901 struct amdgpu_bo *bo = mem->bo; 902 903 INIT_LIST_HEAD(&entry->head); 904 entry->num_shared = 1; 905 entry->bo = &bo->tbo; 906 mutex_lock(&process_info->lock); 907 if (userptr) 908 list_add_tail(&entry->head, &process_info->userptr_valid_list); 909 else 910 list_add_tail(&entry->head, &process_info->kfd_bo_list); 911 mutex_unlock(&process_info->lock); 912 } 913 914 static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, 915 struct amdkfd_process_info *process_info) 916 { 917 struct ttm_validate_buffer *bo_list_entry; 918 919 bo_list_entry = &mem->validate_list; 920 mutex_lock(&process_info->lock); 921 list_del(&bo_list_entry->head); 922 mutex_unlock(&process_info->lock); 923 } 924 925 /* Initializes user pages. It registers the MMU notifier and validates 926 * the userptr BO in the GTT domain. 927 * 928 * The BO must already be on the userptr_valid_list. Otherwise an 929 * eviction and restore may happen that leaves the new BO unmapped 930 * with the user mode queues running. 931 * 932 * Takes the process_info->lock to protect against concurrent restore 933 * workers. 934 * 935 * Returns 0 for success, negative errno for errors. 
936 */ 937 static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, 938 bool criu_resume) 939 { 940 struct amdkfd_process_info *process_info = mem->process_info; 941 struct amdgpu_bo *bo = mem->bo; 942 struct ttm_operation_ctx ctx = { true, false }; 943 struct hmm_range *range; 944 int ret = 0; 945 946 mutex_lock(&process_info->lock); 947 948 ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0); 949 if (ret) { 950 pr_err("%s: Failed to set userptr: %d\n", __func__, ret); 951 goto out; 952 } 953 954 ret = amdgpu_hmm_register(bo, user_addr); 955 if (ret) { 956 pr_err("%s: Failed to register MMU notifier: %d\n", 957 __func__, ret); 958 goto out; 959 } 960 961 if (criu_resume) { 962 /* 963 * During a CRIU restore operation, the userptr buffer objects 964 * will be validated in the restore_userptr_work worker at a 965 * later stage when it is scheduled by another ioctl called by 966 * CRIU master process for the target pid for restore. 967 */ 968 mutex_lock(&process_info->notifier_lock); 969 mem->invalid++; 970 mutex_unlock(&process_info->notifier_lock); 971 mutex_unlock(&process_info->lock); 972 return 0; 973 } 974 975 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); 976 if (ret) { 977 pr_err("%s: Failed to get user pages: %d\n", __func__, ret); 978 goto unregister_out; 979 } 980 981 ret = amdgpu_bo_reserve(bo, true); 982 if (ret) { 983 pr_err("%s: Failed to reserve BO\n", __func__); 984 goto release_out; 985 } 986 amdgpu_bo_placement_from_domain(bo, mem->domain); 987 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 988 if (ret) 989 pr_err("%s: failed to validate BO\n", __func__); 990 amdgpu_bo_unreserve(bo); 991 992 release_out: 993 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); 994 unregister_out: 995 if (ret) 996 amdgpu_hmm_unregister(bo); 997 out: 998 mutex_unlock(&process_info->lock); 999 return ret; 1000 } 1001 1002 /* Reserving a BO and its page table BOs must happen atomically to 1003 * avoid deadlocks. Some operations update multiple VMs at once. Track 1004 * all the reservation info in a context structure. Optionally a sync 1005 * object can track VM updates. 1006 */ 1007 struct bo_vm_reservation_context { 1008 struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ 1009 unsigned int n_vms; /* Number of VMs reserved */ 1010 struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ 1011 struct ww_acquire_ctx ticket; /* Reservation ticket */ 1012 struct list_head list, duplicates; /* BO lists */ 1013 struct amdgpu_sync *sync; /* Pointer to sync object */ 1014 bool reserved; /* Whether BOs are reserved */ 1015 }; 1016 1017 enum bo_vm_match { 1018 BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ 1019 BO_VM_MAPPED, /* Match VMs where a BO is mapped */ 1020 BO_VM_ALL, /* Match all VMs a BO was added to */ 1021 }; 1022 1023 /** 1024 * reserve_bo_and_vm - reserve a BO and a VM unconditionally. 1025 * @mem: KFD BO structure. 1026 * @vm: the VM to reserve. 1027 * @ctx: the struct that will be used in unreserve_bo_and_vms(). 
1028 */ 1029 static int reserve_bo_and_vm(struct kgd_mem *mem, 1030 struct amdgpu_vm *vm, 1031 struct bo_vm_reservation_context *ctx) 1032 { 1033 struct amdgpu_bo *bo = mem->bo; 1034 int ret; 1035 1036 WARN_ON(!vm); 1037 1038 ctx->reserved = false; 1039 ctx->n_vms = 1; 1040 ctx->sync = &mem->sync; 1041 1042 INIT_LIST_HEAD(&ctx->list); 1043 INIT_LIST_HEAD(&ctx->duplicates); 1044 1045 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); 1046 if (!ctx->vm_pd) 1047 return -ENOMEM; 1048 1049 ctx->kfd_bo.priority = 0; 1050 ctx->kfd_bo.tv.bo = &bo->tbo; 1051 ctx->kfd_bo.tv.num_shared = 1; 1052 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 1053 1054 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); 1055 1056 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, 1057 false, &ctx->duplicates); 1058 if (ret) { 1059 pr_err("Failed to reserve buffers in ttm.\n"); 1060 kfree(ctx->vm_pd); 1061 ctx->vm_pd = NULL; 1062 return ret; 1063 } 1064 1065 ctx->reserved = true; 1066 return 0; 1067 } 1068 1069 /** 1070 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally 1071 * @mem: KFD BO structure. 1072 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO 1073 * is used. Otherwise, a single VM associated with the BO. 1074 * @map_type: the mapping status that will be used to filter the VMs. 1075 * @ctx: the struct that will be used in unreserve_bo_and_vms(). 1076 * 1077 * Returns 0 for success, negative for failure. 1078 */ 1079 static int reserve_bo_and_cond_vms(struct kgd_mem *mem, 1080 struct amdgpu_vm *vm, enum bo_vm_match map_type, 1081 struct bo_vm_reservation_context *ctx) 1082 { 1083 struct amdgpu_bo *bo = mem->bo; 1084 struct kfd_mem_attachment *entry; 1085 unsigned int i; 1086 int ret; 1087 1088 ctx->reserved = false; 1089 ctx->n_vms = 0; 1090 ctx->vm_pd = NULL; 1091 ctx->sync = &mem->sync; 1092 1093 INIT_LIST_HEAD(&ctx->list); 1094 INIT_LIST_HEAD(&ctx->duplicates); 1095 1096 list_for_each_entry(entry, &mem->attachments, list) { 1097 if ((vm && vm != entry->bo_va->base.vm) || 1098 (entry->is_mapped != map_type 1099 && map_type != BO_VM_ALL)) 1100 continue; 1101 1102 ctx->n_vms++; 1103 } 1104 1105 if (ctx->n_vms != 0) { 1106 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), 1107 GFP_KERNEL); 1108 if (!ctx->vm_pd) 1109 return -ENOMEM; 1110 } 1111 1112 ctx->kfd_bo.priority = 0; 1113 ctx->kfd_bo.tv.bo = &bo->tbo; 1114 ctx->kfd_bo.tv.num_shared = 1; 1115 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 1116 1117 i = 0; 1118 list_for_each_entry(entry, &mem->attachments, list) { 1119 if ((vm && vm != entry->bo_va->base.vm) || 1120 (entry->is_mapped != map_type 1121 && map_type != BO_VM_ALL)) 1122 continue; 1123 1124 amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, 1125 &ctx->vm_pd[i]); 1126 i++; 1127 } 1128 1129 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, 1130 false, &ctx->duplicates); 1131 if (ret) { 1132 pr_err("Failed to reserve buffers in ttm.\n"); 1133 kfree(ctx->vm_pd); 1134 ctx->vm_pd = NULL; 1135 return ret; 1136 } 1137 1138 ctx->reserved = true; 1139 return 0; 1140 } 1141 1142 /** 1143 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context 1144 * @ctx: Reservation context to unreserve 1145 * @wait: Optionally wait for a sync object representing pending VM updates 1146 * @intr: Whether the wait is interruptible 1147 * 1148 * Also frees any resources allocated in 1149 * reserve_bo_and_(cond_)vm(s). Returns the status from 1150 * amdgpu_sync_wait. 
1151 */ 1152 static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, 1153 bool wait, bool intr) 1154 { 1155 int ret = 0; 1156 1157 if (wait) 1158 ret = amdgpu_sync_wait(ctx->sync, intr); 1159 1160 if (ctx->reserved) 1161 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); 1162 kfree(ctx->vm_pd); 1163 1164 ctx->sync = NULL; 1165 1166 ctx->reserved = false; 1167 ctx->vm_pd = NULL; 1168 1169 return ret; 1170 } 1171 1172 static void unmap_bo_from_gpuvm(struct kgd_mem *mem, 1173 struct kfd_mem_attachment *entry, 1174 struct amdgpu_sync *sync) 1175 { 1176 struct amdgpu_bo_va *bo_va = entry->bo_va; 1177 struct amdgpu_device *adev = entry->adev; 1178 struct amdgpu_vm *vm = bo_va->base.vm; 1179 1180 amdgpu_vm_bo_unmap(adev, bo_va, entry->va); 1181 1182 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); 1183 1184 amdgpu_sync_fence(sync, bo_va->last_pt_update); 1185 1186 kfd_mem_dmaunmap_attachment(mem, entry); 1187 } 1188 1189 static int update_gpuvm_pte(struct kgd_mem *mem, 1190 struct kfd_mem_attachment *entry, 1191 struct amdgpu_sync *sync) 1192 { 1193 struct amdgpu_bo_va *bo_va = entry->bo_va; 1194 struct amdgpu_device *adev = entry->adev; 1195 int ret; 1196 1197 ret = kfd_mem_dmamap_attachment(mem, entry); 1198 if (ret) 1199 return ret; 1200 1201 /* Update the page tables */ 1202 ret = amdgpu_vm_bo_update(adev, bo_va, false); 1203 if (ret) { 1204 pr_err("amdgpu_vm_bo_update failed\n"); 1205 return ret; 1206 } 1207 1208 return amdgpu_sync_fence(sync, bo_va->last_pt_update); 1209 } 1210 1211 static int map_bo_to_gpuvm(struct kgd_mem *mem, 1212 struct kfd_mem_attachment *entry, 1213 struct amdgpu_sync *sync, 1214 bool no_update_pte) 1215 { 1216 int ret; 1217 1218 /* Set virtual address for the allocation */ 1219 ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0, 1220 amdgpu_bo_size(entry->bo_va->base.bo), 1221 entry->pte_flags); 1222 if (ret) { 1223 pr_err("Failed to map VA 0x%llx in vm. 
ret %d\n", 1224 entry->va, ret); 1225 return ret; 1226 } 1227 1228 if (no_update_pte) 1229 return 0; 1230 1231 ret = update_gpuvm_pte(mem, entry, sync); 1232 if (ret) { 1233 pr_err("update_gpuvm_pte() failed\n"); 1234 goto update_gpuvm_pte_failed; 1235 } 1236 1237 return 0; 1238 1239 update_gpuvm_pte_failed: 1240 unmap_bo_from_gpuvm(mem, entry, sync); 1241 return ret; 1242 } 1243 1244 static int process_validate_vms(struct amdkfd_process_info *process_info) 1245 { 1246 struct amdgpu_vm *peer_vm; 1247 int ret; 1248 1249 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1250 vm_list_node) { 1251 ret = vm_validate_pt_pd_bos(peer_vm); 1252 if (ret) 1253 return ret; 1254 } 1255 1256 return 0; 1257 } 1258 1259 static int process_sync_pds_resv(struct amdkfd_process_info *process_info, 1260 struct amdgpu_sync *sync) 1261 { 1262 struct amdgpu_vm *peer_vm; 1263 int ret; 1264 1265 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1266 vm_list_node) { 1267 struct amdgpu_bo *pd = peer_vm->root.bo; 1268 1269 ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv, 1270 AMDGPU_SYNC_NE_OWNER, 1271 AMDGPU_FENCE_OWNER_KFD); 1272 if (ret) 1273 return ret; 1274 } 1275 1276 return 0; 1277 } 1278 1279 static int process_update_pds(struct amdkfd_process_info *process_info, 1280 struct amdgpu_sync *sync) 1281 { 1282 struct amdgpu_vm *peer_vm; 1283 int ret; 1284 1285 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1286 vm_list_node) { 1287 ret = vm_update_pds(peer_vm, sync); 1288 if (ret) 1289 return ret; 1290 } 1291 1292 return 0; 1293 } 1294 1295 static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, 1296 struct dma_fence **ef) 1297 { 1298 struct amdkfd_process_info *info = NULL; 1299 int ret; 1300 1301 if (!*process_info) { 1302 info = kzalloc(sizeof(*info), GFP_KERNEL); 1303 if (!info) 1304 return -ENOMEM; 1305 1306 mutex_init(&info->lock); 1307 mutex_init(&info->notifier_lock); 1308 INIT_LIST_HEAD(&info->vm_list_head); 1309 INIT_LIST_HEAD(&info->kfd_bo_list); 1310 INIT_LIST_HEAD(&info->userptr_valid_list); 1311 INIT_LIST_HEAD(&info->userptr_inval_list); 1312 1313 info->eviction_fence = 1314 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), 1315 current->mm, 1316 NULL); 1317 if (!info->eviction_fence) { 1318 pr_err("Failed to create eviction fence\n"); 1319 ret = -ENOMEM; 1320 goto create_evict_fence_fail; 1321 } 1322 1323 info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); 1324 INIT_DELAYED_WORK(&info->restore_userptr_work, 1325 amdgpu_amdkfd_restore_userptr_worker); 1326 1327 *process_info = info; 1328 *ef = dma_fence_get(&info->eviction_fence->base); 1329 } 1330 1331 vm->process_info = *process_info; 1332 1333 /* Validate page directory and attach eviction fence */ 1334 ret = amdgpu_bo_reserve(vm->root.bo, true); 1335 if (ret) 1336 goto reserve_pd_fail; 1337 ret = vm_validate_pt_pd_bos(vm); 1338 if (ret) { 1339 pr_err("validate_pt_pd_bos() failed\n"); 1340 goto validate_pd_fail; 1341 } 1342 ret = amdgpu_bo_sync_wait(vm->root.bo, 1343 AMDGPU_FENCE_OWNER_KFD, false); 1344 if (ret) 1345 goto wait_pd_fail; 1346 ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); 1347 if (ret) 1348 goto reserve_shared_fail; 1349 dma_resv_add_fence(vm->root.bo->tbo.base.resv, 1350 &vm->process_info->eviction_fence->base, 1351 DMA_RESV_USAGE_BOOKKEEP); 1352 amdgpu_bo_unreserve(vm->root.bo); 1353 1354 /* Update process info */ 1355 mutex_lock(&vm->process_info->lock); 1356 list_add_tail(&vm->vm_list_node, 1357 &(vm->process_info->vm_list_head)); 1358 
vm->process_info->n_vms++; 1359 mutex_unlock(&vm->process_info->lock); 1360 1361 return 0; 1362 1363 reserve_shared_fail: 1364 wait_pd_fail: 1365 validate_pd_fail: 1366 amdgpu_bo_unreserve(vm->root.bo); 1367 reserve_pd_fail: 1368 vm->process_info = NULL; 1369 if (info) { 1370 /* Two fence references: one in info and one in *ef */ 1371 dma_fence_put(&info->eviction_fence->base); 1372 dma_fence_put(*ef); 1373 *ef = NULL; 1374 *process_info = NULL; 1375 put_pid(info->pid); 1376 create_evict_fence_fail: 1377 mutex_destroy(&info->lock); 1378 mutex_destroy(&info->notifier_lock); 1379 kfree(info); 1380 } 1381 return ret; 1382 } 1383 1384 /** 1385 * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria 1386 * @bo: Handle of buffer object being pinned 1387 * @domain: Domain into which BO should be pinned 1388 * 1389 * - USERPTR BOs are UNPINNABLE and will return error 1390 * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their 1391 * PIN count incremented. It is valid to PIN a BO multiple times 1392 * 1393 * Return: ZERO if successful in pinning, Non-Zero in case of error. 1394 */ 1395 static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) 1396 { 1397 int ret = 0; 1398 1399 ret = amdgpu_bo_reserve(bo, false); 1400 if (unlikely(ret)) 1401 return ret; 1402 1403 ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); 1404 if (ret) 1405 pr_err("Error in Pinning BO to domain: %d\n", domain); 1406 1407 amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); 1408 amdgpu_bo_unreserve(bo); 1409 1410 return ret; 1411 } 1412 1413 /** 1414 * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria 1415 * @bo: Handle of buffer object being unpinned 1416 * 1417 * - Is a illegal request for USERPTR BOs and is ignored 1418 * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their 1419 * PIN count decremented. Calls to UNPIN must balance calls to PIN 1420 */ 1421 static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) 1422 { 1423 int ret = 0; 1424 1425 ret = amdgpu_bo_reserve(bo, false); 1426 if (unlikely(ret)) 1427 return; 1428 1429 amdgpu_bo_unpin(bo); 1430 amdgpu_bo_unreserve(bo); 1431 } 1432 1433 int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, 1434 struct amdgpu_vm *avm, u32 pasid) 1435 1436 { 1437 int ret; 1438 1439 /* Free the original amdgpu allocated pasid, 1440 * will be replaced with kfd allocated pasid. 1441 */ 1442 if (avm->pasid) { 1443 amdgpu_pasid_free(avm->pasid); 1444 amdgpu_vm_set_pasid(adev, avm, 0); 1445 } 1446 1447 ret = amdgpu_vm_set_pasid(adev, avm, pasid); 1448 if (ret) 1449 return ret; 1450 1451 return 0; 1452 } 1453 1454 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, 1455 struct amdgpu_vm *avm, 1456 void **process_info, 1457 struct dma_fence **ef) 1458 { 1459 int ret; 1460 1461 /* Already a compute VM? 
*/ 1462 if (avm->process_info) 1463 return -EINVAL; 1464 1465 /* Convert VM into a compute VM */ 1466 ret = amdgpu_vm_make_compute(adev, avm); 1467 if (ret) 1468 return ret; 1469 1470 /* Initialize KFD part of the VM and process info */ 1471 ret = init_kfd_vm(avm, process_info, ef); 1472 if (ret) 1473 return ret; 1474 1475 amdgpu_vm_set_task_info(avm); 1476 1477 return 0; 1478 } 1479 1480 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, 1481 struct amdgpu_vm *vm) 1482 { 1483 struct amdkfd_process_info *process_info = vm->process_info; 1484 1485 if (!process_info) 1486 return; 1487 1488 /* Update process info */ 1489 mutex_lock(&process_info->lock); 1490 process_info->n_vms--; 1491 list_del(&vm->vm_list_node); 1492 mutex_unlock(&process_info->lock); 1493 1494 vm->process_info = NULL; 1495 1496 /* Release per-process resources when last compute VM is destroyed */ 1497 if (!process_info->n_vms) { 1498 WARN_ON(!list_empty(&process_info->kfd_bo_list)); 1499 WARN_ON(!list_empty(&process_info->userptr_valid_list)); 1500 WARN_ON(!list_empty(&process_info->userptr_inval_list)); 1501 1502 dma_fence_put(&process_info->eviction_fence->base); 1503 cancel_delayed_work_sync(&process_info->restore_userptr_work); 1504 put_pid(process_info->pid); 1505 mutex_destroy(&process_info->lock); 1506 mutex_destroy(&process_info->notifier_lock); 1507 kfree(process_info); 1508 } 1509 } 1510 1511 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, 1512 void *drm_priv) 1513 { 1514 struct amdgpu_vm *avm; 1515 1516 if (WARN_ON(!adev || !drm_priv)) 1517 return; 1518 1519 avm = drm_priv_to_vm(drm_priv); 1520 1521 pr_debug("Releasing process vm %p\n", avm); 1522 1523 /* The original pasid of amdgpu vm has already been 1524 * released during making a amdgpu vm to a compute vm 1525 * The current pasid is managed by kfd and will be 1526 * released on kfd process destroy. Set amdgpu pasid 1527 * to 0 to avoid duplicate release. 
1528 */ 1529 amdgpu_vm_release_compute(adev, avm); 1530 } 1531 1532 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) 1533 { 1534 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); 1535 struct amdgpu_bo *pd = avm->root.bo; 1536 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); 1537 1538 if (adev->asic_type < CHIP_VEGA10) 1539 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; 1540 return avm->pd_phys_addr; 1541 } 1542 1543 void amdgpu_amdkfd_block_mmu_notifications(void *p) 1544 { 1545 struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; 1546 1547 mutex_lock(&pinfo->lock); 1548 WRITE_ONCE(pinfo->block_mmu_notifications, true); 1549 mutex_unlock(&pinfo->lock); 1550 } 1551 1552 int amdgpu_amdkfd_criu_resume(void *p) 1553 { 1554 int ret = 0; 1555 struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; 1556 1557 mutex_lock(&pinfo->lock); 1558 pr_debug("scheduling work\n"); 1559 mutex_lock(&pinfo->notifier_lock); 1560 pinfo->evicted_bos++; 1561 mutex_unlock(&pinfo->notifier_lock); 1562 if (!READ_ONCE(pinfo->block_mmu_notifications)) { 1563 ret = -EINVAL; 1564 goto out_unlock; 1565 } 1566 WRITE_ONCE(pinfo->block_mmu_notifications, false); 1567 schedule_delayed_work(&pinfo->restore_userptr_work, 0); 1568 1569 out_unlock: 1570 mutex_unlock(&pinfo->lock); 1571 return ret; 1572 } 1573 1574 size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) 1575 { 1576 uint64_t reserved_for_pt = 1577 ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); 1578 size_t available; 1579 1580 spin_lock(&kfd_mem_limit.mem_limit_lock); 1581 available = adev->gmc.real_vram_size 1582 - adev->kfd.vram_used_aligned 1583 - atomic64_read(&adev->vram_pin_size) 1584 - reserved_for_pt; 1585 spin_unlock(&kfd_mem_limit.mem_limit_lock); 1586 1587 return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); 1588 } 1589 1590 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( 1591 struct amdgpu_device *adev, uint64_t va, uint64_t size, 1592 void *drm_priv, struct kgd_mem **mem, 1593 uint64_t *offset, uint32_t flags, bool criu_resume) 1594 { 1595 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); 1596 enum ttm_bo_type bo_type = ttm_bo_type_device; 1597 struct sg_table *sg = NULL; 1598 uint64_t user_addr = 0; 1599 struct amdgpu_bo *bo; 1600 struct drm_gem_object *gobj = NULL; 1601 u32 domain, alloc_domain; 1602 uint64_t aligned_size; 1603 u64 alloc_flags; 1604 int ret; 1605 1606 /* 1607 * Check on which domain to allocate BO 1608 */ 1609 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 1610 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; 1611 alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; 1612 alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? 
1613 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; 1614 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { 1615 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; 1616 alloc_flags = 0; 1617 } else { 1618 domain = AMDGPU_GEM_DOMAIN_GTT; 1619 alloc_domain = AMDGPU_GEM_DOMAIN_CPU; 1620 alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE; 1621 1622 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 1623 if (!offset || !*offset) 1624 return -EINVAL; 1625 user_addr = untagged_addr(*offset); 1626 } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | 1627 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { 1628 bo_type = ttm_bo_type_sg; 1629 if (size > UINT_MAX) 1630 return -EINVAL; 1631 sg = create_sg_table(*offset, size); 1632 if (!sg) 1633 return -ENOMEM; 1634 } else { 1635 return -EINVAL; 1636 } 1637 } 1638 1639 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) 1640 alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; 1641 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) 1642 alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; 1643 1644 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); 1645 if (!*mem) { 1646 ret = -ENOMEM; 1647 goto err; 1648 } 1649 INIT_LIST_HEAD(&(*mem)->attachments); 1650 mutex_init(&(*mem)->lock); 1651 (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); 1652 1653 /* Workaround for AQL queue wraparound bug. Map the same 1654 * memory twice. That means we only actually allocate half 1655 * the memory. 1656 */ 1657 if ((*mem)->aql_queue) 1658 size >>= 1; 1659 aligned_size = PAGE_ALIGN(size); 1660 1661 (*mem)->alloc_flags = flags; 1662 1663 amdgpu_sync_create(&(*mem)->sync); 1664 1665 ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags); 1666 if (ret) { 1667 pr_debug("Insufficient memory\n"); 1668 goto err_reserve_limit; 1669 } 1670 1671 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", 1672 va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain)); 1673 1674 ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, 1675 bo_type, NULL, &gobj); 1676 if (ret) { 1677 pr_debug("Failed to create BO on domain %s. ret %d\n", 1678 domain_string(alloc_domain), ret); 1679 goto err_bo_create; 1680 } 1681 ret = drm_vma_node_allow(&gobj->vma_node, drm_priv); 1682 if (ret) { 1683 pr_debug("Failed to allow vma node access. 
ret %d\n", ret); 1684 goto err_node_allow; 1685 } 1686 bo = gem_to_amdgpu_bo(gobj); 1687 if (bo_type == ttm_bo_type_sg) { 1688 bo->tbo.sg = sg; 1689 bo->tbo.ttm->sg = sg; 1690 } 1691 bo->kfd_bo = *mem; 1692 (*mem)->bo = bo; 1693 if (user_addr) 1694 bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO; 1695 1696 (*mem)->va = va; 1697 (*mem)->domain = domain; 1698 (*mem)->mapped_to_gpu_memory = 0; 1699 (*mem)->process_info = avm->process_info; 1700 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); 1701 1702 if (user_addr) { 1703 pr_debug("creating userptr BO for user_addr = %llx\n", user_addr); 1704 ret = init_user_pages(*mem, user_addr, criu_resume); 1705 if (ret) 1706 goto allocate_init_user_pages_failed; 1707 } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | 1708 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { 1709 ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT); 1710 if (ret) { 1711 pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n"); 1712 goto err_pin_bo; 1713 } 1714 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; 1715 bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; 1716 } 1717 1718 if (offset) 1719 *offset = amdgpu_bo_mmap_offset(bo); 1720 1721 return 0; 1722 1723 allocate_init_user_pages_failed: 1724 err_pin_bo: 1725 remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); 1726 drm_vma_node_revoke(&gobj->vma_node, drm_priv); 1727 err_node_allow: 1728 /* Don't unreserve system mem limit twice */ 1729 goto err_reserve_limit; 1730 err_bo_create: 1731 amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags); 1732 err_reserve_limit: 1733 mutex_destroy(&(*mem)->lock); 1734 if (gobj) 1735 drm_gem_object_put(gobj); 1736 else 1737 kfree(*mem); 1738 err: 1739 if (sg) { 1740 sg_free_table(sg); 1741 kfree(sg); 1742 } 1743 return ret; 1744 } 1745 1746 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( 1747 struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, 1748 uint64_t *size) 1749 { 1750 struct amdkfd_process_info *process_info = mem->process_info; 1751 unsigned long bo_size = mem->bo->tbo.base.size; 1752 bool use_release_notifier = (mem->bo->kfd_bo == mem); 1753 struct kfd_mem_attachment *entry, *tmp; 1754 struct bo_vm_reservation_context ctx; 1755 struct ttm_validate_buffer *bo_list_entry; 1756 unsigned int mapped_to_gpu_memory; 1757 int ret; 1758 bool is_imported = false; 1759 1760 mutex_lock(&mem->lock); 1761 1762 /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ 1763 if (mem->alloc_flags & 1764 (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | 1765 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { 1766 amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); 1767 } 1768 1769 mapped_to_gpu_memory = mem->mapped_to_gpu_memory; 1770 is_imported = mem->is_imported; 1771 mutex_unlock(&mem->lock); 1772 /* lock is not needed after this, since mem is unused and will 1773 * be freed anyway 1774 */ 1775 1776 if (mapped_to_gpu_memory > 0) { 1777 pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", 1778 mem->va, bo_size); 1779 return -EBUSY; 1780 } 1781 1782 /* Make sure restore workers don't access the BO any more */ 1783 bo_list_entry = &mem->validate_list; 1784 mutex_lock(&process_info->lock); 1785 list_del(&bo_list_entry->head); 1786 mutex_unlock(&process_info->lock); 1787 1788 /* Cleanup user pages and MMU notifiers */ 1789 if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { 1790 amdgpu_hmm_unregister(mem->bo); 1791 mutex_lock(&process_info->notifier_lock); 1792 amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); 1793 mutex_unlock(&process_info->notifier_lock); 1794 } 1795 1796 ret = 
reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); 1797 if (unlikely(ret)) 1798 return ret; 1799 1800 /* The eviction fence should be removed by the last unmap. 1801 * TODO: Log an error condition if the bo still has the eviction fence 1802 * attached 1803 */ 1804 amdgpu_amdkfd_remove_eviction_fence(mem->bo, 1805 process_info->eviction_fence); 1806 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, 1807 mem->va + bo_size * (1 + mem->aql_queue)); 1808 1809 /* Remove from VM internal data structures */ 1810 list_for_each_entry_safe(entry, tmp, &mem->attachments, list) 1811 kfd_mem_detach(entry); 1812 1813 ret = unreserve_bo_and_vms(&ctx, false, false); 1814 1815 /* Free the sync object */ 1816 amdgpu_sync_free(&mem->sync); 1817 1818 /* If the SG is not NULL, it's one we created for a doorbell or mmio 1819 * remap BO. We need to free it. 1820 */ 1821 if (mem->bo->tbo.sg) { 1822 sg_free_table(mem->bo->tbo.sg); 1823 kfree(mem->bo->tbo.sg); 1824 } 1825 1826 /* Update the size of the BO being freed if it was allocated from 1827 * VRAM and is not imported. 1828 */ 1829 if (size) { 1830 if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && 1831 (!is_imported)) 1832 *size = bo_size; 1833 else 1834 *size = 0; 1835 } 1836 1837 /* Free the BO*/ 1838 drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); 1839 if (mem->dmabuf) 1840 dma_buf_put(mem->dmabuf); 1841 mutex_destroy(&mem->lock); 1842 1843 /* If this releases the last reference, it will end up calling 1844 * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why 1845 * this needs to be the last call here. 1846 */ 1847 drm_gem_object_put(&mem->bo->tbo.base); 1848 1849 /* 1850 * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(), 1851 * explicitly free it here. 1852 */ 1853 if (!use_release_notifier) 1854 kfree(mem); 1855 1856 return ret; 1857 } 1858 1859 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( 1860 struct amdgpu_device *adev, struct kgd_mem *mem, 1861 void *drm_priv) 1862 { 1863 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); 1864 int ret; 1865 struct amdgpu_bo *bo; 1866 uint32_t domain; 1867 struct kfd_mem_attachment *entry; 1868 struct bo_vm_reservation_context ctx; 1869 unsigned long bo_size; 1870 bool is_invalid_userptr = false; 1871 1872 bo = mem->bo; 1873 if (!bo) { 1874 pr_err("Invalid BO when mapping memory to GPU\n"); 1875 return -EINVAL; 1876 } 1877 1878 /* Make sure restore is not running concurrently. Since we 1879 * don't map invalid userptr BOs, we rely on the next restore 1880 * worker to do the mapping 1881 */ 1882 mutex_lock(&mem->process_info->lock); 1883 1884 /* Lock notifier lock. 
If we find an invalid userptr BO, we can be 1885 * sure that the MMU notifier is no longer running 1886 * concurrently and the queues are actually stopped 1887 */ 1888 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { 1889 mutex_lock(&mem->process_info->notifier_lock); 1890 is_invalid_userptr = !!mem->invalid; 1891 mutex_unlock(&mem->process_info->notifier_lock); 1892 } 1893 1894 mutex_lock(&mem->lock); 1895 1896 domain = mem->domain; 1897 bo_size = bo->tbo.base.size; 1898 1899 pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", 1900 mem->va, 1901 mem->va + bo_size * (1 + mem->aql_queue), 1902 avm, domain_string(domain)); 1903 1904 if (!kfd_mem_is_attached(avm, mem)) { 1905 ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue); 1906 if (ret) 1907 goto out; 1908 } 1909 1910 ret = reserve_bo_and_vm(mem, avm, &ctx); 1911 if (unlikely(ret)) 1912 goto out; 1913 1914 /* Userptr can be marked as "not invalid", but not actually be 1915 * validated yet (still in the system domain). In that case 1916 * the queues are still stopped and we can leave mapping for 1917 * the next restore worker 1918 */ 1919 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && 1920 bo->tbo.resource->mem_type == TTM_PL_SYSTEM) 1921 is_invalid_userptr = true; 1922 1923 ret = vm_validate_pt_pd_bos(avm); 1924 if (unlikely(ret)) 1925 goto out_unreserve; 1926 1927 if (mem->mapped_to_gpu_memory == 0 && 1928 !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { 1929 /* Validate BO only once. The eviction fence gets added to BO 1930 * the first time it is mapped. Validate will wait for all 1931 * background evictions to complete. 1932 */ 1933 ret = amdgpu_amdkfd_bo_validate(bo, domain, true); 1934 if (ret) { 1935 pr_debug("Validate failed\n"); 1936 goto out_unreserve; 1937 } 1938 } 1939 1940 list_for_each_entry(entry, &mem->attachments, list) { 1941 if (entry->bo_va->base.vm != avm || entry->is_mapped) 1942 continue; 1943 1944 pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", 1945 entry->va, entry->va + bo_size, entry); 1946 1947 ret = map_bo_to_gpuvm(mem, entry, ctx.sync, 1948 is_invalid_userptr); 1949 if (ret) { 1950 pr_err("Failed to map bo to gpuvm\n"); 1951 goto out_unreserve; 1952 } 1953 1954 ret = vm_update_pds(avm, ctx.sync); 1955 if (ret) { 1956 pr_err("Failed to update page directories\n"); 1957 goto out_unreserve; 1958 } 1959 1960 entry->is_mapped = true; 1961 mem->mapped_to_gpu_memory++; 1962 pr_debug("\t INC mapping count %d\n", 1963 mem->mapped_to_gpu_memory); 1964 } 1965 1966 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) 1967 dma_resv_add_fence(bo->tbo.base.resv, 1968 &avm->process_info->eviction_fence->base, 1969 DMA_RESV_USAGE_BOOKKEEP); 1970 ret = unreserve_bo_and_vms(&ctx, false, false); 1971 1972 goto out; 1973 1974 out_unreserve: 1975 unreserve_bo_and_vms(&ctx, false, false); 1976 out: 1977 mutex_unlock(&mem->process_info->lock); 1978 mutex_unlock(&mem->lock); 1979 return ret; 1980 } 1981 1982 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( 1983 struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) 1984 { 1985 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); 1986 struct amdkfd_process_info *process_info = avm->process_info; 1987 unsigned long bo_size = mem->bo->tbo.base.size; 1988 struct kfd_mem_attachment *entry; 1989 struct bo_vm_reservation_context ctx; 1990 int ret; 1991 1992 mutex_lock(&mem->lock); 1993 1994 ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx); 1995 if (unlikely(ret)) 1996 goto out; 1997 /* If no VMs were reserved, it means the BO wasn't actually mapped */ 1998 if 
(ctx.n_vms == 0) { 1999 ret = -EINVAL; 2000 goto unreserve_out; 2001 } 2002 2003 ret = vm_validate_pt_pd_bos(avm); 2004 if (unlikely(ret)) 2005 goto unreserve_out; 2006 2007 pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", 2008 mem->va, 2009 mem->va + bo_size * (1 + mem->aql_queue), 2010 avm); 2011 2012 list_for_each_entry(entry, &mem->attachments, list) { 2013 if (entry->bo_va->base.vm != avm || !entry->is_mapped) 2014 continue; 2015 2016 pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", 2017 entry->va, entry->va + bo_size, entry); 2018 2019 unmap_bo_from_gpuvm(mem, entry, ctx.sync); 2020 entry->is_mapped = false; 2021 2022 mem->mapped_to_gpu_memory--; 2023 pr_debug("\t DEC mapping count %d\n", 2024 mem->mapped_to_gpu_memory); 2025 } 2026 2027 /* If BO is unmapped from all VMs, unfence it. It can be evicted if 2028 * required. 2029 */ 2030 if (mem->mapped_to_gpu_memory == 0 && 2031 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && 2032 !mem->bo->tbo.pin_count) 2033 amdgpu_amdkfd_remove_eviction_fence(mem->bo, 2034 process_info->eviction_fence); 2035 2036 unreserve_out: 2037 unreserve_bo_and_vms(&ctx, false, false); 2038 out: 2039 mutex_unlock(&mem->lock); 2040 return ret; 2041 } 2042 2043 int amdgpu_amdkfd_gpuvm_sync_memory( 2044 struct amdgpu_device *adev, struct kgd_mem *mem, bool intr) 2045 { 2046 struct amdgpu_sync sync; 2047 int ret; 2048 2049 amdgpu_sync_create(&sync); 2050 2051 mutex_lock(&mem->lock); 2052 amdgpu_sync_clone(&mem->sync, &sync); 2053 mutex_unlock(&mem->lock); 2054 2055 ret = amdgpu_sync_wait(&sync, intr); 2056 amdgpu_sync_free(&sync); 2057 return ret; 2058 } 2059 2060 /** 2061 * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count 2062 * @adev: Device to which allocated BO belongs 2063 * @bo: Buffer object to be mapped 2064 * 2065 * Before return, bo reference count is incremented. To release the reference and unpin/ 2066 * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. 2067 */ 2068 int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) 2069 { 2070 int ret; 2071 2072 ret = amdgpu_bo_reserve(bo, true); 2073 if (ret) { 2074 pr_err("Failed to reserve bo. ret %d\n", ret); 2075 goto err_reserve_bo_failed; 2076 } 2077 2078 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); 2079 if (ret) { 2080 pr_err("Failed to pin bo. ret %d\n", ret); 2081 goto err_pin_bo_failed; 2082 } 2083 2084 ret = amdgpu_ttm_alloc_gart(&bo->tbo); 2085 if (ret) { 2086 pr_err("Failed to bind bo to GART. ret %d\n", ret); 2087 goto err_map_bo_gart_failed; 2088 } 2089 2090 amdgpu_amdkfd_remove_eviction_fence( 2091 bo, bo->vm_bo->vm->process_info->eviction_fence); 2092 2093 amdgpu_bo_unreserve(bo); 2094 2095 bo = amdgpu_bo_ref(bo); 2096 2097 return 0; 2098 2099 err_map_bo_gart_failed: 2100 amdgpu_bo_unpin(bo); 2101 err_pin_bo_failed: 2102 amdgpu_bo_unreserve(bo); 2103 err_reserve_bo_failed: 2104 2105 return ret; 2106 } 2107 2108 /** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access 2109 * 2110 * @mem: Buffer object to be mapped for CPU access 2111 * @kptr[out]: pointer in kernel CPU address space 2112 * @size[out]: size of the buffer 2113 * 2114 * Pins the BO and maps it for kernel CPU access. The eviction fence is removed 2115 * from the BO, since pinned BOs cannot be evicted. The bo must remain on the 2116 * validate_list, so the GPU mapping can be restored after a page table was 2117 * evicted. 
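 *
 * Illustrative usage sketch (hypothetical caller, error handling omitted):
 *
 *   void *kaddr;
 *   uint64_t kaddr_size;
 *
 *   ret = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kaddr, &kaddr_size);
 *   ... CPU access through kaddr ...
 *   amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);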
2118  *
2119  * Return: 0 on success, error code on failure
2120  */
2121 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
2122 		void **kptr, uint64_t *size)
2123 {
2124 	int ret;
2125 	struct amdgpu_bo *bo = mem->bo;
2126
2127 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
2128 		pr_err("userptr can't be mapped to kernel\n");
2129 		return -EINVAL;
2130 	}
2131
2132 	mutex_lock(&mem->process_info->lock);
2133
2134 	ret = amdgpu_bo_reserve(bo, true);
2135 	if (ret) {
2136 		pr_err("Failed to reserve bo. ret %d\n", ret);
2137 		goto bo_reserve_failed;
2138 	}
2139
2140 	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
2141 	if (ret) {
2142 		pr_err("Failed to pin bo. ret %d\n", ret);
2143 		goto pin_failed;
2144 	}
2145
2146 	ret = amdgpu_bo_kmap(bo, kptr);
2147 	if (ret) {
2148 		pr_err("Failed to map bo to kernel. ret %d\n", ret);
2149 		goto kmap_failed;
2150 	}
2151
2152 	amdgpu_amdkfd_remove_eviction_fence(
2153 		bo, mem->process_info->eviction_fence);
2154
2155 	if (size)
2156 		*size = amdgpu_bo_size(bo);
2157
2158 	amdgpu_bo_unreserve(bo);
2159
2160 	mutex_unlock(&mem->process_info->lock);
2161 	return 0;
2162
2163 kmap_failed:
2164 	amdgpu_bo_unpin(bo);
2165 pin_failed:
2166 	amdgpu_bo_unreserve(bo);
2167 bo_reserve_failed:
2168 	mutex_unlock(&mem->process_info->lock);
2169
2170 	return ret;
2171 }
2172
2173 /** amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel() - Unmap a GTT BO for kernel CPU access
2174  *
2175  * @mem: Buffer object to be unmapped for CPU access
2176  *
2177  * Removes the kernel CPU mapping and unpins the BO. It does not restore the
2178  * eviction fence, so this function should only be used for cleanup before the
2179  * BO is destroyed.
2180  */
2181 void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
2182 {
2183 	struct amdgpu_bo *bo = mem->bo;
2184
2185 	amdgpu_bo_reserve(bo, true);
2186 	amdgpu_bo_kunmap(bo);
2187 	amdgpu_bo_unpin(bo);
2188 	amdgpu_bo_unreserve(bo);
2189 }
2190
2191 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
2192 		struct kfd_vm_fault_info *mem)
2193 {
2194 	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
2195 		*mem = *adev->gmc.vm_fault_info;
2196 		mb(); /* make sure read happened */
2197 		atomic_set(&adev->gmc.vm_fault_info_updated, 0);
2198 	}
2199 	return 0;
2200 }
2201
2202 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
2203 		struct dma_buf *dma_buf,
2204 		uint64_t va, void *drm_priv,
2205 		struct kgd_mem **mem, uint64_t *size,
2206 		uint64_t *mmap_offset)
2207 {
2208 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
2209 	struct drm_gem_object *obj;
2210 	struct amdgpu_bo *bo;
2211 	int ret;
2212
2213 	if (dma_buf->ops != &amdgpu_dmabuf_ops)
2214 		/* Can't handle non-graphics buffers */
2215 		return -EINVAL;
2216
2217 	obj = dma_buf->priv;
2218 	if (drm_to_adev(obj->dev) != adev)
2219 		/* Can't handle buffers from other devices */
2220 		return -EINVAL;
2221
2222 	bo = gem_to_amdgpu_bo(obj);
2223 	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
2224 				       AMDGPU_GEM_DOMAIN_GTT)))
2225 		/* Only VRAM and GTT BOs are supported */
2226 		return -EINVAL;
2227
2228 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2229 	if (!*mem)
2230 		return -ENOMEM;
2231
2232 	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
2233 	if (ret) {
2234 		kfree(*mem);
2235 		return ret;
2236 	}
2237
2238 	if (size)
2239 		*size = amdgpu_bo_size(bo);
2240
2241 	if (mmap_offset)
2242 		*mmap_offset = amdgpu_bo_mmap_offset(bo);
2243
2244 	INIT_LIST_HEAD(&(*mem)->attachments);
2245 	mutex_init(&(*mem)->lock);
2246
2247 	(*mem)->alloc_flags =
2248 		((bo->preferred_domains &
AMDGPU_GEM_DOMAIN_VRAM) ? 2249 KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT) 2250 | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE 2251 | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; 2252 2253 drm_gem_object_get(&bo->tbo.base); 2254 (*mem)->bo = bo; 2255 (*mem)->va = va; 2256 (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 2257 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; 2258 (*mem)->mapped_to_gpu_memory = 0; 2259 (*mem)->process_info = avm->process_info; 2260 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); 2261 amdgpu_sync_create(&(*mem)->sync); 2262 (*mem)->is_imported = true; 2263 2264 return 0; 2265 } 2266 2267 /* Evict a userptr BO by stopping the queues if necessary 2268 * 2269 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it 2270 * cannot do any memory allocations, and cannot take any locks that 2271 * are held elsewhere while allocating memory. 2272 * 2273 * It doesn't do anything to the BO itself. The real work happens in 2274 * restore, where we get updated page addresses. This function only 2275 * ensures that GPU access to the BO is stopped. 2276 */ 2277 int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, 2278 unsigned long cur_seq, struct kgd_mem *mem) 2279 { 2280 struct amdkfd_process_info *process_info = mem->process_info; 2281 int r = 0; 2282 2283 /* Do not process MMU notifications during CRIU restore until 2284 * KFD_CRIU_OP_RESUME IOCTL is received 2285 */ 2286 if (READ_ONCE(process_info->block_mmu_notifications)) 2287 return 0; 2288 2289 mutex_lock(&process_info->notifier_lock); 2290 mmu_interval_set_seq(mni, cur_seq); 2291 2292 mem->invalid++; 2293 if (++process_info->evicted_bos == 1) { 2294 /* First eviction, stop the queues */ 2295 r = kgd2kfd_quiesce_mm(mni->mm, 2296 KFD_QUEUE_EVICTION_TRIGGER_USERPTR); 2297 if (r) 2298 pr_err("Failed to quiesce KFD\n"); 2299 schedule_delayed_work(&process_info->restore_userptr_work, 2300 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); 2301 } 2302 mutex_unlock(&process_info->notifier_lock); 2303 2304 return r; 2305 } 2306 2307 /* Update invalid userptr BOs 2308 * 2309 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to 2310 * userptr_inval_list and updates user pages for all BOs that have 2311 * been invalidated since their last update. 2312 */ 2313 static int update_invalid_user_pages(struct amdkfd_process_info *process_info, 2314 struct mm_struct *mm) 2315 { 2316 struct kgd_mem *mem, *tmp_mem; 2317 struct amdgpu_bo *bo; 2318 struct ttm_operation_ctx ctx = { false, false }; 2319 uint32_t invalid; 2320 int ret = 0; 2321 2322 mutex_lock(&process_info->notifier_lock); 2323 2324 /* Move all invalidated BOs to the userptr_inval_list */ 2325 list_for_each_entry_safe(mem, tmp_mem, 2326 &process_info->userptr_valid_list, 2327 validate_list.head) 2328 if (mem->invalid) 2329 list_move_tail(&mem->validate_list.head, 2330 &process_info->userptr_inval_list); 2331 2332 /* Go through userptr_inval_list and update any invalid user_pages */ 2333 list_for_each_entry(mem, &process_info->userptr_inval_list, 2334 validate_list.head) { 2335 invalid = mem->invalid; 2336 if (!invalid) 2337 /* BO hasn't been invalidated since the last 2338 * revalidation attempt. Keep its page list. 
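 * (mem->invalid is only cleared further down, once updated user pages
 * have been fetched for the BO, so a zero value here means a previous
 * pass already refreshed this BO's pages and it is just waiting to be
 * validated and moved back to the valid list.)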
2339 */ 2340 continue; 2341 2342 bo = mem->bo; 2343 2344 amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); 2345 mem->range = NULL; 2346 2347 /* BO reservations and getting user pages (hmm_range_fault) 2348 * must happen outside the notifier lock 2349 */ 2350 mutex_unlock(&process_info->notifier_lock); 2351 2352 /* Move the BO to system (CPU) domain if necessary to unmap 2353 * and free the SG table 2354 */ 2355 if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) { 2356 if (amdgpu_bo_reserve(bo, true)) 2357 return -EAGAIN; 2358 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 2359 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 2360 amdgpu_bo_unreserve(bo); 2361 if (ret) { 2362 pr_err("%s: Failed to invalidate userptr BO\n", 2363 __func__); 2364 return -EAGAIN; 2365 } 2366 } 2367 2368 /* Get updated user pages */ 2369 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, 2370 &mem->range); 2371 if (ret) { 2372 pr_debug("Failed %d to get user pages\n", ret); 2373 2374 /* Return -EFAULT bad address error as success. It will 2375 * fail later with a VM fault if the GPU tries to access 2376 * it. Better than hanging indefinitely with stalled 2377 * user mode queues. 2378 * 2379 * Return other error -EBUSY or -ENOMEM to retry restore 2380 */ 2381 if (ret != -EFAULT) 2382 return ret; 2383 2384 ret = 0; 2385 } 2386 2387 mutex_lock(&process_info->notifier_lock); 2388 2389 /* Mark the BO as valid unless it was invalidated 2390 * again concurrently. 2391 */ 2392 if (mem->invalid != invalid) { 2393 ret = -EAGAIN; 2394 goto unlock_out; 2395 } 2396 mem->invalid = 0; 2397 } 2398 2399 unlock_out: 2400 mutex_unlock(&process_info->notifier_lock); 2401 2402 return ret; 2403 } 2404 2405 /* Validate invalid userptr BOs 2406 * 2407 * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables 2408 * with new page addresses and waits for the page table updates to complete. 
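 *
 * Roughly, the steps below are:
 *  1. Reserve all page directory BOs and all BOs on userptr_inval_list.
 *  2. Validate the PD/PT BOs of every VM in the process.
 *  3. For each invalid userptr BO that has user pages, validate it in its
 *     original domain.
 *  4. For each mapped attachment, DMA-unmap it and update the GPUVM PTEs
 *     (clearing them if no user pages could be obtained).
 *  5. Update the page directories and wait for all updates to finish.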
2409 */ 2410 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) 2411 { 2412 struct amdgpu_bo_list_entry *pd_bo_list_entries; 2413 struct list_head resv_list, duplicates; 2414 struct ww_acquire_ctx ticket; 2415 struct amdgpu_sync sync; 2416 2417 struct amdgpu_vm *peer_vm; 2418 struct kgd_mem *mem, *tmp_mem; 2419 struct amdgpu_bo *bo; 2420 struct ttm_operation_ctx ctx = { false, false }; 2421 int i, ret; 2422 2423 pd_bo_list_entries = kcalloc(process_info->n_vms, 2424 sizeof(struct amdgpu_bo_list_entry), 2425 GFP_KERNEL); 2426 if (!pd_bo_list_entries) { 2427 pr_err("%s: Failed to allocate PD BO list entries\n", __func__); 2428 ret = -ENOMEM; 2429 goto out_no_mem; 2430 } 2431 2432 INIT_LIST_HEAD(&resv_list); 2433 INIT_LIST_HEAD(&duplicates); 2434 2435 /* Get all the page directory BOs that need to be reserved */ 2436 i = 0; 2437 list_for_each_entry(peer_vm, &process_info->vm_list_head, 2438 vm_list_node) 2439 amdgpu_vm_get_pd_bo(peer_vm, &resv_list, 2440 &pd_bo_list_entries[i++]); 2441 /* Add the userptr_inval_list entries to resv_list */ 2442 list_for_each_entry(mem, &process_info->userptr_inval_list, 2443 validate_list.head) { 2444 list_add_tail(&mem->resv_list.head, &resv_list); 2445 mem->resv_list.bo = mem->validate_list.bo; 2446 mem->resv_list.num_shared = mem->validate_list.num_shared; 2447 } 2448 2449 /* Reserve all BOs and page tables for validation */ 2450 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); 2451 WARN(!list_empty(&duplicates), "Duplicates should be empty"); 2452 if (ret) 2453 goto out_free; 2454 2455 amdgpu_sync_create(&sync); 2456 2457 ret = process_validate_vms(process_info); 2458 if (ret) 2459 goto unreserve_out; 2460 2461 /* Validate BOs and update GPUVM page tables */ 2462 list_for_each_entry_safe(mem, tmp_mem, 2463 &process_info->userptr_inval_list, 2464 validate_list.head) { 2465 struct kfd_mem_attachment *attachment; 2466 2467 bo = mem->bo; 2468 2469 /* Validate the BO if we got user pages */ 2470 if (bo->tbo.ttm->pages[0]) { 2471 amdgpu_bo_placement_from_domain(bo, mem->domain); 2472 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 2473 if (ret) { 2474 pr_err("%s: failed to validate BO\n", __func__); 2475 goto unreserve_out; 2476 } 2477 } 2478 2479 /* Update mapping. If the BO was not validated 2480 * (because we couldn't get user pages), this will 2481 * clear the page table entries, which will result in 2482 * VM faults if the GPU tries to access the invalid 2483 * memory. 2484 */ 2485 list_for_each_entry(attachment, &mem->attachments, list) { 2486 if (!attachment->is_mapped) 2487 continue; 2488 2489 kfd_mem_dmaunmap_attachment(mem, attachment); 2490 ret = update_gpuvm_pte(mem, attachment, &sync); 2491 if (ret) { 2492 pr_err("%s: update PTE failed\n", __func__); 2493 /* make sure this gets validated again */ 2494 mutex_lock(&process_info->notifier_lock); 2495 mem->invalid++; 2496 mutex_unlock(&process_info->notifier_lock); 2497 goto unreserve_out; 2498 } 2499 } 2500 } 2501 2502 /* Update page directories */ 2503 ret = process_update_pds(process_info, &sync); 2504 2505 unreserve_out: 2506 ttm_eu_backoff_reservation(&ticket, &resv_list); 2507 amdgpu_sync_wait(&sync, false); 2508 amdgpu_sync_free(&sync); 2509 out_free: 2510 kfree(pd_bo_list_entries); 2511 out_no_mem: 2512 2513 return ret; 2514 } 2515 2516 /* Confirm that all user pages are valid while holding the notifier lock 2517 * 2518 * Moves valid BOs from the userptr_inval_list back to userptr_val_list. 
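 *
 * Called with process_info->notifier_lock held (hence the _locked suffix);
 * the restore worker takes that lock for its final check for concurrent
 * evictions before calling this. amdgpu_ttm_tt_get_user_pages_done() is
 * used to decide whether the pages collected earlier are still valid.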
2519 */ 2520 static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info) 2521 { 2522 struct kgd_mem *mem, *tmp_mem; 2523 int ret = 0; 2524 2525 list_for_each_entry_safe(mem, tmp_mem, 2526 &process_info->userptr_inval_list, 2527 validate_list.head) { 2528 bool valid = amdgpu_ttm_tt_get_user_pages_done( 2529 mem->bo->tbo.ttm, mem->range); 2530 2531 mem->range = NULL; 2532 if (!valid) { 2533 WARN(!mem->invalid, "Invalid BO not marked invalid"); 2534 ret = -EAGAIN; 2535 continue; 2536 } 2537 WARN(mem->invalid, "Valid BO is marked invalid"); 2538 2539 list_move_tail(&mem->validate_list.head, 2540 &process_info->userptr_valid_list); 2541 } 2542 2543 return ret; 2544 } 2545 2546 /* Worker callback to restore evicted userptr BOs 2547 * 2548 * Tries to update and validate all userptr BOs. If successful and no 2549 * concurrent evictions happened, the queues are restarted. Otherwise, 2550 * reschedule for another attempt later. 2551 */ 2552 static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) 2553 { 2554 struct delayed_work *dwork = to_delayed_work(work); 2555 struct amdkfd_process_info *process_info = 2556 container_of(dwork, struct amdkfd_process_info, 2557 restore_userptr_work); 2558 struct task_struct *usertask; 2559 struct mm_struct *mm; 2560 uint32_t evicted_bos; 2561 2562 mutex_lock(&process_info->notifier_lock); 2563 evicted_bos = process_info->evicted_bos; 2564 mutex_unlock(&process_info->notifier_lock); 2565 if (!evicted_bos) 2566 return; 2567 2568 /* Reference task and mm in case of concurrent process termination */ 2569 usertask = get_pid_task(process_info->pid, PIDTYPE_PID); 2570 if (!usertask) 2571 return; 2572 mm = get_task_mm(usertask); 2573 if (!mm) { 2574 put_task_struct(usertask); 2575 return; 2576 } 2577 2578 mutex_lock(&process_info->lock); 2579 2580 if (update_invalid_user_pages(process_info, mm)) 2581 goto unlock_out; 2582 /* userptr_inval_list can be empty if all evicted userptr BOs 2583 * have been freed. In that case there is nothing to validate 2584 * and we can just restart the queues. 2585 */ 2586 if (!list_empty(&process_info->userptr_inval_list)) { 2587 if (validate_invalid_user_pages(process_info)) 2588 goto unlock_out; 2589 } 2590 /* Final check for concurrent evicton and atomic update. If 2591 * another eviction happens after successful update, it will 2592 * be a first eviction that calls quiesce_mm. The eviction 2593 * reference counting inside KFD will handle this case. 2594 */ 2595 mutex_lock(&process_info->notifier_lock); 2596 if (process_info->evicted_bos != evicted_bos) 2597 goto unlock_notifier_out; 2598 2599 if (confirm_valid_user_pages_locked(process_info)) { 2600 WARN(1, "User pages unexpectedly invalid"); 2601 goto unlock_notifier_out; 2602 } 2603 2604 process_info->evicted_bos = evicted_bos = 0; 2605 2606 if (kgd2kfd_resume_mm(mm)) { 2607 pr_err("%s: Failed to resume KFD\n", __func__); 2608 /* No recovery from this failure. Probably the CP is 2609 * hanging. No point trying again. 
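 * (evicted_bos was already cleared above, so the delayed work will not
 * be rescheduled below in this case.)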
2610 */ 2611 } 2612 2613 unlock_notifier_out: 2614 mutex_unlock(&process_info->notifier_lock); 2615 unlock_out: 2616 mutex_unlock(&process_info->lock); 2617 2618 /* If validation failed, reschedule another attempt */ 2619 if (evicted_bos) { 2620 schedule_delayed_work(&process_info->restore_userptr_work, 2621 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); 2622 2623 kfd_smi_event_queue_restore_rescheduled(mm); 2624 } 2625 mmput(mm); 2626 put_task_struct(usertask); 2627 } 2628 2629 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given 2630 * KFD process identified by process_info 2631 * 2632 * @process_info: amdkfd_process_info of the KFD process 2633 * 2634 * After memory eviction, restore thread calls this function. The function 2635 * should be called when the Process is still valid. BO restore involves - 2636 * 2637 * 1. Release old eviction fence and create new one 2638 * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list. 2639 * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of 2640 * BOs that need to be reserved. 2641 * 4. Reserve all the BOs 2642 * 5. Validate of PD and PT BOs. 2643 * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence 2644 * 7. Add fence to all PD and PT BOs. 2645 * 8. Unreserve all BOs 2646 */ 2647 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) 2648 { 2649 struct amdgpu_bo_list_entry *pd_bo_list; 2650 struct amdkfd_process_info *process_info = info; 2651 struct amdgpu_vm *peer_vm; 2652 struct kgd_mem *mem; 2653 struct bo_vm_reservation_context ctx; 2654 struct amdgpu_amdkfd_fence *new_fence; 2655 int ret = 0, i; 2656 struct list_head duplicate_save; 2657 struct amdgpu_sync sync_obj; 2658 unsigned long failed_size = 0; 2659 unsigned long total_size = 0; 2660 2661 INIT_LIST_HEAD(&duplicate_save); 2662 INIT_LIST_HEAD(&ctx.list); 2663 INIT_LIST_HEAD(&ctx.duplicates); 2664 2665 pd_bo_list = kcalloc(process_info->n_vms, 2666 sizeof(struct amdgpu_bo_list_entry), 2667 GFP_KERNEL); 2668 if (!pd_bo_list) 2669 return -ENOMEM; 2670 2671 i = 0; 2672 mutex_lock(&process_info->lock); 2673 list_for_each_entry(peer_vm, &process_info->vm_list_head, 2674 vm_list_node) 2675 amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]); 2676 2677 /* Reserve all BOs and page tables/directory. Add all BOs from 2678 * kfd_bo_list to ctx.list 2679 */ 2680 list_for_each_entry(mem, &process_info->kfd_bo_list, 2681 validate_list.head) { 2682 2683 list_add_tail(&mem->resv_list.head, &ctx.list); 2684 mem->resv_list.bo = mem->validate_list.bo; 2685 mem->resv_list.num_shared = mem->validate_list.num_shared; 2686 } 2687 2688 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, 2689 false, &duplicate_save); 2690 if (ret) { 2691 pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); 2692 goto ttm_reserve_fail; 2693 } 2694 2695 amdgpu_sync_create(&sync_obj); 2696 2697 /* Validate PDs and PTs */ 2698 ret = process_validate_vms(process_info); 2699 if (ret) 2700 goto validate_map_fail; 2701 2702 ret = process_sync_pds_resv(process_info, &sync_obj); 2703 if (ret) { 2704 pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); 2705 goto validate_map_fail; 2706 } 2707 2708 /* Validate BOs and map them to GPUVM (update VM page tables). 
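 * A BO that fails validation in its preferred domain is retried in GTT
 * before giving up, and the KERNEL-usage fences of each BO are added to
 * sync_obj so that the final wait below also covers any buffer moves
 * triggered by validation.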
*/ 2709 list_for_each_entry(mem, &process_info->kfd_bo_list, 2710 validate_list.head) { 2711 2712 struct amdgpu_bo *bo = mem->bo; 2713 uint32_t domain = mem->domain; 2714 struct kfd_mem_attachment *attachment; 2715 struct dma_resv_iter cursor; 2716 struct dma_fence *fence; 2717 2718 total_size += amdgpu_bo_size(bo); 2719 2720 ret = amdgpu_amdkfd_bo_validate(bo, domain, false); 2721 if (ret) { 2722 pr_debug("Memory eviction: Validate BOs failed\n"); 2723 failed_size += amdgpu_bo_size(bo); 2724 ret = amdgpu_amdkfd_bo_validate(bo, 2725 AMDGPU_GEM_DOMAIN_GTT, false); 2726 if (ret) { 2727 pr_debug("Memory eviction: Try again\n"); 2728 goto validate_map_fail; 2729 } 2730 } 2731 dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, 2732 DMA_RESV_USAGE_KERNEL, fence) { 2733 ret = amdgpu_sync_fence(&sync_obj, fence); 2734 if (ret) { 2735 pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); 2736 goto validate_map_fail; 2737 } 2738 } 2739 list_for_each_entry(attachment, &mem->attachments, list) { 2740 if (!attachment->is_mapped) 2741 continue; 2742 2743 kfd_mem_dmaunmap_attachment(mem, attachment); 2744 ret = update_gpuvm_pte(mem, attachment, &sync_obj); 2745 if (ret) { 2746 pr_debug("Memory eviction: update PTE failed. Try again\n"); 2747 goto validate_map_fail; 2748 } 2749 } 2750 } 2751 2752 if (failed_size) 2753 pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); 2754 2755 /* Update page directories */ 2756 ret = process_update_pds(process_info, &sync_obj); 2757 if (ret) { 2758 pr_debug("Memory eviction: update PDs failed. Try again\n"); 2759 goto validate_map_fail; 2760 } 2761 2762 /* Wait for validate and PT updates to finish */ 2763 amdgpu_sync_wait(&sync_obj, false); 2764 2765 /* Release old eviction fence and create new one, because fence only 2766 * goes from unsignaled to signaled, fence cannot be reused. 2767 * Use context and mm from the old fence. 
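 * The new fence is returned to the caller through *ef and attached below
 * to every unpinned KFD BO and to the root PD BO of every VM in the
 * process, using DMA_RESV_USAGE_BOOKKEEP.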
2768 */ 2769 new_fence = amdgpu_amdkfd_fence_create( 2770 process_info->eviction_fence->base.context, 2771 process_info->eviction_fence->mm, 2772 NULL); 2773 if (!new_fence) { 2774 pr_err("Failed to create eviction fence\n"); 2775 ret = -ENOMEM; 2776 goto validate_map_fail; 2777 } 2778 dma_fence_put(&process_info->eviction_fence->base); 2779 process_info->eviction_fence = new_fence; 2780 *ef = dma_fence_get(&new_fence->base); 2781 2782 /* Attach new eviction fence to all BOs except pinned ones */ 2783 list_for_each_entry(mem, &process_info->kfd_bo_list, 2784 validate_list.head) { 2785 if (mem->bo->tbo.pin_count) 2786 continue; 2787 2788 dma_resv_add_fence(mem->bo->tbo.base.resv, 2789 &process_info->eviction_fence->base, 2790 DMA_RESV_USAGE_BOOKKEEP); 2791 } 2792 /* Attach eviction fence to PD / PT BOs */ 2793 list_for_each_entry(peer_vm, &process_info->vm_list_head, 2794 vm_list_node) { 2795 struct amdgpu_bo *bo = peer_vm->root.bo; 2796 2797 dma_resv_add_fence(bo->tbo.base.resv, 2798 &process_info->eviction_fence->base, 2799 DMA_RESV_USAGE_BOOKKEEP); 2800 } 2801 2802 validate_map_fail: 2803 ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); 2804 amdgpu_sync_free(&sync_obj); 2805 ttm_reserve_fail: 2806 mutex_unlock(&process_info->lock); 2807 kfree(pd_bo_list); 2808 return ret; 2809 } 2810 2811 int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) 2812 { 2813 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; 2814 struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; 2815 int ret; 2816 2817 if (!info || !gws) 2818 return -EINVAL; 2819 2820 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); 2821 if (!*mem) 2822 return -ENOMEM; 2823 2824 mutex_init(&(*mem)->lock); 2825 INIT_LIST_HEAD(&(*mem)->attachments); 2826 (*mem)->bo = amdgpu_bo_ref(gws_bo); 2827 (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; 2828 (*mem)->process_info = process_info; 2829 add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); 2830 amdgpu_sync_create(&(*mem)->sync); 2831 2832 2833 /* Validate gws bo the first time it is added to process */ 2834 mutex_lock(&(*mem)->process_info->lock); 2835 ret = amdgpu_bo_reserve(gws_bo, false); 2836 if (unlikely(ret)) { 2837 pr_err("Reserve gws bo failed %d\n", ret); 2838 goto bo_reservation_failure; 2839 } 2840 2841 ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); 2842 if (ret) { 2843 pr_err("GWS BO validate failed %d\n", ret); 2844 goto bo_validation_failure; 2845 } 2846 /* GWS resource is shared b/t amdgpu and amdkfd 2847 * Add process eviction fence to bo so they can 2848 * evict each other. 
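 * Note that dma_resv_reserve_fences() is called first to reserve a fence
 * slot, as required before dma_resv_add_fence() can be used on the
 * reservation object.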
2849 */ 2850 ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); 2851 if (ret) 2852 goto reserve_shared_fail; 2853 dma_resv_add_fence(gws_bo->tbo.base.resv, 2854 &process_info->eviction_fence->base, 2855 DMA_RESV_USAGE_BOOKKEEP); 2856 amdgpu_bo_unreserve(gws_bo); 2857 mutex_unlock(&(*mem)->process_info->lock); 2858 2859 return ret; 2860 2861 reserve_shared_fail: 2862 bo_validation_failure: 2863 amdgpu_bo_unreserve(gws_bo); 2864 bo_reservation_failure: 2865 mutex_unlock(&(*mem)->process_info->lock); 2866 amdgpu_sync_free(&(*mem)->sync); 2867 remove_kgd_mem_from_kfd_bo_list(*mem, process_info); 2868 amdgpu_bo_unref(&gws_bo); 2869 mutex_destroy(&(*mem)->lock); 2870 kfree(*mem); 2871 *mem = NULL; 2872 return ret; 2873 } 2874 2875 int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) 2876 { 2877 int ret; 2878 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; 2879 struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; 2880 struct amdgpu_bo *gws_bo = kgd_mem->bo; 2881 2882 /* Remove BO from process's validate list so restore worker won't touch 2883 * it anymore 2884 */ 2885 remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); 2886 2887 ret = amdgpu_bo_reserve(gws_bo, false); 2888 if (unlikely(ret)) { 2889 pr_err("Reserve gws bo failed %d\n", ret); 2890 //TODO add BO back to validate_list? 2891 return ret; 2892 } 2893 amdgpu_amdkfd_remove_eviction_fence(gws_bo, 2894 process_info->eviction_fence); 2895 amdgpu_bo_unreserve(gws_bo); 2896 amdgpu_sync_free(&kgd_mem->sync); 2897 amdgpu_bo_unref(&gws_bo); 2898 mutex_destroy(&kgd_mem->lock); 2899 kfree(mem); 2900 return 0; 2901 } 2902 2903 /* Returns GPU-specific tiling mode information */ 2904 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, 2905 struct tile_config *config) 2906 { 2907 config->gb_addr_config = adev->gfx.config.gb_addr_config; 2908 config->tile_config_ptr = adev->gfx.config.tile_mode_array; 2909 config->num_tile_configs = 2910 ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2911 config->macro_tile_config_ptr = 2912 adev->gfx.config.macrotile_mode_array; 2913 config->num_macro_tile_configs = 2914 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2915 2916 /* Those values are not set from GFX9 onwards */ 2917 config->num_banks = adev->gfx.config.num_banks; 2918 config->num_ranks = adev->gfx.config.num_ranks; 2919 2920 return 0; 2921 } 2922 2923 bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem) 2924 { 2925 struct kfd_mem_attachment *entry; 2926 2927 list_for_each_entry(entry, &mem->attachments, list) { 2928 if (entry->is_mapped && entry->adev == adev) 2929 return true; 2930 } 2931 return false; 2932 } 2933 2934 #if defined(CONFIG_DEBUG_FS) 2935 2936 int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data) 2937 { 2938 2939 spin_lock(&kfd_mem_limit.mem_limit_lock); 2940 seq_printf(m, "System mem used %lldM out of %lluM\n", 2941 (kfd_mem_limit.system_mem_used >> 20), 2942 (kfd_mem_limit.max_system_mem_limit >> 20)); 2943 seq_printf(m, "TTM mem used %lldM out of %lluM\n", 2944 (kfd_mem_limit.ttm_mem_used >> 20), 2945 (kfd_mem_limit.max_ttm_mem_limit >> 20)); 2946 spin_unlock(&kfd_mem_limit.mem_limit_lock); 2947 2948 return 0; 2949 } 2950 2951 #endif 2952
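
/*
 * Illustrative only: a rough sketch (not compiled, error handling omitted,
 * variable names hypothetical) of how the entry points above are typically
 * combined by the KFD ioctl layer for an imported buffer. Every successful
 * map must be balanced by an unmap before the BO can be freed, otherwise
 * amdgpu_amdkfd_gpuvm_free_memory_of_gpu() returns -EBUSY.
 *
 *   struct kgd_mem *mem;
 *   uint64_t size, mmap_offset;
 *
 *   r = amdgpu_amdkfd_gpuvm_import_dmabuf(adev, dmabuf, va, drm_priv,
 *                                         &mem, &size, &mmap_offset);
 *   r = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(adev, mem, drm_priv);
 *   r = amdgpu_amdkfd_gpuvm_sync_memory(adev, mem, true);
 *
 *   ...
 *
 *   r = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(adev, mem, drm_priv);
 *   r = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(adev, mem, drm_priv, NULL);
 */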