1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2020-2021 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include <linux/types.h> 25 #include <linux/sched/task.h> 26 #include "amdgpu_sync.h" 27 #include "amdgpu_object.h" 28 #include "amdgpu_vm.h" 29 #include "amdgpu_mn.h" 30 #include "amdgpu.h" 31 #include "amdgpu_xgmi.h" 32 #include "kfd_priv.h" 33 #include "kfd_svm.h" 34 #include "kfd_migrate.h" 35 36 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 37 38 /* Long enough to ensure no retry fault comes after svm range is restored and 39 * page table is updated. 40 */ 41 #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 42 43 static void svm_range_evict_svm_bo_worker(struct work_struct *work); 44 static bool 45 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, 46 const struct mmu_notifier_range *range, 47 unsigned long cur_seq); 48 49 static const struct mmu_interval_notifier_ops svm_range_mn_ops = { 50 .invalidate = svm_range_cpu_invalidate_pagetables, 51 }; 52 53 /** 54 * svm_range_unlink - unlink svm_range from lists and interval tree 55 * @prange: svm range structure to be removed 56 * 57 * Remove the svm_range from the svms and svm_bo lists and the svms 58 * interval tree. 59 * 60 * Context: The caller must hold svms->lock 61 */ 62 static void svm_range_unlink(struct svm_range *prange) 63 { 64 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, 65 prange, prange->start, prange->last); 66 67 if (prange->svm_bo) { 68 spin_lock(&prange->svm_bo->list_lock); 69 list_del(&prange->svm_bo_list); 70 spin_unlock(&prange->svm_bo->list_lock); 71 } 72 73 list_del(&prange->list); 74 if (prange->it_node.start != 0 && prange->it_node.last != 0) 75 interval_tree_remove(&prange->it_node, &prange->svms->objects); 76 } 77 78 static void 79 svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange) 80 { 81 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, 82 prange, prange->start, prange->last); 83 84 mmu_interval_notifier_insert_locked(&prange->notifier, mm, 85 prange->start << PAGE_SHIFT, 86 prange->npages << PAGE_SHIFT, 87 &svm_range_mn_ops); 88 } 89 90 /** 91 * svm_range_add_to_svms - add svm range to svms 92 * @prange: svm range structure to be added 93 * 94 * Add the svm range to svms interval tree and link list 95 * 96 * Context: The caller must hold svms->lock 97 */ 98 static void svm_range_add_to_svms(struct svm_range *prange) 99 { 100 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, 101 prange, prange->start, prange->last); 102 103 list_add_tail(&prange->list, &prange->svms->list); 104 prange->it_node.start = prange->start; 105 prange->it_node.last = prange->last; 106 interval_tree_insert(&prange->it_node, &prange->svms->objects); 107 } 108 109 static void svm_range_remove_notifier(struct svm_range *prange) 110 { 111 pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", 112 prange->svms, prange, 113 prange->notifier.interval_tree.start >> PAGE_SHIFT, 114 prange->notifier.interval_tree.last >> PAGE_SHIFT); 115 116 if (prange->notifier.interval_tree.start != 0 && 117 prange->notifier.interval_tree.last != 0) 118 mmu_interval_notifier_remove(&prange->notifier); 119 } 120 121 static int 122 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, 123 unsigned long *hmm_pfns, uint32_t gpuidx) 124 { 125 enum dma_data_direction dir = DMA_BIDIRECTIONAL; 126 dma_addr_t *addr = prange->dma_addr[gpuidx]; 127 struct device *dev = adev->dev; 128 struct page *page; 129 int i, r; 130 131 if (!addr) { 132 addr = kvmalloc_array(prange->npages, sizeof(*addr), 133 GFP_KERNEL | __GFP_ZERO); 134 if (!addr) 135 return -ENOMEM; 136 prange->dma_addr[gpuidx] = addr; 137 } 138 139 for (i = 0; i < prange->npages; i++) { 140 if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), 141 "leaking dma mapping\n")) 142 dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); 143 144 page = hmm_pfn_to_page(hmm_pfns[i]); 145 if (is_zone_device_page(page)) { 146 struct amdgpu_device *bo_adev = 147 amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); 148 149 addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + 150 bo_adev->vm_manager.vram_base_offset - 151 bo_adev->kfd.dev->pgmap.range.start; 152 addr[i] |= SVM_RANGE_VRAM_DOMAIN; 153 pr_debug("vram address detected: 0x%llx\n", addr[i]); 154 continue; 155 } 156 addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); 157 r = dma_mapping_error(dev, addr[i]); 158 if (r) { 159 pr_debug("failed %d dma_map_page\n", r); 160 return r; 161 } 162 pr_debug("dma mapping 0x%llx for page addr 0x%lx\n", 163 addr[i] >> PAGE_SHIFT, page_to_pfn(page)); 164 } 165 return 0; 166 } 167 168 static int 169 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, 170 unsigned long *hmm_pfns) 171 { 172 struct kfd_process *p; 173 uint32_t gpuidx; 174 int r; 175 176 p = container_of(prange->svms, struct kfd_process, svms); 177 178 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { 179 struct kfd_process_device *pdd; 180 struct amdgpu_device *adev; 181 182 pr_debug("mapping to gpu idx 0x%x\n", gpuidx); 183 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 184 if (!pdd) { 185 pr_debug("failed to find device idx %d\n", gpuidx); 186 return -EINVAL; 187 } 188 adev = (struct amdgpu_device *)pdd->dev->kgd; 189 190 r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx); 191 if (r) 192 break; 193 } 194 195 return r; 196 } 197 198 void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, 199 unsigned long offset, unsigned long npages) 200 { 201 enum dma_data_direction dir = DMA_BIDIRECTIONAL; 202 int i; 203 204 if (!dma_addr) 205 return; 206 207 for (i = offset; i < offset + npages; i++) { 208 if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) 209 continue; 210 pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); 211 dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); 212 dma_addr[i] = 0; 213 } 214 } 215 216 void svm_range_free_dma_mappings(struct svm_range *prange) 217 { 218 struct kfd_process_device *pdd; 219 dma_addr_t *dma_addr; 220 struct device *dev; 221 struct kfd_process *p; 222 uint32_t gpuidx; 223 224 p = container_of(prange->svms, struct kfd_process, svms); 225 226 for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { 227 dma_addr = prange->dma_addr[gpuidx]; 228 if (!dma_addr) 229 continue; 230 231 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 232 if (!pdd) { 233 pr_debug("failed to find device idx %d\n", gpuidx); 234 continue; 235 } 236 dev = &pdd->dev->pdev->dev; 237 svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); 238 kvfree(dma_addr); 239 prange->dma_addr[gpuidx] = NULL; 240 } 241 } 242 243 static void svm_range_free(struct svm_range *prange) 244 { 245 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, 246 prange->start, prange->last); 247 248 svm_range_vram_node_free(prange); 249 svm_range_free_dma_mappings(prange); 250 mutex_destroy(&prange->lock); 251 mutex_destroy(&prange->migrate_mutex); 252 kfree(prange); 253 } 254 255 static void 256 svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc, 257 uint8_t *granularity, uint32_t *flags) 258 { 259 *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; 260 *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; 261 *granularity = 9; 262 *flags = 263 KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT; 264 } 265 266 static struct 267 svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, 268 uint64_t last) 269 { 270 uint64_t size = last - start + 1; 271 struct svm_range *prange; 272 struct kfd_process *p; 273 274 prange = kzalloc(sizeof(*prange), GFP_KERNEL); 275 if (!prange) 276 return NULL; 277 prange->npages = size; 278 prange->svms = svms; 279 prange->start = start; 280 prange->last = last; 281 INIT_LIST_HEAD(&prange->list); 282 INIT_LIST_HEAD(&prange->update_list); 283 INIT_LIST_HEAD(&prange->remove_list); 284 INIT_LIST_HEAD(&prange->insert_list); 285 INIT_LIST_HEAD(&prange->svm_bo_list); 286 INIT_LIST_HEAD(&prange->deferred_list); 287 INIT_LIST_HEAD(&prange->child_list); 288 atomic_set(&prange->invalid, 0); 289 prange->validate_timestamp = 0; 290 mutex_init(&prange->migrate_mutex); 291 mutex_init(&prange->lock); 292 293 p = container_of(svms, struct kfd_process, svms); 294 if (p->xnack_enabled) 295 bitmap_copy(prange->bitmap_access, svms->bitmap_supported, 296 MAX_GPU_INSTANCE); 297 298 svm_range_set_default_attributes(&prange->preferred_loc, 299 &prange->prefetch_loc, 300 &prange->granularity, &prange->flags); 301 302 pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last); 303 304 return prange; 305 } 306 307 static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo) 308 { 309 if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref)) 310 return false; 311 312 return true; 313 } 314 315 static void svm_range_bo_release(struct kref *kref) 316 { 317 struct svm_range_bo *svm_bo; 318 319 svm_bo = container_of(kref, struct svm_range_bo, kref); 320 spin_lock(&svm_bo->list_lock); 321 while (!list_empty(&svm_bo->range_list)) { 322 struct svm_range *prange = 323 list_first_entry(&svm_bo->range_list, 324 struct svm_range, svm_bo_list); 325 /* list_del_init tells a concurrent svm_range_vram_node_new when 326 * it's safe to reuse the svm_bo pointer and svm_bo_list head. 327 */ 328 list_del_init(&prange->svm_bo_list); 329 spin_unlock(&svm_bo->list_lock); 330 331 pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, 332 prange->start, prange->last); 333 mutex_lock(&prange->lock); 334 prange->svm_bo = NULL; 335 mutex_unlock(&prange->lock); 336 337 spin_lock(&svm_bo->list_lock); 338 } 339 spin_unlock(&svm_bo->list_lock); 340 if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { 341 /* We're not in the eviction worker. 342 * Signal the fence and synchronize with any 343 * pending eviction work. 344 */ 345 dma_fence_signal(&svm_bo->eviction_fence->base); 346 cancel_work_sync(&svm_bo->eviction_work); 347 } 348 dma_fence_put(&svm_bo->eviction_fence->base); 349 amdgpu_bo_unref(&svm_bo->bo); 350 kfree(svm_bo); 351 } 352 353 void svm_range_bo_unref(struct svm_range_bo *svm_bo) 354 { 355 if (!svm_bo) 356 return; 357 358 kref_put(&svm_bo->kref, svm_range_bo_release); 359 } 360 361 static bool 362 svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) 363 { 364 struct amdgpu_device *bo_adev; 365 366 mutex_lock(&prange->lock); 367 if (!prange->svm_bo) { 368 mutex_unlock(&prange->lock); 369 return false; 370 } 371 if (prange->ttm_res) { 372 /* We still have a reference, all is well */ 373 mutex_unlock(&prange->lock); 374 return true; 375 } 376 if (svm_bo_ref_unless_zero(prange->svm_bo)) { 377 /* 378 * Migrate from GPU to GPU, remove range from source bo_adev 379 * svm_bo range list, and return false to allocate svm_bo from 380 * destination adev. 381 */ 382 bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); 383 if (bo_adev != adev) { 384 mutex_unlock(&prange->lock); 385 386 spin_lock(&prange->svm_bo->list_lock); 387 list_del_init(&prange->svm_bo_list); 388 spin_unlock(&prange->svm_bo->list_lock); 389 390 svm_range_bo_unref(prange->svm_bo); 391 return false; 392 } 393 if (READ_ONCE(prange->svm_bo->evicting)) { 394 struct dma_fence *f; 395 struct svm_range_bo *svm_bo; 396 /* The BO is getting evicted, 397 * we need to get a new one 398 */ 399 mutex_unlock(&prange->lock); 400 svm_bo = prange->svm_bo; 401 f = dma_fence_get(&svm_bo->eviction_fence->base); 402 svm_range_bo_unref(prange->svm_bo); 403 /* wait for the fence to avoid long spin-loop 404 * at list_empty_careful 405 */ 406 dma_fence_wait(f, false); 407 dma_fence_put(f); 408 } else { 409 /* The BO was still around and we got 410 * a new reference to it 411 */ 412 mutex_unlock(&prange->lock); 413 pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n", 414 prange->svms, prange->start, prange->last); 415 416 prange->ttm_res = prange->svm_bo->bo->tbo.resource; 417 return true; 418 } 419 420 } else { 421 mutex_unlock(&prange->lock); 422 } 423 424 /* We need a new svm_bo. Spin-loop to wait for concurrent 425 * svm_range_bo_release to finish removing this range from 426 * its range list. After this, it is safe to reuse the 427 * svm_bo pointer and svm_bo_list head. 428 */ 429 while (!list_empty_careful(&prange->svm_bo_list)) 430 ; 431 432 return false; 433 } 434 435 static struct svm_range_bo *svm_range_bo_new(void) 436 { 437 struct svm_range_bo *svm_bo; 438 439 svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL); 440 if (!svm_bo) 441 return NULL; 442 443 kref_init(&svm_bo->kref); 444 INIT_LIST_HEAD(&svm_bo->range_list); 445 spin_lock_init(&svm_bo->list_lock); 446 447 return svm_bo; 448 } 449 450 int 451 svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, 452 bool clear) 453 { 454 struct amdgpu_bo_param bp; 455 struct svm_range_bo *svm_bo; 456 struct amdgpu_bo_user *ubo; 457 struct amdgpu_bo *bo; 458 struct kfd_process *p; 459 struct mm_struct *mm; 460 int r; 461 462 p = container_of(prange->svms, struct kfd_process, svms); 463 pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, 464 prange->start, prange->last); 465 466 if (svm_range_validate_svm_bo(adev, prange)) 467 return 0; 468 469 svm_bo = svm_range_bo_new(); 470 if (!svm_bo) { 471 pr_debug("failed to alloc svm bo\n"); 472 return -ENOMEM; 473 } 474 mm = get_task_mm(p->lead_thread); 475 if (!mm) { 476 pr_debug("failed to get mm\n"); 477 kfree(svm_bo); 478 return -ESRCH; 479 } 480 svm_bo->svms = prange->svms; 481 svm_bo->eviction_fence = 482 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), 483 mm, 484 svm_bo); 485 mmput(mm); 486 INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker); 487 svm_bo->evicting = 0; 488 memset(&bp, 0, sizeof(bp)); 489 bp.size = prange->npages * PAGE_SIZE; 490 bp.byte_align = PAGE_SIZE; 491 bp.domain = AMDGPU_GEM_DOMAIN_VRAM; 492 bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; 493 bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0; 494 bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO; 495 bp.type = ttm_bo_type_device; 496 bp.resv = NULL; 497 498 r = amdgpu_bo_create_user(adev, &bp, &ubo); 499 if (r) { 500 pr_debug("failed %d to create bo\n", r); 501 goto create_bo_failed; 502 } 503 bo = &ubo->bo; 504 r = amdgpu_bo_reserve(bo, true); 505 if (r) { 506 pr_debug("failed %d to reserve bo\n", r); 507 goto reserve_bo_failed; 508 } 509 510 r = dma_resv_reserve_shared(bo->tbo.base.resv, 1); 511 if (r) { 512 pr_debug("failed %d to reserve bo\n", r); 513 amdgpu_bo_unreserve(bo); 514 goto reserve_bo_failed; 515 } 516 amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true); 517 518 amdgpu_bo_unreserve(bo); 519 520 svm_bo->bo = bo; 521 prange->svm_bo = svm_bo; 522 prange->ttm_res = bo->tbo.resource; 523 prange->offset = 0; 524 525 spin_lock(&svm_bo->list_lock); 526 list_add(&prange->svm_bo_list, &svm_bo->range_list); 527 spin_unlock(&svm_bo->list_lock); 528 529 return 0; 530 531 reserve_bo_failed: 532 amdgpu_bo_unref(&bo); 533 create_bo_failed: 534 dma_fence_put(&svm_bo->eviction_fence->base); 535 kfree(svm_bo); 536 prange->ttm_res = NULL; 537 538 return r; 539 } 540 541 void svm_range_vram_node_free(struct svm_range *prange) 542 { 543 svm_range_bo_unref(prange->svm_bo); 544 prange->ttm_res = NULL; 545 } 546 547 struct amdgpu_device * 548 svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) 549 { 550 struct kfd_process_device *pdd; 551 struct kfd_process *p; 552 int32_t gpu_idx; 553 554 p = container_of(prange->svms, struct kfd_process, svms); 555 556 gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id); 557 if (gpu_idx < 0) { 558 pr_debug("failed to get device by id 0x%x\n", gpu_id); 559 return NULL; 560 } 561 pdd = kfd_process_device_from_gpuidx(p, gpu_idx); 562 if (!pdd) { 563 pr_debug("failed to get device by idx 0x%x\n", gpu_idx); 564 return NULL; 565 } 566 567 return (struct amdgpu_device *)pdd->dev->kgd; 568 } 569 570 struct kfd_process_device * 571 svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) 572 { 573 struct kfd_process *p; 574 int32_t gpu_idx, gpuid; 575 int r; 576 577 p = container_of(prange->svms, struct kfd_process, svms); 578 579 r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx); 580 if (r) { 581 pr_debug("failed to get device id by adev %p\n", adev); 582 return NULL; 583 } 584 585 return kfd_process_device_from_gpuidx(p, gpu_idx); 586 } 587 588 static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) 589 { 590 struct ttm_operation_ctx ctx = { false, false }; 591 592 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); 593 594 return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 595 } 596 597 static int 598 svm_range_check_attr(struct kfd_process *p, 599 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) 600 { 601 uint32_t i; 602 603 for (i = 0; i < nattr; i++) { 604 uint32_t val = attrs[i].value; 605 int gpuidx = MAX_GPU_INSTANCE; 606 607 switch (attrs[i].type) { 608 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: 609 if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM && 610 val != KFD_IOCTL_SVM_LOCATION_UNDEFINED) 611 gpuidx = kfd_process_gpuidx_from_gpuid(p, val); 612 break; 613 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: 614 if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM) 615 gpuidx = kfd_process_gpuidx_from_gpuid(p, val); 616 break; 617 case KFD_IOCTL_SVM_ATTR_ACCESS: 618 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: 619 case KFD_IOCTL_SVM_ATTR_NO_ACCESS: 620 gpuidx = kfd_process_gpuidx_from_gpuid(p, val); 621 break; 622 case KFD_IOCTL_SVM_ATTR_SET_FLAGS: 623 break; 624 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: 625 break; 626 case KFD_IOCTL_SVM_ATTR_GRANULARITY: 627 break; 628 default: 629 pr_debug("unknown attr type 0x%x\n", attrs[i].type); 630 return -EINVAL; 631 } 632 633 if (gpuidx < 0) { 634 pr_debug("no GPU 0x%x found\n", val); 635 return -EINVAL; 636 } else if (gpuidx < MAX_GPU_INSTANCE && 637 !test_bit(gpuidx, p->svms.bitmap_supported)) { 638 pr_debug("GPU 0x%x not supported\n", val); 639 return -EINVAL; 640 } 641 } 642 643 return 0; 644 } 645 646 static void 647 svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, 648 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) 649 { 650 uint32_t i; 651 int gpuidx; 652 653 for (i = 0; i < nattr; i++) { 654 switch (attrs[i].type) { 655 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: 656 prange->preferred_loc = attrs[i].value; 657 break; 658 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: 659 prange->prefetch_loc = attrs[i].value; 660 break; 661 case KFD_IOCTL_SVM_ATTR_ACCESS: 662 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: 663 case KFD_IOCTL_SVM_ATTR_NO_ACCESS: 664 gpuidx = kfd_process_gpuidx_from_gpuid(p, 665 attrs[i].value); 666 if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { 667 bitmap_clear(prange->bitmap_access, gpuidx, 1); 668 bitmap_clear(prange->bitmap_aip, gpuidx, 1); 669 } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) { 670 bitmap_set(prange->bitmap_access, gpuidx, 1); 671 bitmap_clear(prange->bitmap_aip, gpuidx, 1); 672 } else { 673 bitmap_clear(prange->bitmap_access, gpuidx, 1); 674 bitmap_set(prange->bitmap_aip, gpuidx, 1); 675 } 676 break; 677 case KFD_IOCTL_SVM_ATTR_SET_FLAGS: 678 prange->flags |= attrs[i].value; 679 break; 680 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: 681 prange->flags &= ~attrs[i].value; 682 break; 683 case KFD_IOCTL_SVM_ATTR_GRANULARITY: 684 prange->granularity = attrs[i].value; 685 break; 686 default: 687 WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); 688 } 689 } 690 } 691 692 /** 693 * svm_range_debug_dump - print all range information from svms 694 * @svms: svm range list header 695 * 696 * debug output svm range start, end, prefetch location from svms 697 * interval tree and link list 698 * 699 * Context: The caller must hold svms->lock 700 */ 701 static void svm_range_debug_dump(struct svm_range_list *svms) 702 { 703 struct interval_tree_node *node; 704 struct svm_range *prange; 705 706 pr_debug("dump svms 0x%p list\n", svms); 707 pr_debug("range\tstart\tpage\tend\t\tlocation\n"); 708 709 list_for_each_entry(prange, &svms->list, list) { 710 pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n", 711 prange, prange->start, prange->npages, 712 prange->start + prange->npages - 1, 713 prange->actual_loc); 714 } 715 716 pr_debug("dump svms 0x%p interval tree\n", svms); 717 pr_debug("range\tstart\tpage\tend\t\tlocation\n"); 718 node = interval_tree_iter_first(&svms->objects, 0, ~0ULL); 719 while (node) { 720 prange = container_of(node, struct svm_range, it_node); 721 pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n", 722 prange, prange->start, prange->npages, 723 prange->start + prange->npages - 1, 724 prange->actual_loc); 725 node = interval_tree_iter_next(node, 0, ~0ULL); 726 } 727 } 728 729 static bool 730 svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) 731 { 732 return (old->prefetch_loc == new->prefetch_loc && 733 old->flags == new->flags && 734 old->granularity == new->granularity); 735 } 736 737 static int 738 svm_range_split_array(void *ppnew, void *ppold, size_t size, 739 uint64_t old_start, uint64_t old_n, 740 uint64_t new_start, uint64_t new_n) 741 { 742 unsigned char *new, *old, *pold; 743 uint64_t d; 744 745 if (!ppold) 746 return 0; 747 pold = *(unsigned char **)ppold; 748 if (!pold) 749 return 0; 750 751 new = kvmalloc_array(new_n, size, GFP_KERNEL); 752 if (!new) 753 return -ENOMEM; 754 755 d = (new_start - old_start) * size; 756 memcpy(new, pold + d, new_n * size); 757 758 old = kvmalloc_array(old_n, size, GFP_KERNEL); 759 if (!old) { 760 kvfree(new); 761 return -ENOMEM; 762 } 763 764 d = (new_start == old_start) ? new_n * size : 0; 765 memcpy(old, pold + d, old_n * size); 766 767 kvfree(pold); 768 *(void **)ppold = old; 769 *(void **)ppnew = new; 770 771 return 0; 772 } 773 774 static int 775 svm_range_split_pages(struct svm_range *new, struct svm_range *old, 776 uint64_t start, uint64_t last) 777 { 778 uint64_t npages = last - start + 1; 779 int i, r; 780 781 for (i = 0; i < MAX_GPU_INSTANCE; i++) { 782 r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i], 783 sizeof(*old->dma_addr[i]), old->start, 784 npages, new->start, new->npages); 785 if (r) 786 return r; 787 } 788 789 return 0; 790 } 791 792 static int 793 svm_range_split_nodes(struct svm_range *new, struct svm_range *old, 794 uint64_t start, uint64_t last) 795 { 796 uint64_t npages = last - start + 1; 797 798 pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n", 799 new->svms, new, new->start, start, last); 800 801 if (new->start == old->start) { 802 new->offset = old->offset; 803 old->offset += new->npages; 804 } else { 805 new->offset = old->offset + npages; 806 } 807 808 new->svm_bo = svm_range_bo_ref(old->svm_bo); 809 new->ttm_res = old->ttm_res; 810 811 spin_lock(&new->svm_bo->list_lock); 812 list_add(&new->svm_bo_list, &new->svm_bo->range_list); 813 spin_unlock(&new->svm_bo->list_lock); 814 815 return 0; 816 } 817 818 /** 819 * svm_range_split_adjust - split range and adjust 820 * 821 * @new: new range 822 * @old: the old range 823 * @start: the old range adjust to start address in pages 824 * @last: the old range adjust to last address in pages 825 * 826 * Copy system memory dma_addr or vram ttm_res in old range to new 827 * range from new_start up to size new->npages, the remaining old range is from 828 * start to last 829 * 830 * Return: 831 * 0 - OK, -ENOMEM - out of memory 832 */ 833 static int 834 svm_range_split_adjust(struct svm_range *new, struct svm_range *old, 835 uint64_t start, uint64_t last) 836 { 837 int r; 838 839 pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n", 840 new->svms, new->start, old->start, old->last, start, last); 841 842 if (new->start < old->start || 843 new->last > old->last) { 844 WARN_ONCE(1, "invalid new range start or last\n"); 845 return -EINVAL; 846 } 847 848 r = svm_range_split_pages(new, old, start, last); 849 if (r) 850 return r; 851 852 if (old->actual_loc && old->ttm_res) { 853 r = svm_range_split_nodes(new, old, start, last); 854 if (r) 855 return r; 856 } 857 858 old->npages = last - start + 1; 859 old->start = start; 860 old->last = last; 861 new->flags = old->flags; 862 new->preferred_loc = old->preferred_loc; 863 new->prefetch_loc = old->prefetch_loc; 864 new->actual_loc = old->actual_loc; 865 new->granularity = old->granularity; 866 bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); 867 bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); 868 869 return 0; 870 } 871 872 /** 873 * svm_range_split - split a range in 2 ranges 874 * 875 * @prange: the svm range to split 876 * @start: the remaining range start address in pages 877 * @last: the remaining range last address in pages 878 * @new: the result new range generated 879 * 880 * Two cases only: 881 * case 1: if start == prange->start 882 * prange ==> prange[start, last] 883 * new range [last + 1, prange->last] 884 * 885 * case 2: if last == prange->last 886 * prange ==> prange[start, last] 887 * new range [prange->start, start - 1] 888 * 889 * Return: 890 * 0 - OK, -ENOMEM - out of memory, -EINVAL - invalid start, last 891 */ 892 static int 893 svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, 894 struct svm_range **new) 895 { 896 uint64_t old_start = prange->start; 897 uint64_t old_last = prange->last; 898 struct svm_range_list *svms; 899 int r = 0; 900 901 pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms, 902 old_start, old_last, start, last); 903 904 if (old_start != start && old_last != last) 905 return -EINVAL; 906 if (start < old_start || last > old_last) 907 return -EINVAL; 908 909 svms = prange->svms; 910 if (old_start == start) 911 *new = svm_range_new(svms, last + 1, old_last); 912 else 913 *new = svm_range_new(svms, old_start, start - 1); 914 if (!*new) 915 return -ENOMEM; 916 917 r = svm_range_split_adjust(*new, prange, start, last); 918 if (r) { 919 pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", 920 r, old_start, old_last, start, last); 921 svm_range_free(*new); 922 *new = NULL; 923 } 924 925 return r; 926 } 927 928 static int 929 svm_range_split_tail(struct svm_range *prange, struct svm_range *new, 930 uint64_t new_last, struct list_head *insert_list) 931 { 932 struct svm_range *tail; 933 int r = svm_range_split(prange, prange->start, new_last, &tail); 934 935 if (!r) 936 list_add(&tail->insert_list, insert_list); 937 return r; 938 } 939 940 static int 941 svm_range_split_head(struct svm_range *prange, struct svm_range *new, 942 uint64_t new_start, struct list_head *insert_list) 943 { 944 struct svm_range *head; 945 int r = svm_range_split(prange, new_start, prange->last, &head); 946 947 if (!r) 948 list_add(&head->insert_list, insert_list); 949 return r; 950 } 951 952 static void 953 svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, 954 struct svm_range *pchild, enum svm_work_list_ops op) 955 { 956 pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", 957 pchild, pchild->start, pchild->last, prange, op); 958 959 pchild->work_item.mm = mm; 960 pchild->work_item.op = op; 961 list_add_tail(&pchild->child_list, &prange->child_list); 962 } 963 964 /** 965 * svm_range_split_by_granularity - collect ranges within granularity boundary 966 * 967 * @p: the process with svms list 968 * @mm: mm structure 969 * @addr: the vm fault address in pages, to split the prange 970 * @parent: parent range if prange is from child list 971 * @prange: prange to split 972 * 973 * Trims @prange to be a single aligned block of prange->granularity if 974 * possible. The head and tail are added to the child_list in @parent. 975 * 976 * Context: caller must hold mmap_read_lock and prange->lock 977 * 978 * Return: 979 * 0 - OK, otherwise error code 980 */ 981 int 982 svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, 983 unsigned long addr, struct svm_range *parent, 984 struct svm_range *prange) 985 { 986 struct svm_range *head, *tail; 987 unsigned long start, last, size; 988 int r; 989 990 /* Align splited range start and size to granularity size, then a single 991 * PTE will be used for whole range, this reduces the number of PTE 992 * updated and the L1 TLB space used for translation. 993 */ 994 size = 1UL << prange->granularity; 995 start = ALIGN_DOWN(addr, size); 996 last = ALIGN(addr + 1, size) - 1; 997 998 pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", 999 prange->svms, prange->start, prange->last, start, last, size); 1000 1001 if (start > prange->start) { 1002 r = svm_range_split(prange, start, prange->last, &head); 1003 if (r) 1004 return r; 1005 svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); 1006 } 1007 1008 if (last < prange->last) { 1009 r = svm_range_split(prange, prange->start, last, &tail); 1010 if (r) 1011 return r; 1012 svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); 1013 } 1014 1015 /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ 1016 if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { 1017 prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; 1018 pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", 1019 prange, prange->start, prange->last, 1020 SVM_OP_ADD_RANGE_AND_MAP); 1021 } 1022 return 0; 1023 } 1024 1025 static uint64_t 1026 svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, 1027 int domain) 1028 { 1029 struct amdgpu_device *bo_adev; 1030 uint32_t flags = prange->flags; 1031 uint32_t mapping_flags = 0; 1032 uint64_t pte_flags; 1033 bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); 1034 bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; 1035 1036 if (domain == SVM_RANGE_VRAM_DOMAIN) 1037 bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); 1038 1039 switch (adev->asic_type) { 1040 case CHIP_ARCTURUS: 1041 if (domain == SVM_RANGE_VRAM_DOMAIN) { 1042 if (bo_adev == adev) { 1043 mapping_flags |= coherent ? 1044 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; 1045 } else { 1046 mapping_flags |= coherent ? 1047 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; 1048 if (amdgpu_xgmi_same_hive(adev, bo_adev)) 1049 snoop = true; 1050 } 1051 } else { 1052 mapping_flags |= coherent ? 1053 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; 1054 } 1055 break; 1056 case CHIP_ALDEBARAN: 1057 if (domain == SVM_RANGE_VRAM_DOMAIN) { 1058 if (bo_adev == adev) { 1059 mapping_flags |= coherent ? 1060 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; 1061 if (adev->gmc.xgmi.connected_to_cpu) 1062 snoop = true; 1063 } else { 1064 mapping_flags |= coherent ? 1065 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; 1066 if (amdgpu_xgmi_same_hive(adev, bo_adev)) 1067 snoop = true; 1068 } 1069 } else { 1070 mapping_flags |= coherent ? 1071 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; 1072 } 1073 break; 1074 default: 1075 mapping_flags |= coherent ? 1076 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; 1077 } 1078 1079 mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; 1080 1081 if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO) 1082 mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE; 1083 if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC) 1084 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; 1085 1086 pte_flags = AMDGPU_PTE_VALID; 1087 pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; 1088 pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; 1089 1090 pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); 1091 1092 pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n", 1093 prange->svms, prange->start, prange->last, 1094 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags); 1095 1096 return pte_flags; 1097 } 1098 1099 static int 1100 svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, 1101 uint64_t start, uint64_t last, 1102 struct dma_fence **fence) 1103 { 1104 uint64_t init_pte_value = 0; 1105 1106 pr_debug("[0x%llx 0x%llx]\n", start, last); 1107 1108 return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, 1109 start, last, init_pte_value, 0, 1110 NULL, NULL, fence, NULL); 1111 } 1112 1113 static int 1114 svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, 1115 unsigned long last) 1116 { 1117 DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); 1118 struct kfd_process_device *pdd; 1119 struct dma_fence *fence = NULL; 1120 struct amdgpu_device *adev; 1121 struct kfd_process *p; 1122 uint32_t gpuidx; 1123 int r = 0; 1124 1125 bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, 1126 MAX_GPU_INSTANCE); 1127 p = container_of(prange->svms, struct kfd_process, svms); 1128 1129 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { 1130 pr_debug("unmap from gpu idx 0x%x\n", gpuidx); 1131 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 1132 if (!pdd) { 1133 pr_debug("failed to find device idx %d\n", gpuidx); 1134 return -EINVAL; 1135 } 1136 adev = (struct amdgpu_device *)pdd->dev->kgd; 1137 1138 r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv), 1139 start, last, &fence); 1140 if (r) 1141 break; 1142 1143 if (fence) { 1144 r = dma_fence_wait(fence, false); 1145 dma_fence_put(fence); 1146 fence = NULL; 1147 if (r) 1148 break; 1149 } 1150 amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, 1151 p->pasid, TLB_FLUSH_HEAVYWEIGHT); 1152 } 1153 1154 return r; 1155 } 1156 1157 static int 1158 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, 1159 struct svm_range *prange, dma_addr_t *dma_addr, 1160 struct amdgpu_device *bo_adev, struct dma_fence **fence) 1161 { 1162 struct amdgpu_bo_va bo_va; 1163 bool table_freed = false; 1164 uint64_t pte_flags; 1165 unsigned long last_start; 1166 int last_domain; 1167 int r = 0; 1168 int64_t i; 1169 1170 pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, 1171 prange->last); 1172 1173 if (prange->svm_bo && prange->ttm_res) 1174 bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); 1175 1176 last_start = prange->start; 1177 for (i = 0; i < prange->npages; i++) { 1178 last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; 1179 dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; 1180 if ((prange->start + i) < prange->last && 1181 last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) 1182 continue; 1183 1184 pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", 1185 last_start, prange->start + i, last_domain ? "GPU" : "CPU"); 1186 pte_flags = svm_range_get_pte_flags(adev, prange, last_domain); 1187 r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, 1188 last_start, 1189 prange->start + i, pte_flags, 1190 last_start - prange->start, 1191 NULL, 1192 dma_addr, 1193 &vm->last_update, 1194 &table_freed); 1195 if (r) { 1196 pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); 1197 goto out; 1198 } 1199 last_start = prange->start + i + 1; 1200 } 1201 1202 r = amdgpu_vm_update_pdes(adev, vm, false); 1203 if (r) { 1204 pr_debug("failed %d to update directories 0x%lx\n", r, 1205 prange->start); 1206 goto out; 1207 } 1208 1209 if (fence) 1210 *fence = dma_fence_get(vm->last_update); 1211 1212 if (table_freed) { 1213 struct kfd_process *p; 1214 1215 p = container_of(prange->svms, struct kfd_process, svms); 1216 amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, 1217 p->pasid, TLB_FLUSH_LEGACY); 1218 } 1219 out: 1220 return r; 1221 } 1222 1223 static int svm_range_map_to_gpus(struct svm_range *prange, 1224 unsigned long *bitmap, bool wait) 1225 { 1226 struct kfd_process_device *pdd; 1227 struct amdgpu_device *bo_adev; 1228 struct amdgpu_device *adev; 1229 struct kfd_process *p; 1230 struct dma_fence *fence = NULL; 1231 uint32_t gpuidx; 1232 int r = 0; 1233 1234 if (prange->svm_bo && prange->ttm_res) 1235 bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); 1236 else 1237 bo_adev = NULL; 1238 1239 p = container_of(prange->svms, struct kfd_process, svms); 1240 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { 1241 pr_debug("mapping to gpu idx 0x%x\n", gpuidx); 1242 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 1243 if (!pdd) { 1244 pr_debug("failed to find device idx %d\n", gpuidx); 1245 return -EINVAL; 1246 } 1247 adev = (struct amdgpu_device *)pdd->dev->kgd; 1248 1249 pdd = kfd_bind_process_to_device(pdd->dev, p); 1250 if (IS_ERR(pdd)) 1251 return -EINVAL; 1252 1253 if (bo_adev && adev != bo_adev && 1254 !amdgpu_xgmi_same_hive(adev, bo_adev)) { 1255 pr_debug("cannot map to device idx %d\n", gpuidx); 1256 continue; 1257 } 1258 1259 r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), 1260 prange, prange->dma_addr[gpuidx], 1261 bo_adev, wait ? &fence : NULL); 1262 if (r) 1263 break; 1264 1265 if (fence) { 1266 r = dma_fence_wait(fence, false); 1267 dma_fence_put(fence); 1268 fence = NULL; 1269 if (r) { 1270 pr_debug("failed %d to dma fence wait\n", r); 1271 break; 1272 } 1273 } 1274 } 1275 1276 return r; 1277 } 1278 1279 struct svm_validate_context { 1280 struct kfd_process *process; 1281 struct svm_range *prange; 1282 bool intr; 1283 unsigned long bitmap[MAX_GPU_INSTANCE]; 1284 struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1]; 1285 struct list_head validate_list; 1286 struct ww_acquire_ctx ticket; 1287 }; 1288 1289 static int svm_range_reserve_bos(struct svm_validate_context *ctx) 1290 { 1291 struct kfd_process_device *pdd; 1292 struct amdgpu_device *adev; 1293 struct amdgpu_vm *vm; 1294 uint32_t gpuidx; 1295 int r; 1296 1297 INIT_LIST_HEAD(&ctx->validate_list); 1298 for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { 1299 pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); 1300 if (!pdd) { 1301 pr_debug("failed to find device idx %d\n", gpuidx); 1302 return -EINVAL; 1303 } 1304 adev = (struct amdgpu_device *)pdd->dev->kgd; 1305 vm = drm_priv_to_vm(pdd->drm_priv); 1306 1307 ctx->tv[gpuidx].bo = &vm->root.bo->tbo; 1308 ctx->tv[gpuidx].num_shared = 4; 1309 list_add(&ctx->tv[gpuidx].head, &ctx->validate_list); 1310 } 1311 if (ctx->prange->svm_bo && ctx->prange->ttm_res) { 1312 ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo; 1313 ctx->tv[MAX_GPU_INSTANCE].num_shared = 1; 1314 list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list); 1315 } 1316 1317 r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list, 1318 ctx->intr, NULL); 1319 if (r) { 1320 pr_debug("failed %d to reserve bo\n", r); 1321 return r; 1322 } 1323 1324 for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { 1325 pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); 1326 if (!pdd) { 1327 pr_debug("failed to find device idx %d\n", gpuidx); 1328 r = -EINVAL; 1329 goto unreserve_out; 1330 } 1331 adev = (struct amdgpu_device *)pdd->dev->kgd; 1332 1333 r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv), 1334 svm_range_bo_validate, NULL); 1335 if (r) { 1336 pr_debug("failed %d validate pt bos\n", r); 1337 goto unreserve_out; 1338 } 1339 } 1340 1341 return 0; 1342 1343 unreserve_out: 1344 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); 1345 return r; 1346 } 1347 1348 static void svm_range_unreserve_bos(struct svm_validate_context *ctx) 1349 { 1350 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); 1351 } 1352 1353 static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) 1354 { 1355 struct kfd_process_device *pdd; 1356 struct amdgpu_device *adev; 1357 1358 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 1359 adev = (struct amdgpu_device *)pdd->dev->kgd; 1360 1361 return SVM_ADEV_PGMAP_OWNER(adev); 1362 } 1363 1364 /* 1365 * Validation+GPU mapping with concurrent invalidation (MMU notifiers) 1366 * 1367 * To prevent concurrent destruction or change of range attributes, the 1368 * svm_read_lock must be held. The caller must not hold the svm_write_lock 1369 * because that would block concurrent evictions and lead to deadlocks. To 1370 * serialize concurrent migrations or validations of the same range, the 1371 * prange->migrate_mutex must be held. 1372 * 1373 * For VRAM ranges, the SVM BO must be allocated and valid (protected by its 1374 * eviction fence. 1375 * 1376 * The following sequence ensures race-free validation and GPU mapping: 1377 * 1378 * 1. Reserve page table (and SVM BO if range is in VRAM) 1379 * 2. hmm_range_fault to get page addresses (if system memory) 1380 * 3. DMA-map pages (if system memory) 1381 * 4-a. Take notifier lock 1382 * 4-b. Check that pages still valid (mmu_interval_read_retry) 1383 * 4-c. Check that the range was not split or otherwise invalidated 1384 * 4-d. Update GPU page table 1385 * 4.e. Release notifier lock 1386 * 5. Release page table (and SVM BO) reservation 1387 */ 1388 static int svm_range_validate_and_map(struct mm_struct *mm, 1389 struct svm_range *prange, 1390 int32_t gpuidx, bool intr, bool wait) 1391 { 1392 struct svm_validate_context ctx; 1393 struct hmm_range *hmm_range; 1394 struct kfd_process *p; 1395 void *owner; 1396 int32_t idx; 1397 int r = 0; 1398 1399 ctx.process = container_of(prange->svms, struct kfd_process, svms); 1400 ctx.prange = prange; 1401 ctx.intr = intr; 1402 1403 if (gpuidx < MAX_GPU_INSTANCE) { 1404 bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); 1405 bitmap_set(ctx.bitmap, gpuidx, 1); 1406 } else if (ctx.process->xnack_enabled) { 1407 bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); 1408 1409 /* If prefetch range to GPU, or GPU retry fault migrate range to 1410 * GPU, which has ACCESS attribute to the range, create mapping 1411 * on that GPU. 1412 */ 1413 if (prange->actual_loc) { 1414 gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process, 1415 prange->actual_loc); 1416 if (gpuidx < 0) { 1417 WARN_ONCE(1, "failed get device by id 0x%x\n", 1418 prange->actual_loc); 1419 return -EINVAL; 1420 } 1421 if (test_bit(gpuidx, prange->bitmap_access)) 1422 bitmap_set(ctx.bitmap, gpuidx, 1); 1423 } 1424 } else { 1425 bitmap_or(ctx.bitmap, prange->bitmap_access, 1426 prange->bitmap_aip, MAX_GPU_INSTANCE); 1427 } 1428 1429 if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) 1430 return 0; 1431 1432 if (prange->actual_loc && !prange->ttm_res) { 1433 /* This should never happen. actual_loc gets set by 1434 * svm_migrate_ram_to_vram after allocating a BO. 1435 */ 1436 WARN(1, "VRAM BO missing during validation\n"); 1437 return -EINVAL; 1438 } 1439 1440 svm_range_reserve_bos(&ctx); 1441 1442 p = container_of(prange->svms, struct kfd_process, svms); 1443 owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap, 1444 MAX_GPU_INSTANCE)); 1445 for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) { 1446 if (kfd_svm_page_owner(p, idx) != owner) { 1447 owner = NULL; 1448 break; 1449 } 1450 } 1451 r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, 1452 prange->start << PAGE_SHIFT, 1453 prange->npages, &hmm_range, 1454 false, true, owner); 1455 if (r) { 1456 pr_debug("failed %d to get svm range pages\n", r); 1457 goto unreserve_out; 1458 } 1459 1460 r = svm_range_dma_map(prange, ctx.bitmap, 1461 hmm_range->hmm_pfns); 1462 if (r) { 1463 pr_debug("failed %d to dma map range\n", r); 1464 goto unreserve_out; 1465 } 1466 1467 prange->validated_once = true; 1468 1469 svm_range_lock(prange); 1470 if (amdgpu_hmm_range_get_pages_done(hmm_range)) { 1471 pr_debug("hmm update the range, need validate again\n"); 1472 r = -EAGAIN; 1473 goto unlock_out; 1474 } 1475 if (!list_empty(&prange->child_list)) { 1476 pr_debug("range split by unmap in parallel, validate again\n"); 1477 r = -EAGAIN; 1478 goto unlock_out; 1479 } 1480 1481 r = svm_range_map_to_gpus(prange, ctx.bitmap, wait); 1482 1483 unlock_out: 1484 svm_range_unlock(prange); 1485 unreserve_out: 1486 svm_range_unreserve_bos(&ctx); 1487 1488 if (!r) 1489 prange->validate_timestamp = ktime_to_us(ktime_get()); 1490 1491 return r; 1492 } 1493 1494 /** 1495 * svm_range_list_lock_and_flush_work - flush pending deferred work 1496 * 1497 * @svms: the svm range list 1498 * @mm: the mm structure 1499 * 1500 * Context: Returns with mmap write lock held, pending deferred work flushed 1501 * 1502 */ 1503 static void 1504 svm_range_list_lock_and_flush_work(struct svm_range_list *svms, 1505 struct mm_struct *mm) 1506 { 1507 retry_flush_work: 1508 flush_work(&svms->deferred_list_work); 1509 mmap_write_lock(mm); 1510 1511 if (list_empty(&svms->deferred_range_list)) 1512 return; 1513 mmap_write_unlock(mm); 1514 pr_debug("retry flush\n"); 1515 goto retry_flush_work; 1516 } 1517 1518 static void svm_range_restore_work(struct work_struct *work) 1519 { 1520 struct delayed_work *dwork = to_delayed_work(work); 1521 struct amdkfd_process_info *process_info; 1522 struct svm_range_list *svms; 1523 struct svm_range *prange; 1524 struct kfd_process *p; 1525 struct mm_struct *mm; 1526 int evicted_ranges; 1527 int invalid; 1528 int r; 1529 1530 svms = container_of(dwork, struct svm_range_list, restore_work); 1531 evicted_ranges = atomic_read(&svms->evicted_ranges); 1532 if (!evicted_ranges) 1533 return; 1534 1535 pr_debug("restore svm ranges\n"); 1536 1537 /* kfd_process_notifier_release destroys this worker thread. So during 1538 * the lifetime of this thread, kfd_process and mm will be valid. 1539 */ 1540 p = container_of(svms, struct kfd_process, svms); 1541 process_info = p->kgd_process_info; 1542 mm = p->mm; 1543 if (!mm) 1544 return; 1545 1546 mutex_lock(&process_info->lock); 1547 svm_range_list_lock_and_flush_work(svms, mm); 1548 mutex_lock(&svms->lock); 1549 1550 evicted_ranges = atomic_read(&svms->evicted_ranges); 1551 1552 list_for_each_entry(prange, &svms->list, list) { 1553 invalid = atomic_read(&prange->invalid); 1554 if (!invalid) 1555 continue; 1556 1557 pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n", 1558 prange->svms, prange, prange->start, prange->last, 1559 invalid); 1560 1561 /* 1562 * If range is migrating, wait for migration is done. 1563 */ 1564 mutex_lock(&prange->migrate_mutex); 1565 1566 r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, 1567 false, true); 1568 if (r) 1569 pr_debug("failed %d to map 0x%lx to gpus\n", r, 1570 prange->start); 1571 1572 mutex_unlock(&prange->migrate_mutex); 1573 if (r) 1574 goto out_reschedule; 1575 1576 if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid) 1577 goto out_reschedule; 1578 } 1579 1580 if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) != 1581 evicted_ranges) 1582 goto out_reschedule; 1583 1584 evicted_ranges = 0; 1585 1586 r = kgd2kfd_resume_mm(mm); 1587 if (r) { 1588 /* No recovery from this failure. Probably the CP is 1589 * hanging. No point trying again. 1590 */ 1591 pr_debug("failed %d to resume KFD\n", r); 1592 } 1593 1594 pr_debug("restore svm ranges successfully\n"); 1595 1596 out_reschedule: 1597 mutex_unlock(&svms->lock); 1598 mmap_write_unlock(mm); 1599 mutex_unlock(&process_info->lock); 1600 1601 /* If validation failed, reschedule another attempt */ 1602 if (evicted_ranges) { 1603 pr_debug("reschedule to restore svm range\n"); 1604 schedule_delayed_work(&svms->restore_work, 1605 msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); 1606 } 1607 } 1608 1609 /** 1610 * svm_range_evict - evict svm range 1611 * 1612 * Stop all queues of the process to ensure GPU doesn't access the memory, then 1613 * return to let CPU evict the buffer and proceed CPU pagetable update. 1614 * 1615 * Don't need use lock to sync cpu pagetable invalidation with GPU execution. 1616 * If invalidation happens while restore work is running, restore work will 1617 * restart to ensure to get the latest CPU pages mapping to GPU, then start 1618 * the queues. 1619 */ 1620 static int 1621 svm_range_evict(struct svm_range *prange, struct mm_struct *mm, 1622 unsigned long start, unsigned long last) 1623 { 1624 struct svm_range_list *svms = prange->svms; 1625 struct svm_range *pchild; 1626 struct kfd_process *p; 1627 int r = 0; 1628 1629 p = container_of(svms, struct kfd_process, svms); 1630 1631 pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n", 1632 svms, prange->start, prange->last, start, last); 1633 1634 if (!p->xnack_enabled) { 1635 int evicted_ranges; 1636 1637 list_for_each_entry(pchild, &prange->child_list, child_list) { 1638 mutex_lock_nested(&pchild->lock, 1); 1639 if (pchild->start <= last && pchild->last >= start) { 1640 pr_debug("increment pchild invalid [0x%lx 0x%lx]\n", 1641 pchild->start, pchild->last); 1642 atomic_inc(&pchild->invalid); 1643 } 1644 mutex_unlock(&pchild->lock); 1645 } 1646 1647 if (prange->start <= last && prange->last >= start) 1648 atomic_inc(&prange->invalid); 1649 1650 evicted_ranges = atomic_inc_return(&svms->evicted_ranges); 1651 if (evicted_ranges != 1) 1652 return r; 1653 1654 pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", 1655 prange->svms, prange->start, prange->last); 1656 1657 /* First eviction, stop the queues */ 1658 r = kgd2kfd_quiesce_mm(mm); 1659 if (r) 1660 pr_debug("failed to quiesce KFD\n"); 1661 1662 pr_debug("schedule to restore svm %p ranges\n", svms); 1663 schedule_delayed_work(&svms->restore_work, 1664 msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); 1665 } else { 1666 unsigned long s, l; 1667 1668 pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", 1669 prange->svms, start, last); 1670 list_for_each_entry(pchild, &prange->child_list, child_list) { 1671 mutex_lock_nested(&pchild->lock, 1); 1672 s = max(start, pchild->start); 1673 l = min(last, pchild->last); 1674 if (l >= s) 1675 svm_range_unmap_from_gpus(pchild, s, l); 1676 mutex_unlock(&pchild->lock); 1677 } 1678 s = max(start, prange->start); 1679 l = min(last, prange->last); 1680 if (l >= s) 1681 svm_range_unmap_from_gpus(prange, s, l); 1682 } 1683 1684 return r; 1685 } 1686 1687 static struct svm_range *svm_range_clone(struct svm_range *old) 1688 { 1689 struct svm_range *new; 1690 1691 new = svm_range_new(old->svms, old->start, old->last); 1692 if (!new) 1693 return NULL; 1694 1695 if (old->svm_bo) { 1696 new->ttm_res = old->ttm_res; 1697 new->offset = old->offset; 1698 new->svm_bo = svm_range_bo_ref(old->svm_bo); 1699 spin_lock(&new->svm_bo->list_lock); 1700 list_add(&new->svm_bo_list, &new->svm_bo->range_list); 1701 spin_unlock(&new->svm_bo->list_lock); 1702 } 1703 new->flags = old->flags; 1704 new->preferred_loc = old->preferred_loc; 1705 new->prefetch_loc = old->prefetch_loc; 1706 new->actual_loc = old->actual_loc; 1707 new->granularity = old->granularity; 1708 bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); 1709 bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); 1710 1711 return new; 1712 } 1713 1714 /** 1715 * svm_range_handle_overlap - split overlap ranges 1716 * @svms: svm range list header 1717 * @new: range added with this attributes 1718 * @start: range added start address, in pages 1719 * @last: range last address, in pages 1720 * @update_list: output, the ranges attributes are updated. For set_attr, this 1721 * will do validation and map to GPUs. For unmap, this will be 1722 * removed and unmap from GPUs 1723 * @insert_list: output, the ranges will be inserted into svms, attributes are 1724 * not changes. For set_attr, this will add into svms. 1725 * @remove_list:output, the ranges will be removed from svms 1726 * @left: the remaining range after overlap, For set_attr, this will be added 1727 * as new range. 1728 * 1729 * Total have 5 overlap cases. 1730 * 1731 * This function handles overlap of an address interval with existing 1732 * struct svm_ranges for applying new attributes. This may require 1733 * splitting existing struct svm_ranges. All changes should be applied to 1734 * the range_list and interval tree transactionally. If any split operation 1735 * fails, the entire update fails. Therefore the existing overlapping 1736 * svm_ranges are cloned and the original svm_ranges left unchanged. If the 1737 * transaction succeeds, the modified clones are added and the originals 1738 * freed. Otherwise the clones are removed and the old svm_ranges remain. 1739 * 1740 * Context: The caller must hold svms->lock 1741 */ 1742 static int 1743 svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, 1744 unsigned long start, unsigned long last, 1745 struct list_head *update_list, 1746 struct list_head *insert_list, 1747 struct list_head *remove_list, 1748 unsigned long *left) 1749 { 1750 struct interval_tree_node *node; 1751 struct svm_range *prange; 1752 struct svm_range *tmp; 1753 int r = 0; 1754 1755 INIT_LIST_HEAD(update_list); 1756 INIT_LIST_HEAD(insert_list); 1757 INIT_LIST_HEAD(remove_list); 1758 1759 node = interval_tree_iter_first(&svms->objects, start, last); 1760 while (node) { 1761 struct interval_tree_node *next; 1762 struct svm_range *old; 1763 unsigned long next_start; 1764 1765 pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start, 1766 node->last); 1767 1768 old = container_of(node, struct svm_range, it_node); 1769 next = interval_tree_iter_next(node, start, last); 1770 next_start = min(node->last, last) + 1; 1771 1772 if (node->start < start || node->last > last) { 1773 /* node intersects the updated range, clone+split it */ 1774 prange = svm_range_clone(old); 1775 if (!prange) { 1776 r = -ENOMEM; 1777 goto out; 1778 } 1779 1780 list_add(&old->remove_list, remove_list); 1781 list_add(&prange->insert_list, insert_list); 1782 1783 if (node->start < start) { 1784 pr_debug("change old range start\n"); 1785 r = svm_range_split_head(prange, new, start, 1786 insert_list); 1787 if (r) 1788 goto out; 1789 } 1790 if (node->last > last) { 1791 pr_debug("change old range last\n"); 1792 r = svm_range_split_tail(prange, new, last, 1793 insert_list); 1794 if (r) 1795 goto out; 1796 } 1797 } else { 1798 /* The node is contained within start..last, 1799 * just update it 1800 */ 1801 prange = old; 1802 } 1803 1804 if (!svm_range_is_same_attrs(prange, new)) 1805 list_add(&prange->update_list, update_list); 1806 1807 /* insert a new node if needed */ 1808 if (node->start > start) { 1809 prange = svm_range_new(prange->svms, start, 1810 node->start - 1); 1811 if (!prange) { 1812 r = -ENOMEM; 1813 goto out; 1814 } 1815 1816 list_add(&prange->insert_list, insert_list); 1817 list_add(&prange->update_list, update_list); 1818 } 1819 1820 node = next; 1821 start = next_start; 1822 } 1823 1824 if (left && start <= last) 1825 *left = last - start + 1; 1826 1827 out: 1828 if (r) 1829 list_for_each_entry_safe(prange, tmp, insert_list, insert_list) 1830 svm_range_free(prange); 1831 1832 return r; 1833 } 1834 1835 static void 1836 svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, 1837 struct svm_range *prange) 1838 { 1839 unsigned long start; 1840 unsigned long last; 1841 1842 start = prange->notifier.interval_tree.start >> PAGE_SHIFT; 1843 last = prange->notifier.interval_tree.last >> PAGE_SHIFT; 1844 1845 if (prange->start == start && prange->last == last) 1846 return; 1847 1848 pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", 1849 prange->svms, prange, start, last, prange->start, 1850 prange->last); 1851 1852 if (start != 0 && last != 0) { 1853 interval_tree_remove(&prange->it_node, &prange->svms->objects); 1854 svm_range_remove_notifier(prange); 1855 } 1856 prange->it_node.start = prange->start; 1857 prange->it_node.last = prange->last; 1858 1859 interval_tree_insert(&prange->it_node, &prange->svms->objects); 1860 svm_range_add_notifier_locked(mm, prange); 1861 } 1862 1863 static void 1864 svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) 1865 { 1866 struct mm_struct *mm = prange->work_item.mm; 1867 1868 switch (prange->work_item.op) { 1869 case SVM_OP_NULL: 1870 pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", 1871 svms, prange, prange->start, prange->last); 1872 break; 1873 case SVM_OP_UNMAP_RANGE: 1874 pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n", 1875 svms, prange, prange->start, prange->last); 1876 svm_range_unlink(prange); 1877 svm_range_remove_notifier(prange); 1878 svm_range_free(prange); 1879 break; 1880 case SVM_OP_UPDATE_RANGE_NOTIFIER: 1881 pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", 1882 svms, prange, prange->start, prange->last); 1883 svm_range_update_notifier_and_interval_tree(mm, prange); 1884 break; 1885 case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP: 1886 pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", 1887 svms, prange, prange->start, prange->last); 1888 svm_range_update_notifier_and_interval_tree(mm, prange); 1889 /* TODO: implement deferred validation and mapping */ 1890 break; 1891 case SVM_OP_ADD_RANGE: 1892 pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, 1893 prange->start, prange->last); 1894 svm_range_add_to_svms(prange); 1895 svm_range_add_notifier_locked(mm, prange); 1896 break; 1897 case SVM_OP_ADD_RANGE_AND_MAP: 1898 pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, 1899 prange, prange->start, prange->last); 1900 svm_range_add_to_svms(prange); 1901 svm_range_add_notifier_locked(mm, prange); 1902 /* TODO: implement deferred validation and mapping */ 1903 break; 1904 default: 1905 WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange, 1906 prange->work_item.op); 1907 } 1908 } 1909 1910 static void svm_range_drain_retry_fault(struct svm_range_list *svms) 1911 { 1912 struct kfd_process_device *pdd; 1913 struct amdgpu_device *adev; 1914 struct kfd_process *p; 1915 uint32_t i; 1916 1917 p = container_of(svms, struct kfd_process, svms); 1918 1919 for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { 1920 pdd = p->pdds[i]; 1921 if (!pdd) 1922 continue; 1923 1924 pr_debug("drain retry fault gpu %d svms %p\n", i, svms); 1925 adev = (struct amdgpu_device *)pdd->dev->kgd; 1926 1927 amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1); 1928 pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); 1929 } 1930 } 1931 1932 static void svm_range_deferred_list_work(struct work_struct *work) 1933 { 1934 struct svm_range_list *svms; 1935 struct svm_range *prange; 1936 struct mm_struct *mm; 1937 1938 svms = container_of(work, struct svm_range_list, deferred_list_work); 1939 pr_debug("enter svms 0x%p\n", svms); 1940 1941 spin_lock(&svms->deferred_list_lock); 1942 while (!list_empty(&svms->deferred_range_list)) { 1943 prange = list_first_entry(&svms->deferred_range_list, 1944 struct svm_range, deferred_list); 1945 spin_unlock(&svms->deferred_list_lock); 1946 pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, 1947 prange->start, prange->last, prange->work_item.op); 1948 1949 /* Make sure no stale retry fault coming after range is freed */ 1950 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) 1951 svm_range_drain_retry_fault(prange->svms); 1952 1953 mm = prange->work_item.mm; 1954 mmap_write_lock(mm); 1955 mutex_lock(&svms->lock); 1956 1957 /* Remove from deferred_list must be inside mmap write lock, 1958 * otherwise, svm_range_list_lock_and_flush_work may hold mmap 1959 * write lock, and continue because deferred_list is empty, then 1960 * deferred_list handle is blocked by mmap write lock. 1961 */ 1962 spin_lock(&svms->deferred_list_lock); 1963 list_del_init(&prange->deferred_list); 1964 spin_unlock(&svms->deferred_list_lock); 1965 1966 mutex_lock(&prange->migrate_mutex); 1967 while (!list_empty(&prange->child_list)) { 1968 struct svm_range *pchild; 1969 1970 pchild = list_first_entry(&prange->child_list, 1971 struct svm_range, child_list); 1972 pr_debug("child prange 0x%p op %d\n", pchild, 1973 pchild->work_item.op); 1974 list_del_init(&pchild->child_list); 1975 svm_range_handle_list_op(svms, pchild); 1976 } 1977 mutex_unlock(&prange->migrate_mutex); 1978 1979 svm_range_handle_list_op(svms, prange); 1980 mutex_unlock(&svms->lock); 1981 mmap_write_unlock(mm); 1982 1983 spin_lock(&svms->deferred_list_lock); 1984 } 1985 spin_unlock(&svms->deferred_list_lock); 1986 1987 pr_debug("exit svms 0x%p\n", svms); 1988 } 1989 1990 void 1991 svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, 1992 struct mm_struct *mm, enum svm_work_list_ops op) 1993 { 1994 spin_lock(&svms->deferred_list_lock); 1995 /* if prange is on the deferred list */ 1996 if (!list_empty(&prange->deferred_list)) { 1997 pr_debug("update exist prange 0x%p work op %d\n", prange, op); 1998 WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n"); 1999 if (op != SVM_OP_NULL && 2000 prange->work_item.op != SVM_OP_UNMAP_RANGE) 2001 prange->work_item.op = op; 2002 } else { 2003 prange->work_item.op = op; 2004 prange->work_item.mm = mm; 2005 list_add_tail(&prange->deferred_list, 2006 &prange->svms->deferred_range_list); 2007 pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", 2008 prange, prange->start, prange->last, op); 2009 } 2010 spin_unlock(&svms->deferred_list_lock); 2011 } 2012 2013 void schedule_deferred_list_work(struct svm_range_list *svms) 2014 { 2015 spin_lock(&svms->deferred_list_lock); 2016 if (!list_empty(&svms->deferred_range_list)) 2017 schedule_work(&svms->deferred_list_work); 2018 spin_unlock(&svms->deferred_list_lock); 2019 } 2020 2021 static void 2022 svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, 2023 struct svm_range *prange, unsigned long start, 2024 unsigned long last) 2025 { 2026 struct svm_range *head; 2027 struct svm_range *tail; 2028 2029 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) { 2030 pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange, 2031 prange->start, prange->last); 2032 return; 2033 } 2034 if (start > prange->last || last < prange->start) 2035 return; 2036 2037 head = tail = prange; 2038 if (start > prange->start) 2039 svm_range_split(prange, prange->start, start - 1, &tail); 2040 if (last < tail->last) 2041 svm_range_split(tail, last + 1, tail->last, &head); 2042 2043 if (head != prange && tail != prange) { 2044 svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); 2045 svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); 2046 } else if (tail != prange) { 2047 svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); 2048 } else if (head != prange) { 2049 svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); 2050 } else if (parent != prange) { 2051 prange->work_item.op = SVM_OP_UNMAP_RANGE; 2052 } 2053 } 2054 2055 static void 2056 svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, 2057 unsigned long start, unsigned long last) 2058 { 2059 struct svm_range_list *svms; 2060 struct svm_range *pchild; 2061 struct kfd_process *p; 2062 unsigned long s, l; 2063 bool unmap_parent; 2064 2065 p = kfd_lookup_process_by_mm(mm); 2066 if (!p) 2067 return; 2068 svms = &p->svms; 2069 2070 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, 2071 prange, prange->start, prange->last, start, last); 2072 2073 unmap_parent = start <= prange->start && last >= prange->last; 2074 2075 list_for_each_entry(pchild, &prange->child_list, child_list) { 2076 mutex_lock_nested(&pchild->lock, 1); 2077 s = max(start, pchild->start); 2078 l = min(last, pchild->last); 2079 if (l >= s) 2080 svm_range_unmap_from_gpus(pchild, s, l); 2081 svm_range_unmap_split(mm, prange, pchild, start, last); 2082 mutex_unlock(&pchild->lock); 2083 } 2084 s = max(start, prange->start); 2085 l = min(last, prange->last); 2086 if (l >= s) 2087 svm_range_unmap_from_gpus(prange, s, l); 2088 svm_range_unmap_split(mm, prange, prange, start, last); 2089 2090 if (unmap_parent) 2091 svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); 2092 else 2093 svm_range_add_list_work(svms, prange, mm, 2094 SVM_OP_UPDATE_RANGE_NOTIFIER); 2095 schedule_deferred_list_work(svms); 2096 2097 kfd_unref_process(p); 2098 } 2099 2100 /** 2101 * svm_range_cpu_invalidate_pagetables - interval notifier callback 2102 * 2103 * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it 2104 * is from migration, or CPU page invalidation callback. 2105 * 2106 * For unmap event, unmap range from GPUs, remove prange from svms in a delayed 2107 * work thread, and split prange if only part of prange is unmapped. 2108 * 2109 * For invalidation event, if GPU retry fault is not enabled, evict the queues, 2110 * then schedule svm_range_restore_work to update GPU mapping and resume queues. 2111 * If GPU retry fault is enabled, unmap the svm range from GPU, retry fault will 2112 * update GPU mapping to recover. 2113 * 2114 * Context: mmap lock, notifier_invalidate_start lock are held 2115 * for invalidate event, prange lock is held if this is from migration 2116 */ 2117 static bool 2118 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, 2119 const struct mmu_notifier_range *range, 2120 unsigned long cur_seq) 2121 { 2122 struct svm_range *prange; 2123 unsigned long start; 2124 unsigned long last; 2125 2126 if (range->event == MMU_NOTIFY_RELEASE) 2127 return true; 2128 2129 start = mni->interval_tree.start; 2130 last = mni->interval_tree.last; 2131 start = (start > range->start ? start : range->start) >> PAGE_SHIFT; 2132 last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT; 2133 pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n", 2134 start, last, range->start >> PAGE_SHIFT, 2135 (range->end - 1) >> PAGE_SHIFT, 2136 mni->interval_tree.start >> PAGE_SHIFT, 2137 mni->interval_tree.last >> PAGE_SHIFT, range->event); 2138 2139 prange = container_of(mni, struct svm_range, notifier); 2140 2141 svm_range_lock(prange); 2142 mmu_interval_set_seq(mni, cur_seq); 2143 2144 switch (range->event) { 2145 case MMU_NOTIFY_UNMAP: 2146 svm_range_unmap_from_cpu(mni->mm, prange, start, last); 2147 break; 2148 default: 2149 svm_range_evict(prange, mni->mm, start, last); 2150 break; 2151 } 2152 2153 svm_range_unlock(prange); 2154 2155 return true; 2156 } 2157 2158 /** 2159 * svm_range_from_addr - find svm range from fault address 2160 * @svms: svm range list header 2161 * @addr: address to search range interval tree, in pages 2162 * @parent: parent range if range is on child list 2163 * 2164 * Context: The caller must hold svms->lock 2165 * 2166 * Return: the svm_range found or NULL 2167 */ 2168 struct svm_range * 2169 svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, 2170 struct svm_range **parent) 2171 { 2172 struct interval_tree_node *node; 2173 struct svm_range *prange; 2174 struct svm_range *pchild; 2175 2176 node = interval_tree_iter_first(&svms->objects, addr, addr); 2177 if (!node) 2178 return NULL; 2179 2180 prange = container_of(node, struct svm_range, it_node); 2181 pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n", 2182 addr, prange->start, prange->last, node->start, node->last); 2183 2184 if (addr >= prange->start && addr <= prange->last) { 2185 if (parent) 2186 *parent = prange; 2187 return prange; 2188 } 2189 list_for_each_entry(pchild, &prange->child_list, child_list) 2190 if (addr >= pchild->start && addr <= pchild->last) { 2191 pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n", 2192 addr, pchild->start, pchild->last); 2193 if (parent) 2194 *parent = prange; 2195 return pchild; 2196 } 2197 2198 return NULL; 2199 } 2200 2201 /* svm_range_best_restore_location - decide the best fault restore location 2202 * @prange: svm range structure 2203 * @adev: the GPU on which vm fault happened 2204 * 2205 * This is only called when xnack is on, to decide the best location to restore 2206 * the range mapping after GPU vm fault. Caller uses the best location to do 2207 * migration if actual loc is not best location, then update GPU page table 2208 * mapping to the best location. 2209 * 2210 * If vm fault gpu is range preferred loc, the best_loc is preferred loc. 2211 * If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu 2212 * If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then 2213 * if range actual loc is cpu, best_loc is cpu 2214 * if vm fault gpu is on xgmi same hive of range actual loc gpu, best_loc is 2215 * range actual loc. 2216 * Otherwise, GPU no access, best_loc is -1. 2217 * 2218 * Return: 2219 * -1 means vm fault GPU no access 2220 * 0 for CPU or GPU id 2221 */ 2222 static int32_t 2223 svm_range_best_restore_location(struct svm_range *prange, 2224 struct amdgpu_device *adev, 2225 int32_t *gpuidx) 2226 { 2227 struct amdgpu_device *bo_adev; 2228 struct kfd_process *p; 2229 uint32_t gpuid; 2230 int r; 2231 2232 p = container_of(prange->svms, struct kfd_process, svms); 2233 2234 r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); 2235 if (r < 0) { 2236 pr_debug("failed to get gpuid from kgd\n"); 2237 return -1; 2238 } 2239 2240 if (prange->preferred_loc == gpuid) 2241 return prange->preferred_loc; 2242 2243 if (test_bit(*gpuidx, prange->bitmap_access)) 2244 return gpuid; 2245 2246 if (test_bit(*gpuidx, prange->bitmap_aip)) { 2247 if (!prange->actual_loc) 2248 return 0; 2249 2250 bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc); 2251 if (amdgpu_xgmi_same_hive(adev, bo_adev)) 2252 return prange->actual_loc; 2253 else 2254 return 0; 2255 } 2256 2257 return -1; 2258 } 2259 static int 2260 svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, 2261 unsigned long *start, unsigned long *last) 2262 { 2263 struct vm_area_struct *vma; 2264 struct interval_tree_node *node; 2265 unsigned long start_limit, end_limit; 2266 2267 vma = find_vma(p->mm, addr << PAGE_SHIFT); 2268 if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { 2269 pr_debug("VMA does not exist in address [0x%llx]\n", addr); 2270 return -EFAULT; 2271 } 2272 start_limit = max(vma->vm_start >> PAGE_SHIFT, 2273 (unsigned long)ALIGN_DOWN(addr, 2UL << 8)); 2274 end_limit = min(vma->vm_end >> PAGE_SHIFT, 2275 (unsigned long)ALIGN(addr + 1, 2UL << 8)); 2276 /* First range that starts after the fault address */ 2277 node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX); 2278 if (node) { 2279 end_limit = min(end_limit, node->start); 2280 /* Last range that ends before the fault address */ 2281 node = container_of(rb_prev(&node->rb), 2282 struct interval_tree_node, rb); 2283 } else { 2284 /* Last range must end before addr because 2285 * there was no range after addr 2286 */ 2287 node = container_of(rb_last(&p->svms.objects.rb_root), 2288 struct interval_tree_node, rb); 2289 } 2290 if (node) { 2291 if (node->last >= addr) { 2292 WARN(1, "Overlap with prev node and page fault addr\n"); 2293 return -EFAULT; 2294 } 2295 start_limit = max(start_limit, node->last + 1); 2296 } 2297 2298 *start = start_limit; 2299 *last = end_limit - 1; 2300 2301 pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n", 2302 vma->vm_start >> PAGE_SHIFT, *start, 2303 vma->vm_end >> PAGE_SHIFT, *last); 2304 2305 return 0; 2306 2307 } 2308 static struct 2309 svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, 2310 struct kfd_process *p, 2311 struct mm_struct *mm, 2312 int64_t addr) 2313 { 2314 struct svm_range *prange = NULL; 2315 unsigned long start, last; 2316 uint32_t gpuid, gpuidx; 2317 2318 if (svm_range_get_range_boundaries(p, addr, &start, &last)) 2319 return NULL; 2320 2321 prange = svm_range_new(&p->svms, start, last); 2322 if (!prange) { 2323 pr_debug("Failed to create prange in address [0x%llx]\n", addr); 2324 return NULL; 2325 } 2326 if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) { 2327 pr_debug("failed to get gpuid from kgd\n"); 2328 svm_range_free(prange); 2329 return NULL; 2330 } 2331 2332 svm_range_add_to_svms(prange); 2333 svm_range_add_notifier_locked(mm, prange); 2334 2335 return prange; 2336 } 2337 2338 /* svm_range_skip_recover - decide if prange can be recovered 2339 * @prange: svm range structure 2340 * 2341 * GPU vm retry fault handle skip recover the range for cases: 2342 * 1. prange is on deferred list to be removed after unmap, it is stale fault, 2343 * deferred list work will drain the stale fault before free the prange. 2344 * 2. prange is on deferred list to add interval notifier after split, or 2345 * 3. prange is child range, it is split from parent prange, recover later 2346 * after interval notifier is added. 2347 * 2348 * Return: true to skip recover, false to recover 2349 */ 2350 static bool svm_range_skip_recover(struct svm_range *prange) 2351 { 2352 struct svm_range_list *svms = prange->svms; 2353 2354 spin_lock(&svms->deferred_list_lock); 2355 if (list_empty(&prange->deferred_list) && 2356 list_empty(&prange->child_list)) { 2357 spin_unlock(&svms->deferred_list_lock); 2358 return false; 2359 } 2360 spin_unlock(&svms->deferred_list_lock); 2361 2362 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) { 2363 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n", 2364 svms, prange, prange->start, prange->last); 2365 return true; 2366 } 2367 if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP || 2368 prange->work_item.op == SVM_OP_ADD_RANGE) { 2369 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n", 2370 svms, prange, prange->start, prange->last); 2371 return true; 2372 } 2373 return false; 2374 } 2375 2376 static void 2377 svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, 2378 int32_t gpuidx) 2379 { 2380 struct kfd_process_device *pdd; 2381 2382 /* fault is on different page of same range 2383 * or fault is skipped to recover later 2384 * or fault is on invalid virtual address 2385 */ 2386 if (gpuidx == MAX_GPU_INSTANCE) { 2387 uint32_t gpuid; 2388 int r; 2389 2390 r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx); 2391 if (r < 0) 2392 return; 2393 } 2394 2395 /* fault is recovered 2396 * or fault cannot recover because GPU no access on the range 2397 */ 2398 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 2399 if (pdd) 2400 WRITE_ONCE(pdd->faults, pdd->faults + 1); 2401 } 2402 2403 int 2404 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, 2405 uint64_t addr) 2406 { 2407 struct mm_struct *mm = NULL; 2408 struct svm_range_list *svms; 2409 struct svm_range *prange; 2410 struct kfd_process *p; 2411 uint64_t timestamp; 2412 int32_t best_loc; 2413 int32_t gpuidx = MAX_GPU_INSTANCE; 2414 bool write_locked = false; 2415 int r = 0; 2416 2417 if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { 2418 pr_debug("device does not support SVM\n"); 2419 return -EFAULT; 2420 } 2421 2422 p = kfd_lookup_process_by_pasid(pasid); 2423 if (!p) { 2424 pr_debug("kfd process not founded pasid 0x%x\n", pasid); 2425 return -ESRCH; 2426 } 2427 if (!p->xnack_enabled) { 2428 pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); 2429 return -EFAULT; 2430 } 2431 svms = &p->svms; 2432 2433 pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); 2434 2435 mm = get_task_mm(p->lead_thread); 2436 if (!mm) { 2437 pr_debug("svms 0x%p failed to get mm\n", svms); 2438 r = -ESRCH; 2439 goto out; 2440 } 2441 2442 mmap_read_lock(mm); 2443 retry_write_locked: 2444 mutex_lock(&svms->lock); 2445 prange = svm_range_from_addr(svms, addr, NULL); 2446 if (!prange) { 2447 pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", 2448 svms, addr); 2449 if (!write_locked) { 2450 /* Need the write lock to create new range with MMU notifier. 2451 * Also flush pending deferred work to make sure the interval 2452 * tree is up to date before we add a new range 2453 */ 2454 mutex_unlock(&svms->lock); 2455 mmap_read_unlock(mm); 2456 mmap_write_lock(mm); 2457 write_locked = true; 2458 goto retry_write_locked; 2459 } 2460 prange = svm_range_create_unregistered_range(adev, p, mm, addr); 2461 if (!prange) { 2462 pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n", 2463 svms, addr); 2464 mmap_write_downgrade(mm); 2465 r = -EFAULT; 2466 goto out_unlock_svms; 2467 } 2468 } 2469 if (write_locked) 2470 mmap_write_downgrade(mm); 2471 2472 mutex_lock(&prange->migrate_mutex); 2473 2474 if (svm_range_skip_recover(prange)) { 2475 amdgpu_gmc_filter_faults_remove(adev, addr, pasid); 2476 goto out_unlock_range; 2477 } 2478 2479 timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; 2480 /* skip duplicate vm fault on different pages of same range */ 2481 if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { 2482 pr_debug("svms 0x%p [0x%lx %lx] already restored\n", 2483 svms, prange->start, prange->last); 2484 goto out_unlock_range; 2485 } 2486 2487 best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); 2488 if (best_loc == -1) { 2489 pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", 2490 svms, prange->start, prange->last); 2491 r = -EACCES; 2492 goto out_unlock_range; 2493 } 2494 2495 pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n", 2496 svms, prange->start, prange->last, best_loc, 2497 prange->actual_loc); 2498 2499 if (prange->actual_loc != best_loc) { 2500 if (best_loc) { 2501 r = svm_migrate_to_vram(prange, best_loc, mm); 2502 if (r) { 2503 pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", 2504 r, addr); 2505 /* Fallback to system memory if migration to 2506 * VRAM failed 2507 */ 2508 if (prange->actual_loc) 2509 r = svm_migrate_vram_to_ram(prange, mm); 2510 else 2511 r = 0; 2512 } 2513 } else { 2514 r = svm_migrate_vram_to_ram(prange, mm); 2515 } 2516 if (r) { 2517 pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", 2518 r, svms, prange->start, prange->last); 2519 goto out_unlock_range; 2520 } 2521 } 2522 2523 r = svm_range_validate_and_map(mm, prange, gpuidx, false, false); 2524 if (r) 2525 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", 2526 r, svms, prange->start, prange->last); 2527 2528 out_unlock_range: 2529 mutex_unlock(&prange->migrate_mutex); 2530 out_unlock_svms: 2531 mutex_unlock(&svms->lock); 2532 mmap_read_unlock(mm); 2533 2534 svm_range_count_fault(adev, p, gpuidx); 2535 2536 mmput(mm); 2537 out: 2538 kfd_unref_process(p); 2539 2540 if (r == -EAGAIN) { 2541 pr_debug("recover vm fault later\n"); 2542 amdgpu_gmc_filter_faults_remove(adev, addr, pasid); 2543 r = 0; 2544 } 2545 return r; 2546 } 2547 2548 void svm_range_list_fini(struct kfd_process *p) 2549 { 2550 struct svm_range *prange; 2551 struct svm_range *next; 2552 2553 pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); 2554 2555 /* Ensure list work is finished before process is destroyed */ 2556 flush_work(&p->svms.deferred_list_work); 2557 2558 list_for_each_entry_safe(prange, next, &p->svms.list, list) { 2559 svm_range_unlink(prange); 2560 svm_range_remove_notifier(prange); 2561 svm_range_free(prange); 2562 } 2563 2564 mutex_destroy(&p->svms.lock); 2565 2566 pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms); 2567 } 2568 2569 int svm_range_list_init(struct kfd_process *p) 2570 { 2571 struct svm_range_list *svms = &p->svms; 2572 int i; 2573 2574 svms->objects = RB_ROOT_CACHED; 2575 mutex_init(&svms->lock); 2576 INIT_LIST_HEAD(&svms->list); 2577 atomic_set(&svms->evicted_ranges, 0); 2578 INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work); 2579 INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); 2580 INIT_LIST_HEAD(&svms->deferred_range_list); 2581 spin_lock_init(&svms->deferred_list_lock); 2582 2583 for (i = 0; i < p->n_pdds; i++) 2584 if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev)) 2585 bitmap_set(svms->bitmap_supported, i, 1); 2586 2587 return 0; 2588 } 2589 2590 /** 2591 * svm_range_is_valid - check if virtual address range is valid 2592 * @mm: current process mm_struct 2593 * @start: range start address, in pages 2594 * @size: range size, in pages 2595 * 2596 * Valid virtual address range means it belongs to one or more VMAs 2597 * 2598 * Context: Process context 2599 * 2600 * Return: 2601 * true - valid svm range 2602 * false - invalid svm range 2603 */ 2604 static bool 2605 svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size) 2606 { 2607 const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP; 2608 struct vm_area_struct *vma; 2609 unsigned long end; 2610 2611 start <<= PAGE_SHIFT; 2612 end = start + (size << PAGE_SHIFT); 2613 2614 do { 2615 vma = find_vma(mm, start); 2616 if (!vma || start < vma->vm_start || 2617 (vma->vm_flags & device_vma)) 2618 return false; 2619 start = min(end, vma->vm_end); 2620 } while (start < end); 2621 2622 return true; 2623 } 2624 2625 /** 2626 * svm_range_add - add svm range and handle overlap 2627 * @p: the range add to this process svms 2628 * @start: page size aligned 2629 * @size: page size aligned 2630 * @nattr: number of attributes 2631 * @attrs: array of attributes 2632 * @update_list: output, the ranges need validate and update GPU mapping 2633 * @insert_list: output, the ranges need insert to svms 2634 * @remove_list: output, the ranges are replaced and need remove from svms 2635 * 2636 * Check if the virtual address range has overlap with the registered ranges, 2637 * split the overlapped range, copy and adjust pages address and vram nodes in 2638 * old and new ranges. 2639 * 2640 * Context: Process context, caller must hold svms->lock 2641 * 2642 * Return: 2643 * 0 - OK, otherwise error code 2644 */ 2645 static int 2646 svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, 2647 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, 2648 struct list_head *update_list, struct list_head *insert_list, 2649 struct list_head *remove_list) 2650 { 2651 uint64_t last = start + size - 1UL; 2652 struct svm_range_list *svms; 2653 struct svm_range new = {0}; 2654 struct svm_range *prange; 2655 unsigned long left = 0; 2656 int r = 0; 2657 2658 pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last); 2659 2660 svm_range_apply_attrs(p, &new, nattr, attrs); 2661 2662 svms = &p->svms; 2663 2664 r = svm_range_handle_overlap(svms, &new, start, last, update_list, 2665 insert_list, remove_list, &left); 2666 if (r) 2667 return r; 2668 2669 if (left) { 2670 prange = svm_range_new(svms, last - left + 1, last); 2671 list_add(&prange->insert_list, insert_list); 2672 list_add(&prange->update_list, update_list); 2673 } 2674 2675 return 0; 2676 } 2677 2678 /* svm_range_best_prefetch_location - decide the best prefetch location 2679 * @prange: svm range structure 2680 * 2681 * For xnack off: 2682 * If range map to single GPU, the best acutal location is prefetch loc, which 2683 * can be CPU or GPU. 2684 * 2685 * If range map to multiple GPUs, only if mGPU connection on xgmi same hive, 2686 * the best actual location could be prefetch_loc GPU. If mGPU connection on 2687 * PCIe, the best actual location is always CPU, because GPU cannot access vram 2688 * of other GPUs, assuming PCIe small bar (large bar support is not upstream). 2689 * 2690 * For xnack on: 2691 * The best actual location is prefetch location. If mGPU connection on xgmi 2692 * same hive, range map to multiple GPUs. Otherwise, the range only map to 2693 * actual location GPU. Other GPU access vm fault will trigger migration. 2694 * 2695 * Context: Process context 2696 * 2697 * Return: 2698 * 0 for CPU or GPU id 2699 */ 2700 static uint32_t 2701 svm_range_best_prefetch_location(struct svm_range *prange) 2702 { 2703 DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); 2704 uint32_t best_loc = prange->prefetch_loc; 2705 struct kfd_process_device *pdd; 2706 struct amdgpu_device *bo_adev; 2707 struct amdgpu_device *adev; 2708 struct kfd_process *p; 2709 uint32_t gpuidx; 2710 2711 p = container_of(prange->svms, struct kfd_process, svms); 2712 2713 /* xnack on */ 2714 if (p->xnack_enabled) 2715 goto out; 2716 2717 /* xnack off */ 2718 if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) 2719 goto out; 2720 2721 bo_adev = svm_range_get_adev_by_id(prange, best_loc); 2722 if (!bo_adev) { 2723 WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc); 2724 best_loc = 0; 2725 goto out; 2726 } 2727 bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, 2728 MAX_GPU_INSTANCE); 2729 2730 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { 2731 pdd = kfd_process_device_from_gpuidx(p, gpuidx); 2732 if (!pdd) { 2733 pr_debug("failed to get device by idx 0x%x\n", gpuidx); 2734 continue; 2735 } 2736 adev = (struct amdgpu_device *)pdd->dev->kgd; 2737 2738 if (adev == bo_adev) 2739 continue; 2740 2741 if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { 2742 best_loc = 0; 2743 break; 2744 } 2745 } 2746 2747 out: 2748 pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n", 2749 p->xnack_enabled, &p->svms, prange->start, prange->last, 2750 best_loc); 2751 2752 return best_loc; 2753 } 2754 2755 /* FIXME: This is a workaround for page locking bug when some pages are 2756 * invalid during migration to VRAM 2757 */ 2758 void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, 2759 void *owner) 2760 { 2761 struct hmm_range *hmm_range; 2762 int r; 2763 2764 if (prange->validated_once) 2765 return; 2766 2767 r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, 2768 prange->start << PAGE_SHIFT, 2769 prange->npages, &hmm_range, 2770 false, true, owner); 2771 if (!r) { 2772 amdgpu_hmm_range_get_pages_done(hmm_range); 2773 prange->validated_once = true; 2774 } 2775 } 2776 2777 /* svm_range_trigger_migration - start page migration if prefetch loc changed 2778 * @mm: current process mm_struct 2779 * @prange: svm range structure 2780 * @migrated: output, true if migration is triggered 2781 * 2782 * If range perfetch_loc is GPU, actual loc is cpu 0, then migrate the range 2783 * from ram to vram. 2784 * If range prefetch_loc is cpu 0, actual loc is GPU, then migrate the range 2785 * from vram to ram. 2786 * 2787 * If GPU vm fault retry is not enabled, migration interact with MMU notifier 2788 * and restore work: 2789 * 1. migrate_vma_setup invalidate pages, MMU notifier callback svm_range_evict 2790 * stops all queues, schedule restore work 2791 * 2. svm_range_restore_work wait for migration is done by 2792 * a. svm_range_validate_vram takes prange->migrate_mutex 2793 * b. svm_range_validate_ram HMM get pages wait for CPU fault handle returns 2794 * 3. restore work update mappings of GPU, resume all queues. 2795 * 2796 * Context: Process context 2797 * 2798 * Return: 2799 * 0 - OK, otherwise - error code of migration 2800 */ 2801 static int 2802 svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, 2803 bool *migrated) 2804 { 2805 uint32_t best_loc; 2806 int r = 0; 2807 2808 *migrated = false; 2809 best_loc = svm_range_best_prefetch_location(prange); 2810 2811 if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || 2812 best_loc == prange->actual_loc) 2813 return 0; 2814 2815 if (!best_loc) { 2816 r = svm_migrate_vram_to_ram(prange, mm); 2817 *migrated = !r; 2818 return r; 2819 } 2820 2821 r = svm_migrate_to_vram(prange, best_loc, mm); 2822 *migrated = !r; 2823 2824 return r; 2825 } 2826 2827 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) 2828 { 2829 if (!fence) 2830 return -EINVAL; 2831 2832 if (dma_fence_is_signaled(&fence->base)) 2833 return 0; 2834 2835 if (fence->svm_bo) { 2836 WRITE_ONCE(fence->svm_bo->evicting, 1); 2837 schedule_work(&fence->svm_bo->eviction_work); 2838 } 2839 2840 return 0; 2841 } 2842 2843 static void svm_range_evict_svm_bo_worker(struct work_struct *work) 2844 { 2845 struct svm_range_bo *svm_bo; 2846 struct kfd_process *p; 2847 struct mm_struct *mm; 2848 2849 svm_bo = container_of(work, struct svm_range_bo, eviction_work); 2850 if (!svm_bo_ref_unless_zero(svm_bo)) 2851 return; /* svm_bo was freed while eviction was pending */ 2852 2853 /* svm_range_bo_release destroys this worker thread. So during 2854 * the lifetime of this thread, kfd_process and mm will be valid. 2855 */ 2856 p = container_of(svm_bo->svms, struct kfd_process, svms); 2857 mm = p->mm; 2858 if (!mm) 2859 return; 2860 2861 mmap_read_lock(mm); 2862 spin_lock(&svm_bo->list_lock); 2863 while (!list_empty(&svm_bo->range_list)) { 2864 struct svm_range *prange = 2865 list_first_entry(&svm_bo->range_list, 2866 struct svm_range, svm_bo_list); 2867 list_del_init(&prange->svm_bo_list); 2868 spin_unlock(&svm_bo->list_lock); 2869 2870 pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, 2871 prange->start, prange->last); 2872 2873 mutex_lock(&prange->migrate_mutex); 2874 svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm); 2875 2876 mutex_lock(&prange->lock); 2877 prange->svm_bo = NULL; 2878 mutex_unlock(&prange->lock); 2879 2880 mutex_unlock(&prange->migrate_mutex); 2881 2882 spin_lock(&svm_bo->list_lock); 2883 } 2884 spin_unlock(&svm_bo->list_lock); 2885 mmap_read_unlock(mm); 2886 2887 dma_fence_signal(&svm_bo->eviction_fence->base); 2888 /* This is the last reference to svm_bo, after svm_range_vram_node_free 2889 * has been called in svm_migrate_vram_to_ram 2890 */ 2891 WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n"); 2892 svm_range_bo_unref(svm_bo); 2893 } 2894 2895 static int 2896 svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, 2897 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) 2898 { 2899 struct amdkfd_process_info *process_info = p->kgd_process_info; 2900 struct mm_struct *mm = current->mm; 2901 struct list_head update_list; 2902 struct list_head insert_list; 2903 struct list_head remove_list; 2904 struct svm_range_list *svms; 2905 struct svm_range *prange; 2906 struct svm_range *next; 2907 int r = 0; 2908 2909 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", 2910 p->pasid, &p->svms, start, start + size - 1, size); 2911 2912 r = svm_range_check_attr(p, nattr, attrs); 2913 if (r) 2914 return r; 2915 2916 svms = &p->svms; 2917 2918 mutex_lock(&process_info->lock); 2919 2920 svm_range_list_lock_and_flush_work(svms, mm); 2921 2922 if (!svm_range_is_valid(mm, start, size)) { 2923 pr_debug("invalid range\n"); 2924 r = -EFAULT; 2925 mmap_write_unlock(mm); 2926 goto out; 2927 } 2928 2929 mutex_lock(&svms->lock); 2930 2931 /* Add new range and split existing ranges as needed */ 2932 r = svm_range_add(p, start, size, nattr, attrs, &update_list, 2933 &insert_list, &remove_list); 2934 if (r) { 2935 mutex_unlock(&svms->lock); 2936 mmap_write_unlock(mm); 2937 goto out; 2938 } 2939 /* Apply changes as a transaction */ 2940 list_for_each_entry_safe(prange, next, &insert_list, insert_list) { 2941 svm_range_add_to_svms(prange); 2942 svm_range_add_notifier_locked(mm, prange); 2943 } 2944 list_for_each_entry(prange, &update_list, update_list) { 2945 svm_range_apply_attrs(p, prange, nattr, attrs); 2946 /* TODO: unmap ranges from GPU that lost access */ 2947 } 2948 list_for_each_entry_safe(prange, next, &remove_list, 2949 remove_list) { 2950 pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", 2951 prange->svms, prange, prange->start, 2952 prange->last); 2953 svm_range_unlink(prange); 2954 svm_range_remove_notifier(prange); 2955 svm_range_free(prange); 2956 } 2957 2958 mmap_write_downgrade(mm); 2959 /* Trigger migrations and revalidate and map to GPUs as needed. If 2960 * this fails we may be left with partially completed actions. There 2961 * is no clean way of rolling back to the previous state in such a 2962 * case because the rollback wouldn't be guaranteed to work either. 2963 */ 2964 list_for_each_entry(prange, &update_list, update_list) { 2965 bool migrated; 2966 2967 mutex_lock(&prange->migrate_mutex); 2968 2969 r = svm_range_trigger_migration(mm, prange, &migrated); 2970 if (r) 2971 goto out_unlock_range; 2972 2973 if (migrated && !p->xnack_enabled) { 2974 pr_debug("restore_work will update mappings of GPUs\n"); 2975 mutex_unlock(&prange->migrate_mutex); 2976 continue; 2977 } 2978 2979 r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, 2980 true, true); 2981 if (r) 2982 pr_debug("failed %d to map svm range\n", r); 2983 2984 out_unlock_range: 2985 mutex_unlock(&prange->migrate_mutex); 2986 if (r) 2987 break; 2988 } 2989 2990 svm_range_debug_dump(svms); 2991 2992 mutex_unlock(&svms->lock); 2993 mmap_read_unlock(mm); 2994 out: 2995 mutex_unlock(&process_info->lock); 2996 2997 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, 2998 &p->svms, start, start + size - 1, r); 2999 3000 return r; 3001 } 3002 3003 static int 3004 svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, 3005 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) 3006 { 3007 DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); 3008 DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); 3009 bool get_preferred_loc = false; 3010 bool get_prefetch_loc = false; 3011 bool get_granularity = false; 3012 bool get_accessible = false; 3013 bool get_flags = false; 3014 uint64_t last = start + size - 1UL; 3015 struct mm_struct *mm = current->mm; 3016 uint8_t granularity = 0xff; 3017 struct interval_tree_node *node; 3018 struct svm_range_list *svms; 3019 struct svm_range *prange; 3020 uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; 3021 uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; 3022 uint32_t flags = 0xffffffff; 3023 int gpuidx; 3024 uint32_t i; 3025 3026 pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start, 3027 start + size - 1, nattr); 3028 3029 /* Flush pending deferred work to avoid racing with deferred actions from 3030 * previous memory map changes (e.g. munmap). Concurrent memory map changes 3031 * can still race with get_attr because we don't hold the mmap lock. But that 3032 * would be a race condition in the application anyway, and undefined 3033 * behaviour is acceptable in that case. 3034 */ 3035 flush_work(&p->svms.deferred_list_work); 3036 3037 mmap_read_lock(mm); 3038 if (!svm_range_is_valid(mm, start, size)) { 3039 pr_debug("invalid range\n"); 3040 mmap_read_unlock(mm); 3041 return -EINVAL; 3042 } 3043 mmap_read_unlock(mm); 3044 3045 for (i = 0; i < nattr; i++) { 3046 switch (attrs[i].type) { 3047 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: 3048 get_preferred_loc = true; 3049 break; 3050 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: 3051 get_prefetch_loc = true; 3052 break; 3053 case KFD_IOCTL_SVM_ATTR_ACCESS: 3054 get_accessible = true; 3055 break; 3056 case KFD_IOCTL_SVM_ATTR_SET_FLAGS: 3057 get_flags = true; 3058 break; 3059 case KFD_IOCTL_SVM_ATTR_GRANULARITY: 3060 get_granularity = true; 3061 break; 3062 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: 3063 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: 3064 case KFD_IOCTL_SVM_ATTR_NO_ACCESS: 3065 fallthrough; 3066 default: 3067 pr_debug("get invalid attr type 0x%x\n", attrs[i].type); 3068 return -EINVAL; 3069 } 3070 } 3071 3072 svms = &p->svms; 3073 3074 mutex_lock(&svms->lock); 3075 3076 node = interval_tree_iter_first(&svms->objects, start, last); 3077 if (!node) { 3078 pr_debug("range attrs not found return default values\n"); 3079 svm_range_set_default_attributes(&location, &prefetch_loc, 3080 &granularity, &flags); 3081 if (p->xnack_enabled) 3082 bitmap_copy(bitmap_access, svms->bitmap_supported, 3083 MAX_GPU_INSTANCE); 3084 else 3085 bitmap_zero(bitmap_access, MAX_GPU_INSTANCE); 3086 bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE); 3087 goto fill_values; 3088 } 3089 bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); 3090 bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE); 3091 3092 while (node) { 3093 struct interval_tree_node *next; 3094 3095 prange = container_of(node, struct svm_range, it_node); 3096 next = interval_tree_iter_next(node, start, last); 3097 3098 if (get_preferred_loc) { 3099 if (prange->preferred_loc == 3100 KFD_IOCTL_SVM_LOCATION_UNDEFINED || 3101 (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED && 3102 location != prange->preferred_loc)) { 3103 location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; 3104 get_preferred_loc = false; 3105 } else { 3106 location = prange->preferred_loc; 3107 } 3108 } 3109 if (get_prefetch_loc) { 3110 if (prange->prefetch_loc == 3111 KFD_IOCTL_SVM_LOCATION_UNDEFINED || 3112 (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED && 3113 prefetch_loc != prange->prefetch_loc)) { 3114 prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; 3115 get_prefetch_loc = false; 3116 } else { 3117 prefetch_loc = prange->prefetch_loc; 3118 } 3119 } 3120 if (get_accessible) { 3121 bitmap_and(bitmap_access, bitmap_access, 3122 prange->bitmap_access, MAX_GPU_INSTANCE); 3123 bitmap_and(bitmap_aip, bitmap_aip, 3124 prange->bitmap_aip, MAX_GPU_INSTANCE); 3125 } 3126 if (get_flags) 3127 flags &= prange->flags; 3128 3129 if (get_granularity && prange->granularity < granularity) 3130 granularity = prange->granularity; 3131 3132 node = next; 3133 } 3134 fill_values: 3135 mutex_unlock(&svms->lock); 3136 3137 for (i = 0; i < nattr; i++) { 3138 switch (attrs[i].type) { 3139 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: 3140 attrs[i].value = location; 3141 break; 3142 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: 3143 attrs[i].value = prefetch_loc; 3144 break; 3145 case KFD_IOCTL_SVM_ATTR_ACCESS: 3146 gpuidx = kfd_process_gpuidx_from_gpuid(p, 3147 attrs[i].value); 3148 if (gpuidx < 0) { 3149 pr_debug("invalid gpuid %x\n", attrs[i].value); 3150 return -EINVAL; 3151 } 3152 if (test_bit(gpuidx, bitmap_access)) 3153 attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS; 3154 else if (test_bit(gpuidx, bitmap_aip)) 3155 attrs[i].type = 3156 KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE; 3157 else 3158 attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS; 3159 break; 3160 case KFD_IOCTL_SVM_ATTR_SET_FLAGS: 3161 attrs[i].value = flags; 3162 break; 3163 case KFD_IOCTL_SVM_ATTR_GRANULARITY: 3164 attrs[i].value = (uint32_t)granularity; 3165 break; 3166 } 3167 } 3168 3169 return 0; 3170 } 3171 3172 int 3173 svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, 3174 uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs) 3175 { 3176 int r; 3177 3178 start >>= PAGE_SHIFT; 3179 size >>= PAGE_SHIFT; 3180 3181 switch (op) { 3182 case KFD_IOCTL_SVM_OP_SET_ATTR: 3183 r = svm_range_set_attr(p, start, size, nattrs, attrs); 3184 break; 3185 case KFD_IOCTL_SVM_OP_GET_ATTR: 3186 r = svm_range_get_attr(p, start, size, nattrs, attrs); 3187 break; 3188 default: 3189 r = EINVAL; 3190 break; 3191 } 3192 3193 return r; 3194 } 3195