// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"

static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

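/*
 * svm_migrate_gart_map() builds a single IB that holds the sdma copy packet
 * (num_dw dwords) followed by one 8-byte GART PTE per page. The PTEs are
 * generated in place at cpu_addr by amdgpu_gart_map(), and the copy packet
 * moves them into the GART table at window 0 (adev->gmc.gart_start), so the
 * scattered system pages become one contiguous range in the GART aperture
 * for the buffer copy that follows.
 */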
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned int num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	uint64_t pte_flags;
	void *cpu_addr;
	int r;

	/* use gart window 0 */
	*gart_addr = adev->gmc.gart_start;

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = npages * 8;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
		pte_flags |= AMDGPU_PTE_WRITEABLE;
	pte_flags |= adev->gart.gart_pte_flags;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring is running on
 * @sys: system (ram) page dma address array
 * @vram: vram page address array
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * The ram side is accessed through contiguous GART table entries mapped to
 * the ram pages, the vram side uses the direct mapping of the vram pages,
 * which must cover npages contiguous pages.
 * GART update and the sdma copy use the same buffer-copy ring. The copy is
 * split into multiple transfers of at most GTT_MAX_PAGES each; all sdma
 * operations are serialized, so waiting on the fence of the last transfer,
 * which is returned in @mfence, is enough to know the whole copy is done.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
			     uint64_t *vram, uint64_t npages,
			     enum MIGRATION_COPY_DIR direction,
			     struct dma_fence **mfence)
{
	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	uint64_t gart_s, gart_d;
	struct dma_fence *next;
	uint64_t size;
	int r;

	mutex_lock(&adev->mman.gtt_window_lock);

	while (npages) {
		size = min(GTT_MAX_PAGES, npages);

		if (direction == FROM_VRAM_TO_RAM) {
			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);

		} else if (direction == FROM_RAM_TO_VRAM) {
			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
						 KFD_IOCTL_SVM_FLAG_GPU_RO);
			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
		}
		if (r) {
			pr_debug("failed %d to create gart mapping\n", r);
			goto out_unlock;
		}

		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
				       NULL, &next, false, true, false);
		if (r) {
			pr_debug("failed %d to copy memory\n", r);
			goto out_unlock;
		}

		dma_fence_put(*mfence);
		*mfence = next;
		npages -= size;
		if (npages) {
			sys += size;
			vram += size;
		}
	}

out_unlock:
	mutex_unlock(&adev->mman.gtt_window_lock);

	return r;
}

/**
 * svm_migrate_copy_done - wait for the sdma memory copy to finish
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait for the dma fence to be signaled. If the copy was split into multiple
 * sdma operations, this is the fence of the last sdma operation.
 *
 * Context: called after svm_migrate_copy_memory_gart
 *
 * Return:
 * 0 - success
 * otherwise - error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
	int r = 0;

	if (mfence) {
		r = dma_fence_wait(mfence, false);
		dma_fence_put(mfence);
		pr_debug("sdma copy memory fence done\n");
	}

	return r;
}

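/*
 * svm_migrate_node_physical_addr() returns the physical VRAM address of the
 * page at *offset (in pages) within the drm_mm_node array backing the range.
 * If the offset lies past the current node, it walks forward to the node
 * containing it and updates *mm_node and *offset so the caller can keep
 * iterating from there.
 */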
static uint64_t
svm_migrate_node_physical_addr(struct amdgpu_device *adev,
			       struct drm_mm_node **mm_node, uint64_t *offset)
{
	struct drm_mm_node *node = *mm_node;
	uint64_t pos = *offset;

	if (node->start == AMDGPU_BO_INVALID_OFFSET) {
		pr_debug("drm node is not validated\n");
		return 0;
	}

	pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
		 node->size);

	if (pos >= node->size) {
		do {
			pos -= node->size;
			node++;
		} while (pos >= node->size);

		*mm_node = node;
		*offset = pos;
	}

	return (node->start + pos) << PAGE_SHIFT;
}

unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
	return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	page->zone_device_data = prange;
	get_page(page);
	lock_page(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
	unlock_page(page);
	put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
	unsigned long addr;

	addr = page_to_pfn(page) << PAGE_SHIFT;
	return (addr - adev->kfd.dev->pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;

	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
	if (page)
		lock_page(page);

	return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(addr >> PAGE_SHIFT);
	unlock_page(page);
	put_page(page);
}

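/*
 * svm_migrate_copy_to_vram() works on the scratch area set up by its caller:
 * the first npages dma_addr_t slots (src) receive the dma-mapped addresses of
 * the system pages, the following npages uint64_t slots (dst) the physical
 * vram addresses the pages migrate to. Pages that land in contiguous vram are
 * batched into one svm_migrate_copy_memory_gart() call; holes in migrate->src
 * and vram node boundaries flush the current batch.
 */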
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			 struct migrate_vma *migrate, struct dma_fence **mfence,
			 dma_addr_t *scratch)
{
	uint64_t npages = migrate->cpages;
	struct device *dev = adev->dev;
	struct drm_mm_node *node;
	dma_addr_t *src;
	uint64_t *dst;
	uint64_t vram_addr;
	uint64_t offset;
	uint64_t i, j;
	int r;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	src = scratch;
	dst = (uint64_t *)(scratch + npages);

	r = svm_range_vram_node_new(adev, prange, true);
	if (r) {
		pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
		goto out;
	}

	node = prange->ttm_res->mm_node;
	offset = prange->offset;
	vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
	if (!vram_addr) {
		WARN_ONCE(1, "vram node address is 0\n");
		r = -ENOMEM;
		goto out;
	}

	for (i = j = 0; i < npages; i++) {
		struct page *spage;

		dst[i] = vram_addr + (j << PAGE_SHIFT);
		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
		svm_migrate_get_vram_page(prange, migrate->dst[i]);

		migrate->dst[i] = migrate_pfn(migrate->dst[i]);
		migrate->dst[i] |= MIGRATE_PFN_LOCKED;

		if (migrate->src[i] & MIGRATE_PFN_VALID) {
			spage = migrate_pfn_to_page(migrate->src[i]);
			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
					      DMA_TO_DEVICE);
			r = dma_mapping_error(dev, src[i]);
			if (r) {
				pr_debug("failed %d dma_map_page\n", r);
				goto out_free_vram_pages;
			}
		} else {
			if (j) {
				r = svm_migrate_copy_memory_gart(
						adev, src + i - j,
						dst + i - j, j,
						FROM_RAM_TO_VRAM,
						mfence);
				if (r)
					goto out_free_vram_pages;
				offset += j;
				vram_addr = (node->start + offset) << PAGE_SHIFT;
				j = 0;
			} else {
				offset++;
				vram_addr += PAGE_SIZE;
			}
			if (offset >= node->size) {
				node++;
				pr_debug("next node size 0x%llx\n", node->size);
				vram_addr = node->start << PAGE_SHIFT;
				offset = 0;
			}
			continue;
		}

		pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
			 src[i] >> PAGE_SHIFT, page_to_pfn(spage));

		if (j + offset >= node->size - 1 && i < npages - 1) {
			r = svm_migrate_copy_memory_gart(adev, src + i - j,
							 dst + i - j, j + 1,
							 FROM_RAM_TO_VRAM,
							 mfence);
			if (r)
				goto out_free_vram_pages;

			node++;
			pr_debug("next node size 0x%llx\n", node->size);
			vram_addr = node->start << PAGE_SHIFT;
			offset = 0;
			j = 0;
		} else {
			j++;
		}
	}

	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
					 FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
	if (r) {
		pr_debug("failed %d to copy memory to vram\n", r);
		while (i--) {
			svm_migrate_put_vram_page(adev, dst[i]);
			migrate->dst[i] = 0;
		}
	}

out:
	return r;
}

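/*
 * svm_migrate_vma_to_vram() allocates one buffer that is carved up into the
 * migrate.src and migrate.dst pfn arrays used by the migrate_vma helpers,
 * followed by the scratch area (one dma_addr_t plus one uint64_t per page)
 * that svm_migrate_copy_to_vram() uses for the sdma source and destination
 * addresses.
 */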
static int
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end)
{
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	size_t size;
	void *buf;
	int r = -ENOMEM;
	int retry = 0;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
	migrate.pgmap_owner = adev;

	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
	size *= npages;
	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

retry:
	r = migrate_vma_setup(&migrate);
	if (r) {
		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange->start, prange->last);
		goto out_free;
	}
	if (migrate.cpages != npages) {
		pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
			 npages);
		migrate_vma_finalize(&migrate);
		if (retry++ >= 3) {
			r = -ENOMEM;
			pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
				 r, prange->svms, prange->start, prange->last);
			goto out_free;
		}

		goto retry;
	}

	if (migrate.cpages) {
		svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
					 scratch);
		migrate_vma_pages(&migrate);
		svm_migrate_copy_done(adev, mfence);
		migrate_vma_finalize(&migrate);
	}

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
	svm_range_free_dma_mappings(prange);

out_free:
	kvfree(buf);
out:
	return r;
}

/**
 * svm_migrate_ram_to_vram - migrate svm range from system to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: the process mm structure
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm)
{
	unsigned long addr, start, end;
	struct vm_area_struct *vma;
	struct amdgpu_device *adev;
	int r = 0;

	if (prange->actual_loc == best_loc) {
		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
			 prange->svms, prange->start, prange->last, best_loc);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, best_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n", best_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
		 prange->start, prange->last, best_loc);

	/* FIXME: workaround for page locking bug with invalid pages */
	svm_range_prefault(prange, mm);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
		if (r) {
			pr_debug("failed to migrate\n");
			break;
		}
		addr = next;
	}

	if (!r)
		prange->actual_loc = best_loc;

	return r;
}

static void svm_migrate_page_free(struct page *page)
{
	/* Keep this function to avoid warning */
}

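/*
 * svm_migrate_copy_to_ram() uses the scratch area the other way around: the
 * first npages slots (dst) hold the dma-mapped addresses of the newly
 * allocated system pages and the following npages slots (src) the vram
 * addresses the data is copied from. Runs of vram pages that are physically
 * contiguous are coalesced into a single sdma copy.
 */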
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
			struct migrate_vma *migrate, struct dma_fence **mfence,
			dma_addr_t *scratch)
{
	uint64_t npages = migrate->cpages;
	struct device *dev = adev->dev;
	uint64_t *src;
	dma_addr_t *dst;
	struct page *dpage;
	uint64_t i = 0, j;
	uint64_t addr;
	int r = 0;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	addr = prange->start << PAGE_SHIFT;

	src = (uint64_t *)(scratch + npages);
	dst = scratch;

	for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
		struct page *spage;

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (!spage) {
			pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}
		src[i] = svm_migrate_addr(adev, spage);
		if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
			r = svm_migrate_copy_memory_gart(adev, dst + i - j,
							 src + i - j, j,
							 FROM_VRAM_TO_RAM,
							 mfence);
			if (r)
				goto out_oom;
			j = 0;
		}

		dpage = svm_migrate_get_sys_page(migrate->vma, addr);
		if (!dpage) {
			pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}

		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		r = dma_mapping_error(dev, dst[i]);
		if (r) {
			pr_debug("failed %d dma_map_page\n", r);
			goto out_oom;
		}

		pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
			 dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
		migrate->dst[i] |= MIGRATE_PFN_LOCKED;
	}

	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
					 FROM_VRAM_TO_RAM, mfence);

out_oom:
	if (r) {
		pr_debug("failed %d copy to ram\n", r);
		while (i--) {
			svm_migrate_put_sys_page(dst[i]);
			migrate->dst[i] = 0;
		}
	}

	return r;
}

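/*
 * svm_migrate_vma_to_ram() collects only this device's pages: with
 * MIGRATE_VMA_SELECT_DEVICE_PRIVATE and pgmap_owner set to adev,
 * migrate_vma_setup() skips device-private pages owned by other pagemaps.
 * The buffer layout is the same as in svm_migrate_vma_to_vram().
 */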
static int
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	size_t size;
	void *buf;
	int r = -ENOMEM;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	migrate.pgmap_owner = adev;

	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
	size *= npages;
	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	r = migrate_vma_setup(&migrate);
	if (r) {
		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange->start, prange->last);
		goto out_free;
	}

	pr_debug("cpages %ld\n", migrate.cpages);

	if (migrate.cpages) {
		svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
					scratch);
		migrate_vma_pages(&migrate);
		svm_migrate_copy_done(adev, mfence);
		migrate_vma_finalize(&migrate);
	} else {
		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
	}

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
	kvfree(buf);
out:
	return r;
}

/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
	struct amdgpu_device *adev;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
	unsigned long end;
	int r = 0;

	if (!prange->actual_loc) {
		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
			 prange->start, prange->last);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n",
			 prange->actual_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, prange->start, prange->last,
		 prange->actual_loc);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
		if (r) {
			pr_debug("failed %d to migrate\n", r);
			break;
		}
		addr = next;
	}

	if (!r) {
		svm_range_vram_node_free(prange);
		prange->actual_loc = 0;
	}
	return r;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 struct mm_struct *mm)
{
	int r;

	/*
	 * TODO: when both devices have a large PCIe BAR or are on the same
	 * XGMI hive, skip using system memory as the migration bridge
	 */

	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		return r;

	return svm_migrate_ram_to_vram(prange, best_loc, mm);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm)
{
	if (!prange->actual_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, mm);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, mm);
}

/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma and address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notify the application of a SIGBUS page fault
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long addr = vmf->address;
	struct vm_area_struct *vma;
	enum svm_work_list_ops op;
	struct svm_range *parent;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	vma = vmf->vma;
	mm = vma->vm_mm;

	p = kfd_lookup_process_by_mm(vma->vm_mm);
	if (!p) {
		pr_debug("failed find process at fault address 0x%lx\n", addr);
		return VM_FAULT_SIGBUS;
	}
	addr >>= PAGE_SHIFT;
	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);

	mutex_lock(&p->svms.lock);

	prange = svm_range_from_addr(&p->svms, addr, &parent);
	if (!prange) {
		pr_debug("cannot find svm range at 0x%lx\n", addr);
		r = -EFAULT;
		goto out;
	}

	mutex_lock(&parent->migrate_mutex);
	if (prange != parent)
		mutex_lock_nested(&prange->migrate_mutex, 1);

	if (!prange->actual_loc)
		goto out_unlock_prange;

	svm_range_lock(parent);
	if (prange != parent)
		mutex_lock_nested(&prange->lock, 1);
	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
	if (prange != parent)
		mutex_unlock(&prange->lock);
	svm_range_unlock(parent);
	if (r) {
		pr_debug("failed %d to split range by granularity\n", r);
		goto out_unlock_prange;
	}

	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
			 prange, prange->start, prange->last);

	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
	if (p->xnack_enabled && parent == prange)
		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
	else
		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
	svm_range_add_list_work(&p->svms, parent, mm, op);
	schedule_deferred_list_work(&p->svms);

out_unlock_prange:
	if (prange != parent)
		mutex_unlock(&prange->migrate_mutex);
	mutex_unlock(&parent->migrate_mutex);
out:
	mutex_unlock(&p->svms.lock);
	kfd_unref_process(p);

	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);

	return r ? VM_FAULT_SIGBUS : 0;
}

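/*
 * dev_pagemap_ops for the device-private VRAM pages: ->page_free is required
 * for MEMORY_DEVICE_PRIVATE, hence the empty svm_migrate_page_free() stub,
 * and ->migrate_to_ram is called on CPU faults against pages that were
 * migrated to VRAM.
 */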
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
	.page_free = svm_migrate_page_free,
	.migrate_to_ram = svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) of system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))

int svm_migrate_init(struct amdgpu_device *adev)
{
	struct kfd_dev *kfddev = adev->kfd.dev;
	struct dev_pagemap *pgmap;
	struct resource *res;
	unsigned long size;
	void *r;

	/* Page migration works on Vega10 or newer */
	if (kfddev->device_info->asic_family < CHIP_VEGA10)
		return -EINVAL;

	pgmap = &kfddev->pgmap;
	memset(pgmap, 0, sizeof(*pgmap));

	/* TODO: register all vram to HMM for now.
	 * should remove the reserved size
	 */
	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
	res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
	if (IS_ERR(res))
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_PRIVATE;
	pgmap->nr_range = 1;
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;
	pgmap->ops = &svm_migrate_pgmap_ops;
	pgmap->owner = adev;
	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	r = devm_memremap_pages(adev->dev, pgmap);
	if (IS_ERR(r)) {
		pr_err("failed to register HMM device memory\n");
		devm_release_mem_region(adev->dev, res->start,
					res->end - res->start + 1);
		return PTR_ERR(r);
	}

	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

	pr_info("HMM registered %ldMB device memory\n", size >> 20);

	return 0;
}

void svm_migrate_fini(struct amdgpu_device *adev)
{
	struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;

	devm_memunmap_pages(adev->dev, pgmap);
	devm_release_mem_region(adev->dev, pgmap->range.start,
				pgmap->range.end - pgmap->range.start + 1);
}