// SPDX-License-Identifier: GPL-2.0
/*
 * Device Memory Migration functionality.
 *
 * Originally written by Jérôme Glisse.
 */
#include <linux/export.h>
#include <linux/memremap.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/oom.h>
#include <linux/pagewalk.h>
#include <linux/rmap.h>
#include <linux/swapops.h>
#include <asm/tlbflush.h>
#include "internal.h"

static int migrate_vma_collect_skip(unsigned long start,
				    unsigned long end,
				    struct mm_walk *walk)
{
	struct migrate_vma *migrate = walk->private;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		migrate->dst[migrate->npages] = 0;
		migrate->src[migrate->npages++] = 0;
	}

	return 0;
}

static int migrate_vma_collect_hole(unsigned long start,
				    unsigned long end,
				    __always_unused int depth,
				    struct mm_walk *walk)
{
	struct migrate_vma *migrate = walk->private;
	unsigned long addr;

	/* Only allow populating anonymous memory. */
	if (!vma_is_anonymous(walk->vma))
		return migrate_vma_collect_skip(start, end, walk);

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
		migrate->dst[migrate->npages] = 0;
		migrate->npages++;
		migrate->cpages++;
	}

	return 0;
}

static int migrate_vma_collect_pmd(pmd_t *pmdp,
				   unsigned long start,
				   unsigned long end,
				   struct mm_walk *walk)
{
	struct migrate_vma *migrate = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = start, unmapped = 0;
	spinlock_t *ptl;
	pte_t *ptep;

again:
	if (pmd_none(*pmdp))
		return migrate_vma_collect_hole(start, end, -1, walk);

	if (pmd_trans_huge(*pmdp)) {
		struct page *page;

		ptl = pmd_lock(mm, pmdp);
		if (unlikely(!pmd_trans_huge(*pmdp))) {
			spin_unlock(ptl);
			goto again;
		}

		page = pmd_page(*pmdp);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			split_huge_pmd(vma, pmdp, addr);
			if (pmd_trans_unstable(pmdp))
				return migrate_vma_collect_skip(start, end,
								walk);
		} else {
			int ret;

			get_page(page);
			spin_unlock(ptl);
			if (unlikely(!trylock_page(page)))
				return migrate_vma_collect_skip(start, end,
								walk);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (ret)
				return migrate_vma_collect_skip(start, end,
								walk);
			if (pmd_none(*pmdp))
				return migrate_vma_collect_hole(start, end, -1,
								walk);
		}
	}

	if (unlikely(pmd_bad(*pmdp)))
		return migrate_vma_collect_skip(start, end, walk);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	arch_enter_lazy_mmu_mode();

	for (; addr < end; addr += PAGE_SIZE, ptep++) {
		unsigned long mpfn = 0, pfn;
		struct page *page;
		swp_entry_t entry;
		pte_t pte;

		pte = *ptep;

		if (pte_none(pte)) {
			if (vma_is_anonymous(vma)) {
				mpfn = MIGRATE_PFN_MIGRATE;
				migrate->cpages++;
			}
			goto next;
		}

		if (!pte_present(pte)) {
			/*
			 * Only care about unaddressable device page special
			 * page table entries. Other special swap entries are
			 * not migratable, and we ignore regular swapped pages.
			 */
			entry = pte_to_swp_entry(pte);
			if (!is_device_private_entry(entry))
				goto next;

			page = pfn_swap_entry_to_page(entry);
			if (!(migrate->flags &
				MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
			    page->pgmap->owner != migrate->pgmap_owner)
				goto next;

			mpfn = migrate_pfn(page_to_pfn(page)) |
					MIGRATE_PFN_MIGRATE;
			if (is_writable_device_private_entry(entry))
				mpfn |= MIGRATE_PFN_WRITE;
		} else {
			pfn = pte_pfn(pte);
			if (is_zero_pfn(pfn) &&
			    (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
				mpfn = MIGRATE_PFN_MIGRATE;
				migrate->cpages++;
				goto next;
			}
			page = vm_normal_page(migrate->vma, addr, pte);
			if (page && !is_zone_device_page(page) &&
			    !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
				goto next;
			else if (page && is_device_coherent_page(page) &&
			    (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
			     page->pgmap->owner != migrate->pgmap_owner))
				goto next;
			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
		}

		/* FIXME support THP */
		if (!page || !page->mapping || PageTransCompound(page)) {
			mpfn = 0;
			goto next;
		}

		/*
		 * By getting a reference on the page we pin it and that blocks
		 * any kind of migration. Side effect is that it "freezes" the
		 * pte.
		 *
		 * We drop this reference after isolating the page from the LRU
		 * for non-device pages (device pages are not on the LRU and
		 * thus can't be dropped from it).
		 */
		get_page(page);

		/*
		 * Optimize for the common case where the page is only mapped
		 * once in one process. If we can lock the page, then we can
		 * safely set up a special migration page table entry now.
		 */
		if (trylock_page(page)) {
			bool anon_exclusive;
			pte_t swp_pte;

			anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
			if (anon_exclusive) {
				flush_cache_page(vma, addr, pte_pfn(*ptep));
				ptep_clear_flush(vma, addr, ptep);

				if (page_try_share_anon_rmap(page)) {
					set_pte_at(mm, addr, ptep, pte);
					unlock_page(page);
					put_page(page);
					mpfn = 0;
					goto next;
				}
			} else {
				ptep_get_and_clear(mm, addr, ptep);
			}

			migrate->cpages++;

			/* Setup special migration page table entry */
			if (mpfn & MIGRATE_PFN_WRITE)
				entry = make_writable_migration_entry(
							page_to_pfn(page));
			else if (anon_exclusive)
				entry = make_readable_exclusive_migration_entry(
							page_to_pfn(page));
			else
				entry = make_readable_migration_entry(
							page_to_pfn(page));
			swp_pte = swp_entry_to_pte(entry);
			if (pte_present(pte)) {
				if (pte_soft_dirty(pte))
					swp_pte = pte_swp_mksoft_dirty(swp_pte);
				if (pte_uffd_wp(pte))
					swp_pte = pte_swp_mkuffd_wp(swp_pte);
			} else {
				if (pte_swp_soft_dirty(pte))
					swp_pte = pte_swp_mksoft_dirty(swp_pte);
				if (pte_swp_uffd_wp(pte))
					swp_pte = pte_swp_mkuffd_wp(swp_pte);
			}
			set_pte_at(mm, addr, ptep, swp_pte);

			/*
			 * This is like a regular unmap: we remove the rmap and
			 * drop the page refcount. The page won't be freed, as
			 * we took a reference just above.
			 */
			page_remove_rmap(page, vma, false);
			put_page(page);

			if (pte_present(pte))
				unmapped++;
		} else {
			put_page(page);
			mpfn = 0;
		}

next:
		migrate->dst[migrate->npages] = 0;
		migrate->src[migrate->npages++] = mpfn;
	}
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(ptep - 1, ptl);

	/* Only flush the TLB if we actually modified any entries */
	if (unmapped)
		flush_tlb_range(walk->vma, start, end);

	return 0;
}

static const struct mm_walk_ops migrate_vma_walk_ops = {
	.pmd_entry = migrate_vma_collect_pmd,
	.pte_hole = migrate_vma_collect_hole,
};

/*
 * migrate_vma_collect() - collect pages over a range of virtual addresses
 * @migrate: migrate struct containing all migration information
 *
 * This will walk the CPU page table. For each virtual address backed by a
 * valid page, it updates the src array and takes a reference on the page, in
 * order to pin the page until we lock it and unmap it.
 */
static void migrate_vma_collect(struct migrate_vma *migrate)
{
	struct mmu_notifier_range range;

	/*
	 * Note that the pgmap_owner is passed to the mmu notifier callback so
	 * that the registered device driver can skip invalidating device
	 * private page mappings that won't be migrated.
	 */
	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
		migrate->vma, migrate->vma->vm_mm, migrate->start, migrate->end,
		migrate->pgmap_owner);
	mmu_notifier_invalidate_range_start(&range);

	walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
			&migrate_vma_walk_ops, migrate);

	mmu_notifier_invalidate_range_end(&range);
	migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
}

/*
 * migrate_vma_check_page() - check if page is pinned or not
 * @page: struct page to check
 *
 * Pinned pages cannot be migrated. This is the same test as in
 * folio_migrate_mapping(), except that here we allow migration of a
 * ZONE_DEVICE page.
 */
static bool migrate_vma_check_page(struct page *page)
{
	/*
	 * One extra ref because the caller holds an extra reference, either
	 * from isolate_lru_page() for a regular page, or migrate_vma_collect()
	 * for a device page.
	 */
	int extra = 1;

	/*
	 * FIXME support THP (transparent huge page): it is a bit more complex
	 * to check them than regular pages, because they can be mapped with a
	 * pmd or with a pte (split pte mapping).
	 */
	if (PageCompound(page))
		return false;

	/* Pages from ZONE_DEVICE have one extra reference */
	if (is_zone_device_page(page))
		extra++;

	/* For file-backed pages */
	if (page_mapping(page))
		extra += 1 + page_has_private(page);

	if ((page_count(page) - extra) > page_mapcount(page))
		return false;

	return true;
}

/*
 * migrate_vma_unmap() - replace page mapping with special migration pte entry
 * @migrate: migrate struct containing all migration information
 *
 * Isolate pages from the LRU and replace mappings (CPU page table pte) with a
 * special migration pte entry. Then check whether each page has been pinned:
 * pinned pages are restored because we cannot migrate them.
 *
 * This is the last step before we call the device driver callback to allocate
 * destination memory and copy the contents of the original page over to the
 * new page.
 */
static void migrate_vma_unmap(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	unsigned long i, restore = 0;
	bool allow_drain = true;

	lru_add_drain();

	for (i = 0; i < npages; i++) {
		struct page *page = migrate_pfn_to_page(migrate->src[i]);
		struct folio *folio;

		if (!page)
			continue;

		/* ZONE_DEVICE pages are not on LRU */
		if (!is_zone_device_page(page)) {
			if (!PageLRU(page) && allow_drain) {
				/* Drain CPU's pagevec */
				lru_add_drain_all();
				allow_drain = false;
			}

			if (isolate_lru_page(page)) {
				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
				migrate->cpages--;
				restore++;
				continue;
			}

			/* Drop the reference we took in collect */
			put_page(page);
		}

		folio = page_folio(page);
		if (folio_mapped(folio))
			try_to_migrate(folio, 0);

		if (page_mapped(page) || !migrate_vma_check_page(page)) {
			if (!is_zone_device_page(page)) {
				get_page(page);
				putback_lru_page(page);
			}

			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
			migrate->cpages--;
			restore++;
			continue;
		}
	}

	for (i = 0; i < npages && restore; i++) {
		struct page *page = migrate_pfn_to_page(migrate->src[i]);
		struct folio *folio;

		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		folio = page_folio(page);
		remove_migration_ptes(folio, folio, false);

		migrate->src[i] = 0;
		folio_unlock(folio);
		folio_put(folio);
		restore--;
	}
}

/**
 * migrate_vma_setup() - prepare to migrate a range of memory
 * @args: contains the vma, start, and pfns arrays for the migration
 *
 * Returns: negative errno on failures, 0 when 0 or more pages were migrated
 * without an error.
 *
 * Prepare to migrate a virtual address range by collecting all the pages
 * backing each virtual address in the range and saving them inside the src
 * array. Then lock those pages and unmap them. Once the pages are locked and
 * unmapped, check whether each page is pinned or not. Pages that aren't
 * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
 * corresponding src array entry. Any pages that are pinned are then restored
 * by remapping and unlocking them.
 *
 * The caller should then allocate destination memory and copy source memory to
 * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
 * flag set). Once these are allocated and copied, the caller must update each
 * corresponding entry in the dst array with the pfn value of the destination
 * page and with MIGRATE_PFN_VALID. Destination pages must be locked via
 * lock_page().
 *
 * Note that the caller does not have to migrate all the pages that are marked
 * with the MIGRATE_PFN_MIGRATE flag in the src array unless this is a
 * migration from device memory to system memory. If the caller cannot migrate
 * a device page back to system memory, then it must return VM_FAULT_SIGBUS,
 * which has severe consequences for the userspace process, so it must be
 * avoided if at all possible.
 *
 * For empty entries inside the CPU page table (pte_none() or pmd_none() is
 * true) we do set the MIGRATE_PFN_MIGRATE flag inside the corresponding source
 * array entry, thus allowing the caller to allocate device memory for those
 * unbacked virtual addresses.
 * For this the caller simply has to allocate device memory and properly set
 * the destination entry like for regular migration. Note that this can still
 * fail, and thus inside the device driver you must check if the migration was
 * successful for those entries after calling migrate_vma_pages(), just like
 * for regular migration.
 *
 * After that, the caller must call migrate_vma_pages() to go over each entry
 * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
 * flags set. If the corresponding entry in the dst array has the
 * MIGRATE_PFN_VALID flag set, then migrate_vma_pages() migrates the struct
 * page information from the source struct page to the destination struct
 * page. If it fails to migrate the struct page information, then it clears
 * the MIGRATE_PFN_MIGRATE flag in the src array.
 *
 * At this point all successfully migrated pages have an entry in the src
 * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
 * array entry with MIGRATE_PFN_VALID flag set.
 *
 * Once migrate_vma_pages() returns the caller may inspect which pages were
 * successfully migrated, and which were not. Successfully migrated pages will
 * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
 *
 * It is safe to update the device page table after migrate_vma_pages() because
 * both destination and source page are still locked, and the mmap_lock is held
 * in read mode (hence no one can unmap the range being migrated).
 *
 * Once the caller is done cleaning up things and updating its page table (if it
 * chose to do so, this is not an obligation) it finally calls
 * migrate_vma_finalize() to update the CPU page table to point to new pages
 * for successfully migrated pages or otherwise restore the CPU page table to
 * point to the original source pages.
 */
int migrate_vma_setup(struct migrate_vma *args)
{
	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;

	args->start &= PAGE_MASK;
	args->end &= PAGE_MASK;
	if (!args->vma || is_vm_hugetlb_page(args->vma) ||
	    (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
		return -EINVAL;
	if (nr_pages <= 0)
		return -EINVAL;
	if (args->start < args->vma->vm_start ||
	    args->start >= args->vma->vm_end)
		return -EINVAL;
	if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
		return -EINVAL;
	if (!args->src || !args->dst)
		return -EINVAL;

	memset(args->src, 0, sizeof(*args->src) * nr_pages);
	args->cpages = 0;
	args->npages = 0;

	migrate_vma_collect(args);

	if (args->cpages)
		migrate_vma_unmap(args);

	/*
	 * At this point pages are locked and unmapped, and thus they have
	 * stable content and can safely be copied to destination memory that
	 * is allocated by the drivers.
	 */
	return 0;

}
EXPORT_SYMBOL(migrate_vma_setup);
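
/*
 * A minimal sketch (kept as a comment, not part of this file's API) of how a
 * device driver might drive the setup/pages/finalize sequence documented
 * above when migrating a range of anonymous memory into device memory.
 * dmirror_alloc_device_page() and dmirror_copy_to_device() are hypothetical
 * stand-ins for driver-specific allocation and DMA copy helpers; everything
 * else is the interface documented above. Error handling is reduced to the
 * bare minimum.
 *
 *	static int example_migrate_to_device(struct vm_area_struct *vma,
 *					     unsigned long start,
 *					     unsigned long end,
 *					     void *pgmap_owner)
 *	{
 *		unsigned long npages = (end - start) >> PAGE_SHIFT;
 *		unsigned long *src, *dst;
 *		struct migrate_vma args = {
 *			.vma		= vma,
 *			.start		= start,
 *			.end		= end,
 *			.pgmap_owner	= pgmap_owner,
 *			.flags		= MIGRATE_VMA_SELECT_SYSTEM,
 *		};
 *		unsigned long i;
 *		int ret = -ENOMEM;
 *
 *		src = kcalloc(npages, sizeof(*src), GFP_KERNEL);
 *		dst = kcalloc(npages, sizeof(*dst), GFP_KERNEL);
 *		if (!src || !dst)
 *			goto out;
 *		args.src = src;
 *		args.dst = dst;
 *
 *		ret = migrate_vma_setup(&args);
 *		if (ret)
 *			goto out;
 *
 *		for (i = 0; i < args.npages; i++) {
 *			struct page *spage, *dpage;
 *
 *			if (!(src[i] & MIGRATE_PFN_MIGRATE))
 *				continue;
 *			dpage = dmirror_alloc_device_page();	// hypothetical
 *			if (!dpage)
 *				continue;
 *			// Destination pages must be locked, see above.
 *			lock_page(dpage);
 *			spage = migrate_pfn_to_page(src[i]);
 *			if (spage)
 *				dmirror_copy_to_device(dpage, spage); // hypothetical
 *			dst[i] = migrate_pfn(page_to_pfn(dpage));
 *		}
 *
 *		migrate_vma_pages(&args);
 *		// Update the device page table here: src[i] still has
 *		// MIGRATE_PFN_MIGRATE set for entries that migrated.
 *		migrate_vma_finalize(&args);
 *	out:
 *		kfree(src);
 *		kfree(dst);
 *		return ret;
 *	}
 */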

/*
 * This code closely matches the code in:
 *   __handle_mm_fault()
 *     handle_pte_fault()
 *       do_anonymous_page()
 * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
 * private or coherent page.
 */
static void migrate_vma_insert_page(struct migrate_vma *migrate,
				    unsigned long addr,
				    struct page *page,
				    unsigned long *src)
{
	struct vm_area_struct *vma = migrate->vma;
	struct mm_struct *mm = vma->vm_mm;
	bool flush = false;
	spinlock_t *ptl;
	pte_t entry;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	/* Only allow populating anonymous memory */
	if (!vma_is_anonymous(vma))
		goto abort;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (!p4dp)
		goto abort;
	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		goto abort;
	pmdp = pmd_alloc(mm, pudp, addr);
	if (!pmdp)
		goto abort;

	if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
		goto abort;

	/*
	 * Use pte_alloc() instead of pte_alloc_map(). We can't run
	 * pte_offset_map() on pmds where a huge pmd might be created
	 * from a different thread.
	 *
	 * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
	 * parallel threads are excluded by other means.
	 *
	 * Here we only have mmap_read_lock(mm).
	 */
	if (pte_alloc(mm, pmdp))
		goto abort;

	/* See the comment in pte_alloc_one_map() */
	if (unlikely(pmd_trans_unstable(pmdp)))
		goto abort;

	if (unlikely(anon_vma_prepare(vma)))
		goto abort;
	if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
		goto abort;

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	if (is_device_private_page(page)) {
		swp_entry_t swp_entry;

		if (vma->vm_flags & VM_WRITE)
			swp_entry = make_writable_device_private_entry(
						page_to_pfn(page));
		else
			swp_entry = make_readable_device_private_entry(
						page_to_pfn(page));
		entry = swp_entry_to_pte(swp_entry);
	} else {
		if (is_zone_device_page(page) &&
		    !is_device_coherent_page(page)) {
			pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
			goto abort;
		}
		entry = mk_pte(page, vma->vm_page_prot);
		if (vma->vm_flags & VM_WRITE)
			entry = pte_mkwrite(pte_mkdirty(entry));
	}

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);

	if (check_stable_address_space(mm))
		goto unlock_abort;

	if (pte_present(*ptep)) {
		unsigned long pfn = pte_pfn(*ptep);

		if (!is_zero_pfn(pfn))
			goto unlock_abort;
		flush = true;
	} else if (!pte_none(*ptep))
		goto unlock_abort;

	/*
	 * Check for userfaultfd but do not deliver the fault. Instead,
	 * just back off.
	 */
	if (userfaultfd_missing(vma))
		goto unlock_abort;

	inc_mm_counter(mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, vma, addr);
	if (!is_zone_device_page(page))
		lru_cache_add_inactive_or_unevictable(page, vma);
	get_page(page);

	if (flush) {
		flush_cache_page(vma, addr, pte_pfn(*ptep));
		ptep_clear_flush_notify(vma, addr, ptep);
		set_pte_at_notify(mm, addr, ptep, entry);
		update_mmu_cache(vma, addr, ptep);
	} else {
		/* No need to invalidate - it was non-present before */
		set_pte_at(mm, addr, ptep, entry);
		update_mmu_cache(vma, addr, ptep);
	}

	pte_unmap_unlock(ptep, ptl);
	*src = MIGRATE_PFN_MIGRATE;
	return;

unlock_abort:
	pte_unmap_unlock(ptep, ptl);
abort:
	*src &= ~MIGRATE_PFN_MIGRATE;
}

/**
 * migrate_vma_pages() - migrate meta-data from src page to dst page
 * @migrate: migrate struct containing all migration information
 *
 * This migrates struct page meta-data from source struct page to destination
 * struct page. This effectively finishes the migration from source page to the
 * destination page.
 */
void migrate_vma_pages(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	const unsigned long start = migrate->start;
	struct mmu_notifier_range range;
	unsigned long addr, i;
	bool notified = false;

	for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
		struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
		struct page *page = migrate_pfn_to_page(migrate->src[i]);
		struct address_space *mapping;
		int r;

		if (!newpage) {
			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
			continue;
		}

		if (!page) {
			/*
			 * The only time there is no vma is when called from
			 * migrate_device_coherent_page(). However this isn't
			 * called if the page could not be unmapped.
			 */
			VM_BUG_ON(!migrate->vma);
			if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
				continue;
			if (!notified) {
				notified = true;

				mmu_notifier_range_init_owner(&range,
					MMU_NOTIFY_MIGRATE, 0, migrate->vma,
					migrate->vma->vm_mm, addr, migrate->end,
					migrate->pgmap_owner);
				mmu_notifier_invalidate_range_start(&range);
			}
			migrate_vma_insert_page(migrate, addr, newpage,
						&migrate->src[i]);
			continue;
		}

		mapping = page_mapping(page);

		if (is_device_private_page(newpage) ||
		    is_device_coherent_page(newpage)) {
			/*
			 * For now only support anonymous memory migrating to
			 * device private or coherent memory.
			 */
			if (mapping) {
				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
				continue;
			}
		} else if (is_zone_device_page(newpage)) {
			/*
			 * Other types of ZONE_DEVICE page are not supported.
			 */
			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
			continue;
		}

		r = migrate_folio(mapping, page_folio(newpage),
				page_folio(page), MIGRATE_SYNC_NO_COPY);
		if (r != MIGRATEPAGE_SUCCESS)
			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
	}

	/*
	 * No need to double call mmu_notifier->invalidate_range() callback as
	 * the above ptep_clear_flush_notify() inside migrate_vma_insert_page()
	 * did already call it.
	 */
	if (notified)
		mmu_notifier_invalidate_range_only_end(&range);
}
EXPORT_SYMBOL(migrate_vma_pages);

/**
 * migrate_vma_finalize() - restore CPU page table entry
 * @migrate: migrate struct containing all migration information
 *
 * This replaces the special migration pte entry with either a mapping to the
 * new page if migration was successful for that page, or to the original page
 * otherwise.
 *
 * This also unlocks the pages and puts them back on the lru, or drops the extra
 * refcount, for device pages.
 */
void migrate_vma_finalize(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	unsigned long i;

	for (i = 0; i < npages; i++) {
		struct folio *dst, *src;
		struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
		struct page *page = migrate_pfn_to_page(migrate->src[i]);

		if (!page) {
			if (newpage) {
				unlock_page(newpage);
				put_page(newpage);
			}
			continue;
		}

		if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
			if (newpage) {
				unlock_page(newpage);
				put_page(newpage);
			}
			newpage = page;
		}

		src = page_folio(page);
		dst = page_folio(newpage);
		remove_migration_ptes(src, dst, false);
		folio_unlock(src);

		if (is_zone_device_page(page))
			put_page(page);
		else
			putback_lru_page(page);

		if (newpage != page) {
			unlock_page(newpage);
			if (is_zone_device_page(newpage))
				put_page(newpage);
			else
				putback_lru_page(newpage);
		}
	}
}
EXPORT_SYMBOL(migrate_vma_finalize);

/*
 * Migrate a device coherent page back to normal memory. The caller should have
 * a reference on page which will be copied to the new page if migration is
 * successful or dropped on failure.
 */
int migrate_device_coherent_page(struct page *page)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma args;
	struct page *dpage;

	WARN_ON_ONCE(PageCompound(page));

	lock_page(page);
	src_pfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE;
	args.src = &src_pfn;
	args.dst = &dst_pfn;
	args.cpages = 1;
	args.npages = 1;
	args.vma = NULL;

	/*
	 * We don't have a VMA and don't need to walk the page tables to find
	 * the source page. So call migrate_vma_unmap() directly to unmap the
	 * page as migrate_vma_setup() will fail if args.vma == NULL.
	 */
	migrate_vma_unmap(&args);
	if (!(src_pfn & MIGRATE_PFN_MIGRATE))
		return -EBUSY;

	dpage = alloc_page(GFP_USER | __GFP_NOWARN);
	if (dpage) {
		lock_page(dpage);
		dst_pfn = migrate_pfn(page_to_pfn(dpage));
	}

	migrate_vma_pages(&args);
	if (src_pfn & MIGRATE_PFN_MIGRATE)
		copy_highpage(dpage, page);
	migrate_vma_finalize(&args);

	if (src_pfn & MIGRATE_PFN_MIGRATE)
		return 0;
	return -EBUSY;
}
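
/*
 * A minimal sketch (again kept as a comment) of the reverse direction
 * described in the migrate_vma_setup() documentation: a driver's
 * migrate_to_ram() fault handler bringing a single device-private page back
 * to system memory. dmirror_copy_from_device() is a hypothetical stand-in
 * for the driver's device-to-host copy; error handling is reduced to the
 * VM_FAULT_SIGBUS case the documentation above warns about.
 *
 *	static vm_fault_t example_migrate_to_ram(struct vm_fault *vmf)
 *	{
 *		unsigned long src_pfn = 0, dst_pfn = 0;
 *		struct migrate_vma args = {
 *			.vma		= vmf->vma,
 *			.start		= vmf->address,
 *			.end		= vmf->address + PAGE_SIZE,
 *			.src		= &src_pfn,
 *			.dst		= &dst_pfn,
 *			.pgmap_owner	= vmf->page->pgmap->owner,
 *			.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
 *		};
 *		struct page *spage, *dpage;
 *		vm_fault_t ret;
 *
 *		if (migrate_vma_setup(&args))
 *			return VM_FAULT_SIGBUS;
 *
 *		spage = migrate_pfn_to_page(src_pfn);
 *		if (spage && (src_pfn & MIGRATE_PFN_MIGRATE)) {
 *			dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma,
 *					       vmf->address);
 *			if (dpage) {
 *				lock_page(dpage);
 *				dmirror_copy_from_device(dpage, spage); // hypothetical
 *				dst_pfn = migrate_pfn(page_to_pfn(dpage));
 *			}
 *		}
 *
 *		migrate_vma_pages(&args);
 *		ret = (src_pfn & MIGRATE_PFN_MIGRATE) ? 0 : VM_FAULT_SIGBUS;
 *		migrate_vma_finalize(&args);
 *		return ret;
 *	}
 */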