// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013 Red Hat Inc.
 *
 * Authors: Jérôme Glisse <jglisse@redhat.com>
 */
/*
 * Refer to include/linux/hmm.h for information about heterogeneous memory
 * management or HMM for short.
 */
#include <linux/pagewalk.h>
#include <linux/hmm.h>
#include <linux/init.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mmzone.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/sched/mm.h>
#include <linux/jump_label.h>
#include <linux/dma-mapping.h>
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>

struct hmm_vma_walk {
        struct hmm_range *range;
        struct dev_pagemap *pgmap;
        unsigned long last;
        unsigned int flags;
};

static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
                            bool write_fault, uint64_t *pfn)
{
        unsigned int flags = FAULT_FLAG_REMOTE;
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        struct vm_area_struct *vma = walk->vma;
        vm_fault_t ret;

        if (!vma)
                goto err;

        if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY)
                flags |= FAULT_FLAG_ALLOW_RETRY;
        if (write_fault)
                flags |= FAULT_FLAG_WRITE;

        ret = handle_mm_fault(vma, addr, flags);
        if (ret & VM_FAULT_RETRY) {
                /* Note, handle_mm_fault did up_read(&mm->mmap_sem) */
                return -EAGAIN;
        }
        if (ret & VM_FAULT_ERROR)
                goto err;

        return -EBUSY;

err:
        *pfn = range->values[HMM_PFN_ERROR];
        return -EFAULT;
}

static int hmm_pfns_fill(unsigned long addr, unsigned long end,
                         struct hmm_range *range, enum hmm_pfn_value_e value)
{
        uint64_t *pfns = range->pfns;
        unsigned long i;

        i = (addr - range->start) >> PAGE_SHIFT;
        for (; addr < end; addr += PAGE_SIZE, i++)
                pfns[i] = range->values[value];

        return 0;
}

/*
 * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
 * @addr: range virtual start address (inclusive)
 * @end: range virtual end address (exclusive)
 * @fault: should we fault or not?
 * @write_fault: write fault?
 * @walk: mm_walk structure
 * Return: 0 on success, -EBUSY after page fault, or page fault error
 *
 * This function will be called whenever pmd_none() or pte_none() returns true,
 * or whenever there is no page directory covering the virtual address range.
 */
static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
                              bool fault, bool write_fault,
                              struct mm_walk *walk)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        uint64_t *pfns = range->pfns;
        unsigned long i;

        hmm_vma_walk->last = addr;
        i = (addr - range->start) >> PAGE_SHIFT;

        if (write_fault && walk->vma && !(walk->vma->vm_flags & VM_WRITE))
                return -EPERM;

        for (; addr < end; addr += PAGE_SIZE, i++) {
                pfns[i] = range->values[HMM_PFN_NONE];
                if (fault || write_fault) {
                        int ret;

                        ret = hmm_vma_do_fault(walk, addr, write_fault,
                                               &pfns[i]);
                        if (ret != -EBUSY)
                                return ret;
                }
        }

        return (fault || write_fault) ? -EBUSY : 0;
}

static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
                                      uint64_t pfns, uint64_t cpu_flags,
                                      bool *fault, bool *write_fault)
{
        struct hmm_range *range = hmm_vma_walk->range;

        if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
                return;

        /*
         * We consider not only the individual per-page request but also the
         * default flags requested for the range. The API can be used in two
         * ways: in the first, the HMM user coalesces multiple page faults
         * into one request and sets flags per pfn for those faults; in the
         * second, the HMM user wants to pre-fault a range with specific
         * flags. For the latter it would be a waste to have the user
         * pre-fill the pfn array with a default flags value.
         */
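        /*
         * For example (an illustrative sketch only, following the
         * description in Documentation/vm/hmm.rst): a caller that wants to
         * pre-fault the whole range with at least read permission can set
         *
         *      range->default_flags = range->flags[HMM_PFN_VALID];
         *      range->pfn_flags_mask = 0;
         *
         * while a caller that additionally wants write access for selected
         * pages keeps default_flags as above and sets
         *
         *      range->pfn_flags_mask = range->flags[HMM_PFN_WRITE];
         *      range->pfns[i] = range->flags[HMM_PFN_WRITE];
         *
         * for each index i that needs write permission.
         */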
        pfns = (pfns & range->pfn_flags_mask) | range->default_flags;

        /* We aren't asked to do anything ... */
        if (!(pfns & range->flags[HMM_PFN_VALID]))
                return;
        /* If this is device memory then only fault if explicitly requested */
        if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
                /* Do we fault on device memory? */
                if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
                        *write_fault = pfns & range->flags[HMM_PFN_WRITE];
                        *fault = true;
                }
                return;
        }

        /* If CPU page table is not valid then we need to fault */
        *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
        /* Need to write fault? */
        if ((pfns & range->flags[HMM_PFN_WRITE]) &&
            !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
                *write_fault = true;
                *fault = true;
        }
}

static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
                                 const uint64_t *pfns, unsigned long npages,
                                 uint64_t cpu_flags, bool *fault,
                                 bool *write_fault)
{
        unsigned long i;

        if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
                *fault = *write_fault = false;
                return;
        }

        *fault = *write_fault = false;
        for (i = 0; i < npages; ++i) {
                hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
                                   fault, write_fault);
                if ((*write_fault))
                        return;
        }
}

static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
                             __always_unused int depth, struct mm_walk *walk)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        bool fault, write_fault;
        unsigned long i, npages;
        uint64_t *pfns;

        i = (addr - range->start) >> PAGE_SHIFT;
        npages = (end - addr) >> PAGE_SHIFT;
        pfns = &range->pfns[i];
        hmm_range_need_fault(hmm_vma_walk, pfns, npages,
                             0, &fault, &write_fault);
        return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
}

static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
{
        if (pmd_protnone(pmd))
                return 0;
        return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
                                range->flags[HMM_PFN_WRITE] :
                                range->flags[HMM_PFN_VALID];
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
                              unsigned long end, uint64_t *pfns, pmd_t pmd)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        unsigned long pfn, npages, i;
        bool fault, write_fault;
        uint64_t cpu_flags;

        npages = (end - addr) >> PAGE_SHIFT;
        cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
        hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
                             &fault, &write_fault);

        if (pmd_protnone(pmd) || fault || write_fault)
                return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);

        pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
        for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
                if (pmd_devmap(pmd)) {
                        hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
                                                hmm_vma_walk->pgmap);
                        if (unlikely(!hmm_vma_walk->pgmap))
                                return -EBUSY;
                }
                pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
        }
        if (hmm_vma_walk->pgmap) {
                put_dev_pagemap(hmm_vma_walk->pgmap);
                hmm_vma_walk->pgmap = NULL;
        }
        hmm_vma_walk->last = end;
        return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* stub to allow the code below to compile */
int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
                       unsigned long end, uint64_t *pfns, pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
{
        if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
                return 0;
        return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
                                range->flags[HMM_PFN_WRITE] :
                                range->flags[HMM_PFN_VALID];
}

static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                              unsigned long end, pmd_t *pmdp, pte_t *ptep,
                              uint64_t *pfn)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        bool fault, write_fault;
        uint64_t cpu_flags;
        pte_t pte = *ptep;
        uint64_t orig_pfn = *pfn;

        *pfn = range->values[HMM_PFN_NONE];
        fault = write_fault = false;

        if (pte_none(pte)) {
                hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0,
                                   &fault, &write_fault);
                if (fault || write_fault)
                        goto fault;
                return 0;
        }

        if (!pte_present(pte)) {
                swp_entry_t entry = pte_to_swp_entry(pte);

                if (!non_swap_entry(entry)) {
                        cpu_flags = pte_to_hmm_pfn_flags(range, pte);
                        hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
                                           &fault, &write_fault);
                        if (fault || write_fault)
                                goto fault;
                        return 0;
                }

                /*
                 * This is a special swap entry: ignore migration, use the
                 * device entry, and report anything else as an error.
                 */
                if (is_device_private_entry(entry)) {
                        cpu_flags = range->flags[HMM_PFN_VALID] |
                                range->flags[HMM_PFN_DEVICE_PRIVATE];
                        cpu_flags |= is_write_device_private_entry(entry) ?
                                range->flags[HMM_PFN_WRITE] : 0;
                        hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
                                           &fault, &write_fault);
                        if (fault || write_fault)
                                goto fault;
                        *pfn = hmm_device_entry_from_pfn(range,
                                        swp_offset(entry));
                        *pfn |= cpu_flags;
                        return 0;
                }

                if (is_migration_entry(entry)) {
                        if (fault || write_fault) {
                                pte_unmap(ptep);
                                hmm_vma_walk->last = addr;
                                migration_entry_wait(walk->mm, pmdp, addr);
                                return -EBUSY;
                        }
                        return 0;
                }

                /* Report error for everything else */
                *pfn = range->values[HMM_PFN_ERROR];
                return -EFAULT;
        } else {
                cpu_flags = pte_to_hmm_pfn_flags(range, pte);
                hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
                                   &fault, &write_fault);
        }

        if (fault || write_fault)
                goto fault;

        if (pte_devmap(pte)) {
                hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
                                        hmm_vma_walk->pgmap);
                if (unlikely(!hmm_vma_walk->pgmap))
                        return -EBUSY;
        } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
                if (!is_zero_pfn(pte_pfn(pte))) {
                        *pfn = range->values[HMM_PFN_SPECIAL];
                        return -EFAULT;
                }
                /*
                 * Since each architecture defines a struct page for the zero
                 * page, just fall through and treat it like a normal page.
                 */
        }

        *pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
        return 0;

fault:
        if (hmm_vma_walk->pgmap) {
                put_dev_pagemap(hmm_vma_walk->pgmap);
                hmm_vma_walk->pgmap = NULL;
        }
        pte_unmap(ptep);
        /* Fault any virtual address we were asked to fault */
        return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
}

static int hmm_vma_walk_pmd(pmd_t *pmdp,
                            unsigned long start,
                            unsigned long end,
                            struct mm_walk *walk)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        uint64_t *pfns = range->pfns;
        unsigned long addr = start, i;
        pte_t *ptep;
        pmd_t pmd;

again:
        pmd = READ_ONCE(*pmdp);
        if (pmd_none(pmd))
                return hmm_vma_walk_hole(start, end, -1, walk);

        if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
                bool fault, write_fault;
                unsigned long npages;
                uint64_t *pfns;

                i = (addr - range->start) >> PAGE_SHIFT;
                npages = (end - addr) >> PAGE_SHIFT;
                pfns = &range->pfns[i];

                hmm_range_need_fault(hmm_vma_walk, pfns, npages,
                                     0, &fault, &write_fault);
                if (fault || write_fault) {
                        hmm_vma_walk->last = addr;
                        pmd_migration_entry_wait(walk->mm, pmdp);
                        return -EBUSY;
                }
                return 0;
        } else if (!pmd_present(pmd))
                return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

        if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
                /*
                 * No need to take pmd_lock here; even if some other thread
                 * is splitting the huge pmd we will get that event through
                 * the mmu_notifier callback.
                 *
                 * So just read the pmd value and check again that it is a
                 * transparent huge or device mapping one, then compute the
                 * corresponding pfn values.
                 */
                pmd = pmd_read_atomic(pmdp);
                barrier();
                if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
                        goto again;

                i = (addr - range->start) >> PAGE_SHIFT;
                return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
        }

        /*
         * We have handled all the valid cases above, i.e. either none,
         * migration, huge or transparent huge. At this point either it is a
         * valid pmd entry pointing to a pte directory or it is a bad pmd
         * that will not recover.
         */
        if (pmd_bad(pmd))
                return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

        ptep = pte_offset_map(pmdp, addr);
        i = (addr - range->start) >> PAGE_SHIFT;
        for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
                int r;

                r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
                if (r) {
                        /* hmm_vma_handle_pte() did unmap pte directory */
                        hmm_vma_walk->last = addr;
                        return r;
                }
        }
        if (hmm_vma_walk->pgmap) {
                /*
                 * We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
                 * so that we can leverage get_dev_pagemap() optimization which
                 * will not re-take a reference on a pgmap if we already have
                 * one.
                 */
                put_dev_pagemap(hmm_vma_walk->pgmap);
                hmm_vma_walk->pgmap = NULL;
        }
        pte_unmap(ptep - 1);

        hmm_vma_walk->last = addr;
        return 0;
}

#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
    defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
{
        if (!pud_present(pud))
                return 0;
        return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
                                range->flags[HMM_PFN_WRITE] :
                                range->flags[HMM_PFN_VALID];
}

static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
                            struct mm_walk *walk)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        unsigned long addr = start;
        pud_t pud;
        int ret = 0;
        spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);

        if (!ptl)
                return 0;

        /* Normally we don't want to split the huge page */
        walk->action = ACTION_CONTINUE;

        pud = READ_ONCE(*pudp);
        if (pud_none(pud)) {
                ret = hmm_vma_walk_hole(start, end, -1, walk);
                goto out_unlock;
        }

        if (pud_huge(pud) && pud_devmap(pud)) {
                unsigned long i, npages, pfn;
                uint64_t *pfns, cpu_flags;
                bool fault, write_fault;

                if (!pud_present(pud)) {
                        ret = hmm_vma_walk_hole(start, end, -1, walk);
                        goto out_unlock;
                }

                i = (addr - range->start) >> PAGE_SHIFT;
                npages = (end - addr) >> PAGE_SHIFT;
                pfns = &range->pfns[i];

                cpu_flags = pud_to_hmm_pfn_flags(range, pud);
                hmm_range_need_fault(hmm_vma_walk, pfns, npages,
                                     cpu_flags, &fault, &write_fault);
                if (fault || write_fault) {
                        ret = hmm_vma_walk_hole_(addr, end, fault,
                                                 write_fault, walk);
                        goto out_unlock;
                }

                pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
                for (i = 0; i < npages; ++i, ++pfn) {
                        hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
                                                hmm_vma_walk->pgmap);
                        if (unlikely(!hmm_vma_walk->pgmap)) {
                                ret = -EBUSY;
                                goto out_unlock;
                        }
                        pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
                                  cpu_flags;
                }
                if (hmm_vma_walk->pgmap) {
                        put_dev_pagemap(hmm_vma_walk->pgmap);
                        hmm_vma_walk->pgmap = NULL;
                }
                hmm_vma_walk->last = end;
                goto out_unlock;
        }

        /* Ask for the PUD to be split */
        walk->action = ACTION_SUBTREE;

out_unlock:
        spin_unlock(ptl);
        return ret;
}
#else
#define hmm_vma_walk_pud NULL
#endif

#ifdef CONFIG_HUGETLB_PAGE
static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
                                      unsigned long start, unsigned long end,
                                      struct mm_walk *walk)
{
        unsigned long addr = start, i, pfn;
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        struct vm_area_struct *vma = walk->vma;
        uint64_t orig_pfn, cpu_flags;
        bool fault, write_fault;
        spinlock_t *ptl;
        pte_t entry;
        int ret = 0;

        ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
        entry = huge_ptep_get(pte);

        i = (start - range->start) >> PAGE_SHIFT;
        orig_pfn = range->pfns[i];
        range->pfns[i] = range->values[HMM_PFN_NONE];
        cpu_flags = pte_to_hmm_pfn_flags(range, entry);
        fault = write_fault = false;
        hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
                           &fault, &write_fault);
        if (fault || write_fault) {
                ret = -ENOENT;
                goto unlock;
        }

        pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
        for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
                range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
                                 cpu_flags;
        hmm_vma_walk->last = end;

unlock:
        spin_unlock(ptl);

        if (ret == -ENOENT)
                return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);

        return ret;
}
#else
#define hmm_vma_walk_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static int hmm_vma_walk_test(unsigned long start, unsigned long end,
                             struct mm_walk *walk)
{
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
        struct vm_area_struct *vma = walk->vma;

        /*
         * Skip vma ranges that don't have struct page backing them or
         * map I/O devices directly.
         */
        if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
                return -EFAULT;

        /*
         * If the vma does not allow read access, then assume that it does not
         * allow write access either. HMM does not support architectures
         * that allow write without read.
         */
        if (!(vma->vm_flags & VM_READ)) {
                bool fault, write_fault;

                /*
                 * Check to see if a fault is requested for any page in the
                 * range.
                 */
                hmm_range_need_fault(hmm_vma_walk, range->pfns +
                                        ((start - range->start) >> PAGE_SHIFT),
                                     (end - start) >> PAGE_SHIFT,
                                     0, &fault, &write_fault);
                if (fault || write_fault)
                        return -EFAULT;

                hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
                hmm_vma_walk->last = end;

                /* Skip this vma and continue processing the next vma. */
                return 1;
        }

        return 0;
}

static const struct mm_walk_ops hmm_walk_ops = {
        .pud_entry = hmm_vma_walk_pud,
        .pmd_entry = hmm_vma_walk_pmd,
        .pte_hole = hmm_vma_walk_hole,
        .hugetlb_entry = hmm_vma_walk_hugetlb_entry,
        .test_walk = hmm_vma_walk_test,
};

/**
 * hmm_range_fault - try to fault some address in a virtual address range
 * @range: range being faulted
 * @flags: HMM_FAULT_* flags
 *
 * Return: the number of valid pages in range->pfns[] (from range start
 * address), which may be zero. On error one of the following status codes
 * can be returned:
 *
 * -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma
 *          (e.g., device file vma).
 * -ENOMEM: Out of memory.
 * -EPERM:  Invalid permission (e.g., asking for write and range is read
 *          only).
 * -EAGAIN: A page fault needs to be retried and mmap_sem was dropped.
 * -EBUSY:  The range has been invalidated and the caller needs to wait for
 *          the invalidation to finish.
 * -EFAULT: Invalid (i.e., either no valid vma or it is illegal to access
 *          that range).
 *
 * This is similar to a regular CPU page fault except that it will not trigger
 * any memory migration if the memory being faulted is not accessible by CPUs
 * and the caller does not ask for migration.
 *
 * On error, for one virtual address in the range, the function will mark the
 * corresponding HMM pfn entry with an error flag.
 */
long hmm_range_fault(struct hmm_range *range, unsigned int flags)
{
        struct hmm_vma_walk hmm_vma_walk = {
                .range = range,
                .last = range->start,
                .flags = flags,
        };
        struct mm_struct *mm = range->notifier->mm;
        int ret;

        lockdep_assert_held(&mm->mmap_sem);

        do {
                /* If range is no longer valid force retry. */
                if (mmu_interval_check_retry(range->notifier,
                                             range->notifier_seq))
                        return -EBUSY;
                ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
                                      &hmm_walk_ops, &hmm_vma_walk);
        } while (ret == -EBUSY);

        if (ret)
                return ret;
        return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
}
EXPORT_SYMBOL(hmm_range_fault);
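
/*
 * Typical usage (a sketch only, following the pattern described in
 * Documentation/vm/hmm.rst; "driver_lock" and the device page table update
 * step are hypothetical driver-side details, not part of this API). The
 * caller owns an mmu_interval_notifier, stored in range->notifier, and
 * retries whenever the range is invalidated:
 *
 *      struct mm_struct *mm = range->notifier->mm;
 *      long ret;
 *
 * again:
 *      range->notifier_seq = mmu_interval_read_begin(range->notifier);
 *      down_read(&mm->mmap_sem);
 *      ret = hmm_range_fault(range, 0);
 *      up_read(&mm->mmap_sem);
 *      if (ret < 0) {
 *              if (ret == -EBUSY)
 *                      goto again;
 *              return ret;
 *      }
 *
 *      mutex_lock(&driver_lock);
 *      if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
 *              mutex_unlock(&driver_lock);
 *              goto again;
 *      }
 *      ... update the device page table from range->pfns[] while holding
 *      driver_lock, which the driver's mmu_interval_notifier invalidate
 *      callback must also take ...
 *      mutex_unlock(&driver_lock);
 *
 * Passing HMM_FAULT_SNAPSHOT instead of 0 only snapshots the current CPU
 * page table state without faulting in missing pages.
 */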