// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013 Red Hat Inc.
 *
 * Authors: Jérôme Glisse <jglisse@redhat.com>
 */
/*
 * Refer to include/linux/hmm.h for information about heterogeneous memory
 * management or HMM for short.
 */
#include <linux/pagewalk.h>
#include <linux/hmm.h>
#include <linux/init.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mmzone.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/sched/mm.h>
#include <linux/jump_label.h>
#include <linux/dma-mapping.h>
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>

struct hmm_vma_walk {
	struct hmm_range	*range;
	struct dev_pagemap	*pgmap;
	unsigned long		last;
	unsigned int		flags;
};

static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
			    bool write_fault, uint64_t *pfn)
{
	unsigned int flags = FAULT_FLAG_REMOTE;
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	struct vm_area_struct *vma = walk->vma;
	vm_fault_t ret;

	if (!vma)
		goto err;

	if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY)
		flags |= FAULT_FLAG_ALLOW_RETRY;
	if (write_fault)
		flags |= FAULT_FLAG_WRITE;

	ret = handle_mm_fault(vma, addr, flags);
	if (ret & VM_FAULT_RETRY) {
		/* Note, handle_mm_fault did up_read(&mm->mmap_sem) */
		return -EAGAIN;
	}
	if (ret & VM_FAULT_ERROR)
		goto err;

	return -EBUSY;

err:
	*pfn = range->values[HMM_PFN_ERROR];
	return -EFAULT;
}

static int hmm_pfns_fill(unsigned long addr, unsigned long end,
			 struct hmm_range *range, enum hmm_pfn_value_e value)
{
	uint64_t *pfns = range->pfns;
	unsigned long i;

	i = (addr - range->start) >> PAGE_SHIFT;
	for (; addr < end; addr += PAGE_SIZE, i++)
		pfns[i] = range->values[value];

	return 0;
}

/*
 * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
 * @addr: range virtual start address (inclusive)
 * @end: range virtual end address (exclusive)
 * @fault: should we fault or not ?
 * @write_fault: write fault ?
 * @walk: mm_walk structure
 * Return: 0 on success, -EBUSY after page fault, or page fault error
 *
 * This function will be called whenever pmd_none() or pte_none() returns true,
 * or whenever there is no page directory covering the virtual address range.
 */
static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
			      bool fault, bool write_fault,
			      struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	uint64_t *pfns = range->pfns;
	unsigned long i;

	hmm_vma_walk->last = addr;
	i = (addr - range->start) >> PAGE_SHIFT;

	if (write_fault && walk->vma && !(walk->vma->vm_flags & VM_WRITE))
		return -EPERM;

	for (; addr < end; addr += PAGE_SIZE, i++) {
		pfns[i] = range->values[HMM_PFN_NONE];
		if (fault || write_fault) {
			int ret;

			ret = hmm_vma_do_fault(walk, addr, write_fault,
					       &pfns[i]);
			if (ret != -EBUSY)
				return ret;
		}
	}

	return (fault || write_fault) ? -EBUSY : 0;
}

static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
				      uint64_t pfns, uint64_t cpu_flags,
				      bool *fault, bool *write_fault)
{
	struct hmm_range *range = hmm_vma_walk->range;

	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
		return;

	/*
	 * We consider not only the individual per page request but also the
	 * default flags requested for the whole range. The API can be used
	 * two ways: the first, where the HMM user coalesces multiple page
	 * faults into one request and sets flags per pfn for those faults;
	 * the second, where the HMM user wants to pre-fault a range with
	 * specific flags. For the latter it is a waste to have the user
	 * pre-fill the pfn array with a default flags value.
	 */
	pfns = (pfns & range->pfn_flags_mask) | range->default_flags;

	/* We aren't asked to do anything ... */
	if (!(pfns & range->flags[HMM_PFN_VALID]))
		return;
	/* If this is device memory then only fault if explicitly requested */
	if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
		/* Do we fault on device memory ? */
		if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
			*write_fault = pfns & range->flags[HMM_PFN_WRITE];
			*fault = true;
		}
		return;
	}

	/* If CPU page table is not valid then we need to fault */
	*fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
	/* Need to write fault ? */
	if ((pfns & range->flags[HMM_PFN_WRITE]) &&
	    !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
		*write_fault = true;
		*fault = true;
	}
}

static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
				 const uint64_t *pfns, unsigned long npages,
				 uint64_t cpu_flags, bool *fault,
				 bool *write_fault)
{
	unsigned long i;

	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
		*fault = *write_fault = false;
		return;
	}

	*fault = *write_fault = false;
	for (i = 0; i < npages; ++i) {
		hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
				   fault, write_fault);
		if ((*write_fault))
			return;
	}
}

static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	bool fault, write_fault;
	unsigned long i, npages;
	uint64_t *pfns;

	i = (addr - range->start) >> PAGE_SHIFT;
	npages = (end - addr) >> PAGE_SHIFT;
	pfns = &range->pfns[i];
	hmm_range_need_fault(hmm_vma_walk, pfns, npages,
			     0, &fault, &write_fault);
	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
}

static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
{
	if (pmd_protnone(pmd))
		return 0;
	return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
				range->flags[HMM_PFN_WRITE] :
				range->flags[HMM_PFN_VALID];
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
			      unsigned long end, uint64_t *pfns, pmd_t pmd)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned long pfn, npages, i;
	bool fault, write_fault;
	uint64_t cpu_flags;

	npages = (end - addr) >> PAGE_SHIFT;
	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
	hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
			     &fault, &write_fault);

	if (pmd_protnone(pmd) || fault || write_fault)
		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);

	pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
		if (pmd_devmap(pmd)) {
			hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
					      hmm_vma_walk->pgmap);
			if (unlikely(!hmm_vma_walk->pgmap))
				return -EBUSY;
		}
		pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
	}
	if (hmm_vma_walk->pgmap) {
		put_dev_pagemap(hmm_vma_walk->pgmap);
		hmm_vma_walk->pgmap = NULL;
	}
	hmm_vma_walk->last = end;
	return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* stub to allow the code below to compile */
int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
		       unsigned long end, uint64_t *pfns, pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
{
	if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
		return 0;
	return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
				range->flags[HMM_PFN_WRITE] :
				range->flags[HMM_PFN_VALID];
}

static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
			      uint64_t *pfn)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	bool fault, write_fault;
	uint64_t cpu_flags;
	pte_t pte = *ptep;
	uint64_t orig_pfn = *pfn;

	*pfn = range->values[HMM_PFN_NONE];
	fault = write_fault = false;

	if (pte_none(pte)) {
		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0,
				   &fault, &write_fault);
		if (fault || write_fault)
			goto fault;
		return 0;
	}

	if (!pte_present(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		if (!non_swap_entry(entry)) {
			cpu_flags = pte_to_hmm_pfn_flags(range, pte);
			hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
					   &fault, &write_fault);
			if (fault || write_fault)
				goto fault;
			return 0;
		}

		/*
		 * This is a special swap entry: ignore migration entries, use
		 * device private entries, and report anything else as an
		 * error.
		 */
		if (is_device_private_entry(entry)) {
			cpu_flags = range->flags[HMM_PFN_VALID] |
				range->flags[HMM_PFN_DEVICE_PRIVATE];
			cpu_flags |= is_write_device_private_entry(entry) ?
				range->flags[HMM_PFN_WRITE] : 0;
			hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
					   &fault, &write_fault);
			if (fault || write_fault)
				goto fault;
			*pfn = hmm_device_entry_from_pfn(range,
							 swp_offset(entry));
			*pfn |= cpu_flags;
			return 0;
		}

		if (is_migration_entry(entry)) {
			if (fault || write_fault) {
				pte_unmap(ptep);
				hmm_vma_walk->last = addr;
				migration_entry_wait(walk->mm, pmdp, addr);
				return -EBUSY;
			}
			return 0;
		}

		/* Report error for everything else */
		*pfn = range->values[HMM_PFN_ERROR];
		return -EFAULT;
	} else {
		cpu_flags = pte_to_hmm_pfn_flags(range, pte);
		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
				   &fault, &write_fault);
	}

	if (fault || write_fault)
		goto fault;

	if (pte_devmap(pte)) {
		hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
					      hmm_vma_walk->pgmap);
		if (unlikely(!hmm_vma_walk->pgmap))
			return -EBUSY;
	} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
		if (!is_zero_pfn(pte_pfn(pte))) {
			*pfn = range->values[HMM_PFN_SPECIAL];
			return -EFAULT;
		}
		/*
		 * Since each architecture defines a struct page for the zero
		 * page, just fall through and treat it like a normal page.
		 */
	}

	*pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
	return 0;

fault:
	if (hmm_vma_walk->pgmap) {
		put_dev_pagemap(hmm_vma_walk->pgmap);
		hmm_vma_walk->pgmap = NULL;
	}
	pte_unmap(ptep);
	/* Fault any virtual address we were asked to fault */
	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
}

static int hmm_vma_walk_pmd(pmd_t *pmdp,
			    unsigned long start,
			    unsigned long end,
			    struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	uint64_t *pfns = range->pfns;
	unsigned long addr = start, i;
	pte_t *ptep;
	pmd_t pmd;

again:
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return hmm_vma_walk_hole(start, end, walk);

	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
		bool fault, write_fault;
		unsigned long npages;
		uint64_t *pfns;

		i = (addr - range->start) >> PAGE_SHIFT;
		npages = (end - addr) >> PAGE_SHIFT;
		pfns = &range->pfns[i];

		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
				     0, &fault, &write_fault);
		if (fault || write_fault) {
			hmm_vma_walk->last = addr;
			pmd_migration_entry_wait(walk->mm, pmdp);
			return -EBUSY;
		}
		return 0;
	} else if (!pmd_present(pmd))
		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
		/*
		 * No need to take pmd_lock here, even if some other thread
		 * is splitting the huge pmd we will get that event through
		 * mmu_notifier callback.
		 *
		 * So just read the pmd value again, check that it is still a
		 * transparent huge or device mapping, and compute the
		 * corresponding pfn values.
		 */
		pmd = pmd_read_atomic(pmdp);
		barrier();
		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
			goto again;

		i = (addr - range->start) >> PAGE_SHIFT;
		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
	}

	/*
	 * We have handled all the valid cases above, i.e. either none,
	 * migration, huge or transparent huge. At this point either it is a
	 * valid pmd entry pointing to a pte directory or it is a bad pmd that
	 * will not recover.
	 */
	if (pmd_bad(pmd))
		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

	ptep = pte_offset_map(pmdp, addr);
	i = (addr - range->start) >> PAGE_SHIFT;
	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
		int r;

		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
		if (r) {
			/* hmm_vma_handle_pte() did unmap pte directory */
			hmm_vma_walk->last = addr;
			return r;
		}
	}
	if (hmm_vma_walk->pgmap) {
		/*
		 * We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
		 * so that we can leverage get_dev_pagemap() optimization which
		 * will not re-take a reference on a pgmap if we already have
		 * one.
		 */
		put_dev_pagemap(hmm_vma_walk->pgmap);
		hmm_vma_walk->pgmap = NULL;
	}
	pte_unmap(ptep - 1);

	hmm_vma_walk->last = addr;
	return 0;
}

#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
    defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
{
	if (!pud_present(pud))
		return 0;
	return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
				range->flags[HMM_PFN_WRITE] :
				range->flags[HMM_PFN_VALID];
}

static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
			    struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned long addr = start, next;
	pmd_t *pmdp;
	pud_t pud;
	int ret;

again:
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return hmm_vma_walk_hole(start, end, walk);

	if (pud_huge(pud) && pud_devmap(pud)) {
		unsigned long i, npages, pfn;
		uint64_t *pfns, cpu_flags;
		bool fault, write_fault;

		if (!pud_present(pud))
			return hmm_vma_walk_hole(start, end, walk);

		i = (addr - range->start) >> PAGE_SHIFT;
		npages = (end - addr) >> PAGE_SHIFT;
		pfns = &range->pfns[i];

		cpu_flags = pud_to_hmm_pfn_flags(range, pud);
		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
				     cpu_flags, &fault, &write_fault);
		if (fault || write_fault)
			return hmm_vma_walk_hole_(addr, end, fault,
						  write_fault, walk);

		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
		for (i = 0; i < npages; ++i, ++pfn) {
			hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
					      hmm_vma_walk->pgmap);
			if (unlikely(!hmm_vma_walk->pgmap))
				return -EBUSY;
			pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
				  cpu_flags;
		}
		if (hmm_vma_walk->pgmap) {
			put_dev_pagemap(hmm_vma_walk->pgmap);
			hmm_vma_walk->pgmap = NULL;
		}
		hmm_vma_walk->last = end;
		return 0;
	}

	split_huge_pud(walk->vma, pudp, addr);
	if (pud_none(*pudp))
		goto again;

	pmdp = pmd_offset(pudp, addr);
	do {
		next = pmd_addr_end(addr, end);
		ret = hmm_vma_walk_pmd(pmdp, addr, next, walk);
		if (ret)
			return ret;
	} while (pmdp++, addr = next, addr != end);

	return 0;
}
#else
#define hmm_vma_walk_pud	NULL
#endif

#ifdef CONFIG_HUGETLB_PAGE
static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
				      unsigned long start, unsigned long end,
				      struct mm_walk *walk)
{
	unsigned long addr = start, i, pfn;
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	struct vm_area_struct *vma = walk->vma;
	uint64_t orig_pfn, cpu_flags;
	bool fault, write_fault;
	spinlock_t *ptl;
	pte_t entry;
	int ret = 0;

	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
	entry = huge_ptep_get(pte);

	i = (start - range->start) >> PAGE_SHIFT;
	orig_pfn = range->pfns[i];
	range->pfns[i] = range->values[HMM_PFN_NONE];
	cpu_flags = pte_to_hmm_pfn_flags(range, entry);
	fault = write_fault = false;
	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
			   &fault, &write_fault);
	if (fault || write_fault) {
		ret = -ENOENT;
		goto unlock;
	}

	pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
	for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
		range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
				 cpu_flags;
	hmm_vma_walk->last = end;

unlock:
	spin_unlock(ptl);

	if (ret == -ENOENT)
		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);

	return ret;
}
#else
#define hmm_vma_walk_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static int hmm_vma_walk_test(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	struct vm_area_struct *vma = walk->vma;

	/*
	 * Skip vma ranges that don't have struct page backing them or
	 * map I/O devices directly.
	 */
	if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
		return -EFAULT;

	/*
	 * If the vma does not allow read access, then assume that it does not
	 * allow write access either. HMM does not support architectures
	 * that allow write without read.
	 */
	if (!(vma->vm_flags & VM_READ)) {
		bool fault, write_fault;

		/*
		 * Check to see if a fault is requested for any page in the
		 * range.
		 */
		hmm_range_need_fault(hmm_vma_walk, range->pfns +
					((start - range->start) >> PAGE_SHIFT),
					(end - start) >> PAGE_SHIFT,
					0, &fault, &write_fault);
		if (fault || write_fault)
			return -EFAULT;

		hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
		hmm_vma_walk->last = end;

		/* Skip this vma and continue processing the next vma. */
		return 1;
	}

	return 0;
}

static const struct mm_walk_ops hmm_walk_ops = {
	.pud_entry	= hmm_vma_walk_pud,
	.pmd_entry	= hmm_vma_walk_pmd,
	.pte_hole	= hmm_vma_walk_hole,
	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
	.test_walk	= hmm_vma_walk_test,
};

/**
 * hmm_range_fault - try to fault some address in a virtual address range
 * @range:	range being faulted
 * @flags:	HMM_FAULT_* flags
 *
 * Return: the number of valid pages in range->pfns[] (from range start
 * address), which may be zero. On error one of the following status codes
 * can be returned:
 *
 * -EINVAL:	Invalid arguments or mm or virtual address is in an invalid vma
 *		(e.g., device file vma).
 * -ENOMEM:	Out of memory.
 * -EPERM:	Invalid permission (e.g., asking for write and range is read
 *		only).
 * -EAGAIN:	A page fault needs to be retried and mmap_sem was dropped.
 * -EBUSY:	The range has been invalidated and the caller needs to wait for
 *		the invalidation to finish.
 * -EFAULT:	The range is invalid, i.e., either there is no valid vma
 *		covering it or it is illegal to access it.
 *
 * This is similar to a regular CPU page fault except that it will not trigger
 * any memory migration if the memory being faulted is not accessible by CPUs
 * and the caller does not ask for migration.
 *
 * On error, for one virtual address in the range, the function will mark the
 * corresponding HMM pfn entry with an error flag.
 */
long hmm_range_fault(struct hmm_range *range, unsigned int flags)
{
	struct hmm_vma_walk hmm_vma_walk = {
		.range = range,
		.last = range->start,
		.flags = flags,
	};
	struct mm_struct *mm = range->notifier->mm;
	int ret;

	lockdep_assert_held(&mm->mmap_sem);

	do {
		/* If range is no longer valid force retry. */
		if (mmu_interval_check_retry(range->notifier,
					     range->notifier_seq))
			return -EBUSY;
		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
				      &hmm_walk_ops, &hmm_vma_walk);
	} while (ret == -EBUSY);

	if (ret)
		return ret;
	return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
}
EXPORT_SYMBOL(hmm_range_fault);
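
/*
 * Illustrative usage sketch (editor's addition, compiled out with "#if 0"):
 * roughly how a device driver might drive hmm_range_fault() together with the
 * mmu_interval_notifier sequence count, assuming the notifier was already
 * registered with mmu_interval_notifier_insert() and the caller holds a
 * reference on @mm. The "example_*" names, the flag/value encodings and the
 * driver lock helpers below are hypothetical; only hmm_range_fault(),
 * mmu_interval_read_begin() and mmu_interval_read_retry() are real APIs of
 * this kernel version.
 */
#if 0
static long example_fault_and_map(struct mmu_interval_notifier *notifier,
				  struct mm_struct *mm, unsigned long start,
				  unsigned long end, bool write)
{
	/* Hypothetical driver encoding of the HMM pfn flags and values. */
	static const uint64_t example_flags[HMM_PFN_FLAG_MAX] = {
		[HMM_PFN_VALID]		 = 1UL << 0,
		[HMM_PFN_WRITE]		 = 1UL << 1,
		[HMM_PFN_DEVICE_PRIVATE] = 1UL << 2,
	};
	static const uint64_t example_values[HMM_PFN_VALUE_MAX] = {
		[HMM_PFN_ERROR]		= 0x3UL << 62,
		[HMM_PFN_NONE]		= 0,
		[HMM_PFN_SPECIAL]	= 0x1UL << 62,
	};
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	struct hmm_range range = {
		.notifier	= notifier,
		.start		= start,
		.end		= end,
		.flags		= example_flags,
		.values		= example_values,
		.pfn_shift	= PAGE_SHIFT,
		/* Pre-fault the whole range instead of filling pfns[]. */
		.default_flags	= example_flags[HMM_PFN_VALID] |
				  (write ? example_flags[HMM_PFN_WRITE] : 0),
		.pfn_flags_mask	= 0,
	};
	long ret;

	range.pfns = kvcalloc(npages, sizeof(*range.pfns), GFP_KERNEL);
	if (!range.pfns)
		return -ENOMEM;

again:
	range.notifier_seq = mmu_interval_read_begin(notifier);
	down_read(&mm->mmap_sem);
	ret = hmm_range_fault(&range, 0);	/* 0 or HMM_FAULT_SNAPSHOT */
	up_read(&mm->mmap_sem);
	if (ret < 0) {
		/* -EBUSY: the range was invalidated, start over. */
		if (ret == -EBUSY)
			goto again;
		goto out_free;
	}

	/*
	 * Re-check the notifier sequence under the driver's own page table
	 * lock before consuming range.pfns[]; if an invalidation raced with
	 * us while mmap_sem was dropped, start over.
	 */
	example_driver_lock();			/* hypothetical driver lock */
	if (mmu_interval_read_retry(notifier, range.notifier_seq)) {
		example_driver_unlock();
		goto again;
	}
	/* ... program the device page tables from range.pfns[] ... */
	example_driver_unlock();
	ret = 0;

out_free:
	kvfree(range.pfns);
	return ret;
}
#endif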