Lines matching the query +full:free +full:- +full:standing ("free-standing") in the Linux kernel's mm/memory.c

1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
15 * pages started 02.12.91, seems to work. - Linus.
18 * would have taken more than the 6M I have free, but it worked well as
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
70 #include <linux/memory-tiers.h>
90 #include "pgalloc-track.h"
95 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
111 * Return true if the original pte was a uffd-wp pte marker (so the pte was
112 * wr-protected).
116 if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) in vmf_orig_pte_uffd_wp()
119 return pte_marker_uffd_wp(vmf->orig_pte); in vmf_orig_pte_uffd_wp()
185 * Note: this doesn't free the actual pages themselves. That
194 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
222 if (end - 1 > ceiling - 1) in free_pmd_range()
228 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
256 if (end - 1 > ceiling - 1) in free_pud_range()
262 mm_dec_nr_puds(tlb->mm); in free_pud_range()
290 if (end - 1 > ceiling - 1) in free_p4d_range()
299 * This function frees user-level page tables of a process.
315 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
316 * of the address space and the top of it (using -1 for the in free_pgd_range()
320 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
331 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
345 if (end - 1 > ceiling - 1) in free_pgd_range()
346 end -= PMD_SIZE; in free_pgd_range()
347 if (addr > end - 1) in free_pgd_range()
354 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
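The free_pgd_range() comment above explains why the bounds checks subtract 1 before comparing: a ceiling of 0 stands for the very top of the address space, and unsigned wrap-around makes the comparison treat it as "no limit". A standalone, compile-and-run illustration of just that comparison follows; clamp_end() and the sample addresses are invented for the demo and are not kernel code.

#include <stdio.h>

static unsigned long clamp_end(unsigned long end, unsigned long ceiling,
                               unsigned long step)
{
        if (end - 1 > ceiling - 1)      /* ceiling == 0 wraps to ULONG_MAX */
                end -= step;
        return end;
}

int main(void)
{
        /* ceiling 0 ("top of the address space"): end is left alone */
        printf("%#lx\n", clamp_end(0xffffe000UL, 0, 0x1000));
        /* finite ceiling below end: end is pulled back by one step */
        printf("%#lx\n", clamp_end(0xffffe000UL, 0xffffd000UL, 0x1000));
        return 0;
}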
368 unsigned long addr = vma->vm_start; in free_pgtables()
375 next = mas_find(mas, ceiling - 1); in free_pgtables()
387 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
388 floor, next ? next->vm_start : ceiling); in free_pgtables()
393 while (next && next->vm_start <= vma->vm_end + PMD_SIZE in free_pgtables()
396 next = mas_find(mas, ceiling - 1); in free_pgtables()
402 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
403 floor, next ? next->vm_start : ceiling); in free_pgtables()
423 * of a chain of data-dependent loads, meaning most CPUs (alpha in pmd_install()
425 * seen in-order. See the alpha page table accessors for the in pmd_install()
439 return -ENOMEM; in __pte_alloc()
451 return -ENOMEM; in __pte_alloc_kernel()
474 if (current->mm == mm) in add_mm_rss_vec()
483 * is found. For example, we might have a PFN-mapped pte in
491 pgd_t *pgd = pgd_offset(vma->vm_mm, addr); in print_bad_pte()
520 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_pte()
524 current->comm, in print_bad_pte()
529 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_pte()
531 vma->vm_file, in print_bad_pte()
532 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_pte()
533 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_pte()
534 mapping ? mapping->a_ops->read_folio : NULL); in print_bad_pte()
540 * vm_normal_page -- This function gets the "struct page" associated with a pte.
560 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
589 if (vma->vm_ops && vma->vm_ops->find_special_page) in vm_normal_page()
590 return vma->vm_ops->find_special_page(vma, addr); in vm_normal_page()
591 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vm_normal_page()
612 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page()
613 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page()
619 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page()
620 if (pfn == vma->vm_pgoff + off) in vm_normal_page()
622 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page()
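The vm_normal_page() rule quoted above identifies COW'ed pages inside a VM_PFNMAP mapping by checking whether the pfn is still linear with the mapping, i.e. pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT). Below is a minimal sketch of that linearity test, assuming 4 KiB pages; struct toy_vma and pfn_is_linear() are illustrative stand-ins, not kernel structures.

#include <stdbool.h>

#define PAGE_SHIFT 12                   /* assumes 4 KiB pages */

struct toy_vma {                        /* stand-in, not the kernel's vm_area_struct */
        unsigned long vm_start;
        unsigned long vm_pgoff;         /* pfn of the first page of the mapping */
};

/* A pfn that is still "linear" is the original remapped frame (no struct
 * page behind it); anything else in a COW mapping must be a normal page. */
static bool pfn_is_linear(const struct toy_vma *vma,
                          unsigned long addr, unsigned long pfn)
{
        unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;

        return pfn == vma->vm_pgoff + off;
}

This test only works because, for COW-able PFN mappings, the starting pfn is recorded in vma->vm_pgoff, which is visible further down in this listing where remap_pfn_range_internal() sets vma->vm_pgoff = pfn.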
662 * in a direct-access (dax) mapping, so let's just replicate the in vm_normal_page_pmd()
665 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page_pmd()
666 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page_pmd()
672 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page_pmd()
673 if (pfn == vma->vm_pgoff + off) in vm_normal_page_pmd()
675 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page_pmd()
705 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); in restore_exclusive_pte()
730 set_pte_at(vma->vm_mm, address, ptep, pte); in restore_exclusive_pte()
733 * No need to invalidate - it was non-present before. However in restore_exclusive_pte()
756 return -EBUSY; in try_restore_exclusive_pte()
770 unsigned long vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
778 return -EIO; in copy_nonpresent_pte()
781 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
783 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
784 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
785 &src_mm->mmlist); in copy_nonpresent_pte()
833 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
855 VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); in copy_nonpresent_pte()
857 return -EBUSY; in copy_nonpresent_pte()
858 return -ENOENT; in copy_nonpresent_pte()
878 * and re-use the pte the traditional way.
880 * And if we need a pre-allocated page but don't yet have
895 return -EAGAIN; in copy_present_page()
902 copy_user_highpage(&new_folio->page, page, addr, src_vma); in copy_present_page()
909 pte = mk_pte(&new_folio->page, dst_vma->vm_page_prot); in copy_present_page()
912 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
914 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
919 * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
927 struct mm_struct *src_mm = src_vma->vm_mm; in copy_present_pte()
928 unsigned long vm_flags = src_vma->vm_flags; in copy_present_pte()
978 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_pte()
1005 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
1006 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
1024 * protected by mmap_lock-less collapse skipping areas with anon_vma in copy_pte_range()
1030 ret = -ENOMEM; in copy_pte_range()
1046 * We are holding two locks at this point - either of them in copy_pte_range()
1065 if (ret == -EIO) { in copy_pte_range()
1068 } else if (ret == -EBUSY) { in copy_pte_range()
1079 WARN_ON_ONCE(ret != -ENOENT); in copy_pte_range()
1085 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
1088 if (unlikely(ret == -EAGAIN)) in copy_pte_range()
1092 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1109 if (ret == -EIO) { in copy_pte_range()
1112 ret = -ENOMEM; in copy_pte_range()
1116 } else if (ret == -EBUSY) { in copy_pte_range()
1118 } else if (ret == -EAGAIN) { in copy_pte_range()
1121 return -ENOMEM; in copy_pte_range()
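copy_pte_range() drives the per-entry copy with a small error-code protocol: -EAGAIN means "I need a page I could not allocate while holding the page-table locks", so the caller drops the locks, preallocates a page, and retries the same entry with it. Here is a minimal userspace sketch of that retry contract; copy_one() and copy_range() are hypothetical stand-ins, not the kernel functions.

#include <errno.h>
#include <stdlib.h>

static int copy_one(int i, void **prealloc)
{
        if (i == 3 && !*prealloc)       /* pretend entry 3 needs a fresh page */
                return -EAGAIN;
        if (i == 3) {                   /* "consume" the preallocated page */
                free(*prealloc);
                *prealloc = NULL;
        }
        return 0;
}

static int copy_range(int nr)
{
        void *prealloc = NULL;
        int i = 0, ret = 0;

        while (i < nr) {
                ret = copy_one(i, &prealloc);
                if (ret == -EAGAIN) {
                        /* the real code drops the PTLs around this */
                        prealloc = malloc(4096);
                        if (!prealloc)
                                return -ENOMEM;
                        continue;       /* retry the same entry */
                }
                if (ret)
                        break;
                i++;
        }
        free(prealloc);                 /* unused preallocation, if any */
        return ret;
}

int main(void)
{
        return copy_range(8) ? 1 : 0;
}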
1142 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1143 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1149 return -ENOMEM; in copy_pmd_range()
1156 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1159 if (err == -ENOMEM) in copy_pmd_range()
1160 return -ENOMEM; in copy_pmd_range()
1169 return -ENOMEM; in copy_pmd_range()
1179 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1180 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1186 return -ENOMEM; in copy_pud_range()
1193 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1196 if (err == -ENOMEM) in copy_pud_range()
1197 return -ENOMEM; in copy_pud_range()
1206 return -ENOMEM; in copy_pud_range()
1216 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1222 return -ENOMEM; in copy_p4d_range()
1230 return -ENOMEM; in copy_p4d_range()
1244 * Always copy pgtables when dst_vma has uffd-wp enabled even if it's in vma_needs_copy()
1245 * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable in vma_needs_copy()
1246 * contains uffd-wp protection information, that's something we can't in vma_needs_copy()
1252 if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vma_needs_copy()
1255 if (src_vma->anon_vma) in vma_needs_copy()
1272 unsigned long addr = src_vma->vm_start; in copy_page_range()
1273 unsigned long end = src_vma->vm_end; in copy_page_range()
1274 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1275 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1286 if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { in copy_page_range()
1288 * We do not free on error cases below as remove_vma in copy_page_range()
1302 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1316 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1329 ret = -ENOMEM; in copy_page_range()
1335 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1349 return details->even_cows; in should_zap_cows()
1363 /* Otherwise we should only zap non-anon pages */ in should_zap_page()
1372 return details->zap_flags & ZAP_FLAG_DROP_MARKER; in zap_drop_file_uffd_wp()
1376 * This function makes sure that we'll replace the none pte with an uffd-wp
1399 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1432 tlb->fullmm); in zap_pte_range()
1454 rss[mm_counter(page)]--; in zap_pte_range()
1477 * consider uffd-wp bit when zap. For more information, in zap_pte_range()
1481 rss[mm_counter(page)]--; in zap_pte_range()
1489 rss[MM_SWAPENTS]--; in zap_pte_range()
1496 rss[mm_counter(page)]--; in zap_pte_range()
1513 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in zap_pte_range()
1530 * entries before releasing the ptl), free the batched in zap_pte_range()
1551 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1558 } else if (details && details->single_folio && in zap_pmd_range()
1559 folio_test_pmd_mappable(details->single_folio) && in zap_pmd_range()
1560 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1561 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1575 pmd--; in zap_pmd_range()
1593 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1594 mmap_assert_locked(tlb->mm); in zap_pud_range()
1639 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
1655 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
1658 if (start >= vma->vm_end) in unmap_single_vma()
1660 end = min(vma->vm_end, end_addr); in unmap_single_vma()
1661 if (end <= vma->vm_start) in unmap_single_vma()
1664 if (vma->vm_file) in unmap_single_vma()
1667 if (unlikely(vma->vm_flags & VM_PFNMAP)) in unmap_single_vma()
1673 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
1674 * should be non-null for valid hugetlb area. in unmap_single_vma()
1677 * hugetlbfs ->mmap method fails, in unmap_single_vma()
1678 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
1683 if (vma->vm_file) { in unmap_single_vma()
1685 details->zap_flags : 0; in unmap_single_vma()
1695 * unmap_vmas - unmap a range of memory covered by a list of vma's
1712 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
1723 /* Careful - we need to zap private pages too! */ in unmap_vmas()
1727 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, in unmap_vmas()
1737 } while ((vma = mas_find(mas, tree_end - 1)) != NULL); in unmap_vmas()
1742 * zap_page_range_single - remove user pages in a given range
1758 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in zap_page_range_single()
1761 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range_single()
1762 update_hiwater_rss(vma->vm_mm); in zap_page_range_single()
1765 * unmap 'address-end' not 'range.start-range.end' as range in zap_page_range_single()
1775 * zap_vma_ptes - remove ptes mapping the vma
1789 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
1831 return -EINVAL; in validate_page_before_insert()
1840 return -EBUSY; in insert_page_into_pte_locked()
1843 inc_mm_counter(vma->vm_mm, mm_counter_file(page)); in insert_page_into_pte_locked()
1845 set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot)); in insert_page_into_pte_locked()
1866 retval = -ENOMEM; in insert_page()
1867 pte = get_locked_pte(vma->vm_mm, addr, &ptl); in insert_page()
1882 return -EINVAL; in insert_page_in_batch_locked()
1898 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
1904 ret = -EFAULT; in insert_pages()
1910 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
1913 ret = -ENOMEM; in insert_pages()
1923 ret = -EFAULT; in insert_pages()
1932 remaining_pages_total -= pte_idx; in insert_pages()
1939 pages_to_write_in_pmd -= batch_size; in insert_pages()
1940 remaining_pages_total -= batch_size; in insert_pages()
1951 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
1968 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
1970 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
1971 return -EFAULT; in vm_insert_pages()
1972 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
1973 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
1974 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
1978 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
1983 * vm_insert_page - insert single page into user vma
2004 * Usually this function is called from f_op->mmap() handler
2005 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
2007 * function from other places, for example from page-fault handler.
2014 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
2015 return -EFAULT; in vm_insert_page()
2017 return -EINVAL; in vm_insert_page()
2018 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
2019 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
2020 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
2023 return insert_page(vma, addr, page, vma->vm_page_prot); in vm_insert_page()
2028 * __vm_map_pages - maps range of kernel pages into user vma
2042 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
2047 return -ENXIO; in __vm_map_pages()
2050 if (count > num - offset) in __vm_map_pages()
2051 return -ENXIO; in __vm_map_pages()
2064 * vm_map_pages - maps range of kernel pages starts with non zero offset
2084 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
2089 * vm_map_pages_zero - map range of kernel pages starts with zero offset
2111 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2163 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2170 * to override pgprot on a per-page basis.
2177 * pgprot typically only differs from @vma->vm_page_prot when drivers set
2178 * caching- and encryption bits different than those of @vma->vm_page_prot,
2179 * because the caching- or encryption mode may not be known at mmap() time.
2181 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2184 * functions that don't touch caching- or encryption bits, using pte_modify()
2187 * Also when new page-table entries are created, this is only done using the
2188 * fault() callback, and never using the value of vma->vm_page_prot,
2189 * except for page-table entries that point to anonymous pages as the result
2204 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2205 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2207 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2208 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2210 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2224 * vmf_insert_pfn - insert single pfn into user vma
2232 * This function should only be called from a vm_ops->fault handler, and
2246 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
2253 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2267 pgprot_t pgprot = vma->vm_page_prot; in __vm_insert_mixed()
2272 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2302 if (err == -ENOMEM) in __vm_insert_mixed()
2304 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2332 * in null mappings (currently treated as "copy-on-access")
2344 return -ENOMEM; in remap_pte_range()
2349 err = -EACCES; in remap_pte_range()
2368 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2371 return -ENOMEM; in remap_pmd_range()
2391 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2394 return -ENOMEM; in remap_pud_range()
2413 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2416 return -ENOMEM; in remap_p4d_range()
2433 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range_internal()
2437 return -EINVAL; in remap_pfn_range_internal()
2452 * There's a horrible special case to handle copy-on-write in remap_pfn_range_internal()
2454 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in remap_pfn_range_internal()
2457 if (is_cow_mapping(vma->vm_flags)) { in remap_pfn_range_internal()
2458 if (addr != vma->vm_start || end != vma->vm_end) in remap_pfn_range_internal()
2459 return -EINVAL; in remap_pfn_range_internal()
2460 vma->vm_pgoff = pfn; in remap_pfn_range_internal()
2466 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range_internal()
2482 * must have pre-validated the caching bits of the pgprot_t.
2494 * maintain page reference counts, and callers may free in remap_pfn_range_notrack()
2502 * remap_pfn_range - remap kernel memory to userspace
2520 return -EINVAL; in remap_pfn_range()
2530 * vm_iomap_memory - remap memory to userspace
2539 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
2540 * whatever write-combining details or similar.
2550 return -EINVAL; in vm_iomap_memory()
2552 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
2560 return -EINVAL; in vm_iomap_memory()
2563 if (vma->vm_pgoff > pages) in vm_iomap_memory()
2564 return -EINVAL; in vm_iomap_memory()
2565 pfn += vma->vm_pgoff; in vm_iomap_memory()
2566 pages -= vma->vm_pgoff; in vm_iomap_memory()
2569 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
2571 return -EINVAL; in vm_iomap_memory()
2574 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
2592 return -ENOMEM; in apply_to_pte_range()
2598 return -EINVAL; in apply_to_pte_range()
2635 return -ENOMEM; in apply_to_pmd_range()
2644 return -EINVAL; in apply_to_pmd_range()
2671 return -ENOMEM; in apply_to_pud_range()
2680 return -EINVAL; in apply_to_pud_range()
2707 return -ENOMEM; in apply_to_p4d_range()
2716 return -EINVAL; in apply_to_p4d_range()
2742 return -EINVAL; in __apply_to_page_range()
2750 err = -EINVAL; in __apply_to_page_range()
2797 * read non-atomically. Before making any commitment, on those architectures
2808 spin_lock(vmf->ptl); in pte_unmap_same()
2809 same = pte_same(ptep_get(vmf->pte), vmf->orig_pte); in pte_unmap_same()
2810 spin_unlock(vmf->ptl); in pte_unmap_same()
2813 pte_unmap(vmf->pte); in pte_unmap_same()
2814 vmf->pte = NULL; in pte_unmap_same()
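pte_unmap_same() is one instance of a pattern that recurs throughout the fault paths in this listing: read the PTE without the lock, do the expensive work, then re-take the PTL and only commit if pte_same() still holds against the snapshot in vmf->orig_pte. A generic userspace sketch of that snapshot-and-revalidate idea follows, using pthreads; struct slot and still_same() are made-up stand-ins.

#include <pthread.h>
#include <stdbool.h>

typedef unsigned long entry_t;          /* stand-in for a pte value */

struct slot {
        pthread_mutex_t lock;           /* stand-in for the PTL */
        entry_t entry;
};

/* Re-take the lock and check that the entry still matches the lockless
 * snapshot taken earlier; the caller commits only if it does. */
static bool still_same(struct slot *s, entry_t snapshot)
{
        bool same;

        pthread_mutex_lock(&s->lock);
        same = (s->entry == snapshot);
        pthread_mutex_unlock(&s->lock);
        return same;
}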
2821 * -EHWPOISON: copy failed due to hwpoison in source page
2822 * -EAGAIN: copied failed (some other reason)
2830 struct vm_area_struct *vma = vmf->vma; in __wp_page_copy_user()
2831 struct mm_struct *mm = vma->vm_mm; in __wp_page_copy_user()
2832 unsigned long addr = vmf->address; in __wp_page_copy_user()
2837 return -EHWPOISON; in __wp_page_copy_user()
2844 * a "struct page" for it. We do a best-effort copy by in __wp_page_copy_user()
2846 * fails, we just zero-fill it. Live with it. in __wp_page_copy_user()
2855 vmf->pte = NULL; in __wp_page_copy_user()
2856 if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { in __wp_page_copy_user()
2859 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
2860 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
2865 if (vmf->pte) in __wp_page_copy_user()
2866 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
2867 ret = -EAGAIN; in __wp_page_copy_user()
2871 entry = pte_mkyoung(vmf->orig_pte); in __wp_page_copy_user()
2872 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in __wp_page_copy_user()
2873 update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1); in __wp_page_copy_user()
2883 if (vmf->pte) in __wp_page_copy_user()
2886 /* Re-validate under PTL if the page is still mapped */ in __wp_page_copy_user()
2887 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
2888 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
2890 if (vmf->pte) in __wp_page_copy_user()
2891 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
2892 ret = -EAGAIN; in __wp_page_copy_user()
2903 * use-case in __wp_page_copy_user()
2914 if (vmf->pte) in __wp_page_copy_user()
2915 pte_unmap_unlock(vmf->pte, vmf->ptl); in __wp_page_copy_user()
2924 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
2927 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
2945 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
2947 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
2949 if (vmf->vma->vm_file && in do_page_mkwrite()
2950 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
2953 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
2955 vmf->flags = old_flags; in do_page_mkwrite()
2960 if (!folio->mapping) { in do_page_mkwrite()
2977 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
2979 struct folio *folio = page_folio(vmf->page); in fault_dirty_shared_page()
2981 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
2986 * Take a local copy of the address_space - folio.mapping may be zeroed in fault_dirty_shared_page()
2988 * pinned by vma->vm_file's reference. We rely on folio_unlock()'s in fault_dirty_shared_page()
2995 file_update_time(vma->vm_file); in fault_dirty_shared_page()
3024 * or due to us being the last reference standing to the page. In either
3026 * any related book-keeping.
3029 __releases(vmf->ptl) in wp_page_reuse()
3031 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
3032 struct page *page = vmf->page; in wp_page_reuse()
3035 VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); in wp_page_reuse()
3044 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
3046 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
3047 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
3049 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
3050 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_reuse()
3051 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
3064 * - Allocate a page, copy the content of the old page to the new one.
3065 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
3066 * - Take the PTL. If the pte changed, bail out and release the allocated page
3067 * - If the pte is still the way we remember it, update the page table and all
3068 * relevant references. This includes dropping the reference the page-table
3070 * - In any case, unlock the PTL and drop the reference we took to the old page.
3074 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_page_copy()
3075 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
3076 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
3086 if (vmf->page) in wp_page_copy()
3087 old_folio = page_folio(vmf->page); in wp_page_copy()
3091 if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { in wp_page_copy()
3092 new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address); in wp_page_copy()
3097 vmf->address, false); in wp_page_copy()
3101 ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); in wp_page_copy()
3105 * it's fine. If not, userspace would re-fault on in wp_page_copy()
3108 * The -EHWPOISON case will not be retried. in wp_page_copy()
3115 return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; in wp_page_copy()
3117 kmsan_copy_page_meta(&new_folio->page, vmf->page); in wp_page_copy()
3127 vmf->address & PAGE_MASK, in wp_page_copy()
3128 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
3132 * Re-check the pte - we dropped the lock in wp_page_copy()
3134 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
3135 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in wp_page_copy()
3138 dec_mm_counter(mm, mm_counter_file(&old_folio->page)); in wp_page_copy()
3142 ksm_might_unmap_zero_page(mm, vmf->orig_pte); in wp_page_copy()
3145 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
3146 entry = mk_pte(&new_folio->page, vma->vm_page_prot); in wp_page_copy()
3149 if (pte_soft_dirty(vmf->orig_pte)) in wp_page_copy()
3151 if (pte_uffd_wp(vmf->orig_pte)) in wp_page_copy()
3164 ptep_clear_flush(vma, vmf->address, vmf->pte); in wp_page_copy()
3165 folio_add_new_anon_rmap(new_folio, vma, vmf->address); in wp_page_copy()
3173 set_pte_at_notify(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
3174 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_copy()
3198 page_remove_rmap(vmf->page, vma, false); in wp_page_copy()
3201 /* Free the old page.. */ in wp_page_copy()
3204 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3205 } else if (vmf->pte) { in wp_page_copy()
3206 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
3207 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3216 free_swap_cache(&old_folio->page); in wp_page_copy()
3233 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3239 * shared mapping due to PTE being read-only once the mapped page is prepared.
3250 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3251 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3252 &vmf->ptl); in finish_mkwrite_fault()
3253 if (!vmf->pte) in finish_mkwrite_fault()
3259 if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) { in finish_mkwrite_fault()
3260 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3261 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3274 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3276 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3279 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3280 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in wp_pfn_shared()
3281 vma_end_read(vmf->vma); in wp_pfn_shared()
3285 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3286 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3296 __releases(vmf->ptl) in wp_page_shared()
3298 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3303 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3306 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3307 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in wp_page_shared()
3309 vma_end_read(vmf->vma); in wp_page_shared()
3342 * shared-page counter for the old page.
3345 * done by the caller (the low-level page fault routine in most cases).
3353 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3358 __releases(vmf->ptl) in do_wp_page()
3360 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_wp_page()
3361 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
3365 if (userfaultfd_pte_wp(vma, ptep_get(vmf->pte))) { in do_wp_page()
3366 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3371 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
3374 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
3375 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
3376 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
3379 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
3381 if (vmf->page) in do_wp_page()
3382 folio = page_folio(vmf->page); in do_wp_page()
3388 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in do_wp_page()
3394 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
3396 if (!vmf->page) in do_wp_page()
3410 if (PageAnonExclusive(vmf->page)) in do_wp_page()
3443 page_move_anon_rmap(vmf->page, vma); in do_wp_page()
3447 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3454 if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) { in do_wp_page()
3455 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3456 vma_end_read(vmf->vma); in do_wp_page()
3466 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3478 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
3490 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
3491 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
3496 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3497 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3503 * unmap_mapping_folio() - Unmap single folio from processes.
3515 struct address_space *mapping = folio->mapping; in unmap_mapping_folio()
3522 first_index = folio->index; in unmap_mapping_folio()
3523 last_index = folio_next_index(folio) - 1; in unmap_mapping_folio()
3530 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_folio()
3531 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_folio()
3537 * unmap_mapping_pages() - Unmap pages from processes.
3553 pgoff_t last_index = start + nr - 1; in unmap_mapping_pages()
3560 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
3561 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_pages()
3568 * unmap_mapping_range - unmap the portion of all mmaps in the specified
3588 pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3593 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3595 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
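unmap_mapping_range() converts the byte-granular hole (holebegin, holelen) into page offsets: the start is truncated down to a page index and the length is rounded up to whole pages. A tiny illustration of that rounding, assuming 4 KiB pages (the ULONG_MAX line above is the separate overflow clamp, used when the 64-bit byte range does not fit the page-index type):

#include <stdio.h>

#define PAGE_SHIFT      12              /* assumes 4 KiB pages */
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long holebegin = 8192, holelen = 5000;
        unsigned long hba = holebegin >> PAGE_SHIFT;                  /* pgoff 2 */
        unsigned long hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* 2 pages */

        printf("hba=%lu hlen=%lu\n", hba, hlen);
        return 0;
}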
3607 struct folio *folio = page_folio(vmf->page); in remove_device_exclusive_entry()
3608 struct vm_area_struct *vma = vmf->vma; in remove_device_exclusive_entry()
3614 * the PTL so a racing thread can remove the device-exclusive in remove_device_exclusive_entry()
3615 * entry and unmap it. If the folio is free the entry must in remove_device_exclusive_entry()
3617 * been re-allocated after being freed all we do is lock and in remove_device_exclusive_entry()
3629 vma->vm_mm, vmf->address & PAGE_MASK, in remove_device_exclusive_entry()
3630 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); in remove_device_exclusive_entry()
3633 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in remove_device_exclusive_entry()
3634 &vmf->ptl); in remove_device_exclusive_entry()
3635 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in remove_device_exclusive_entry()
3636 restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte); in remove_device_exclusive_entry()
3638 if (vmf->pte) in remove_device_exclusive_entry()
3639 pte_unmap_unlock(vmf->pte, vmf->ptl); in remove_device_exclusive_entry()
3653 if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || in should_try_to_free_swap()
3668 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in pte_marker_clear()
3669 vmf->address, &vmf->ptl); in pte_marker_clear()
3670 if (!vmf->pte) in pte_marker_clear()
3673 * Be careful so that we will only recover a special uffd-wp pte into a in pte_marker_clear()
3680 if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) in pte_marker_clear()
3681 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); in pte_marker_clear()
3682 pte_unmap_unlock(vmf->pte, vmf->ptl); in pte_marker_clear()
3688 if (vma_is_anonymous(vmf->vma)) in do_pte_missing()
3695 * This is actually a page-missing access, but with uffd-wp special pte
3696 * installed. It means this pte was wr-protected before being unmapped.
3702 * got unregistered - we can simply clear them. in pte_marker_handle_uffd_wp()
3704 if (unlikely(!userfaultfd_wp(vmf->vma))) in pte_marker_handle_uffd_wp()
3712 swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); in handle_pte_marker()
3722 /* Higher priority than uffd-wp when data corrupted */ in handle_pte_marker()
3734 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3743 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
3758 entry = pte_to_swp_entry(vmf->orig_pte); in do_swap_page()
3761 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
3762 vmf->address); in do_swap_page()
3764 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
3767 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_swap_page()
3777 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
3778 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3779 vmf->address, &vmf->ptl); in do_swap_page()
3780 if (unlikely(!vmf->pte || in do_swap_page()
3781 !pte_same(ptep_get(vmf->pte), in do_swap_page()
3782 vmf->orig_pte))) in do_swap_page()
3789 get_page(vmf->page); in do_swap_page()
3790 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3791 ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
3792 put_page(vmf->page); in do_swap_page()
3798 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
3809 folio = swap_cache_get_folio(entry, vma, vmf->address); in do_swap_page()
3815 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
3820 * finish swapin first, free the entry, and swapout in do_swap_page()
3833 vma, vmf->address, false); in do_swap_page()
3834 page = &folio->page; in do_swap_page()
3840 vma->vm_mm, GFP_KERNEL, in do_swap_page()
3854 folio->swap = entry; in do_swap_page()
3856 folio->private = NULL; in do_swap_page()
3871 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3872 vmf->address, &vmf->ptl); in do_swap_page()
3873 if (likely(vmf->pte && in do_swap_page()
3874 pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
3882 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
3910 * page->index of !PageKSM() pages would be nonlinear inside the in do_swap_page()
3911 * anon VMA -- PageKSM() is lost on actual swapout. in do_swap_page()
3913 page = ksm_might_need_to_copy(page, vma, vmf->address); in do_swap_page()
3917 } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) { in do_swap_page()
3929 if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && in do_swap_page()
3939 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
3940 &vmf->ptl); in do_swap_page()
3941 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
3965 exclusive = pte_swp_exclusive(vmf->orig_pte); in do_swap_page()
3969 * swapcache -> certainly exclusive. in do_swap_page()
3973 data_race(si->flags & SWP_STABLE_WRITES)) { in do_swap_page()
4004 * Remove the swap entry and conditionally try to free up the swapcache. in do_swap_page()
4009 if (should_try_to_free_swap(folio, vma, vmf->flags)) in do_swap_page()
4012 inc_mm_counter(vma->vm_mm, MM_ANONPAGES); in do_swap_page()
4013 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in do_swap_page()
4014 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
4024 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4026 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
4031 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
4033 if (pte_swp_uffd_wp(vmf->orig_pte)) in do_swap_page()
4035 vmf->orig_pte = pte; in do_swap_page()
4039 page_add_new_anon_rmap(page, vma, vmf->address); in do_swap_page()
4042 page_add_anon_rmap(page, vma, vmf->address, rmap_flags); in do_swap_page()
4047 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_swap_page()
4048 arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); in do_swap_page()
4064 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4071 /* No need to invalidate - it was non-present before */ in do_swap_page()
4072 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in do_swap_page()
4074 if (vmf->pte) in do_swap_page()
4075 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4084 if (vmf->pte) in do_swap_page()
4085 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4102 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4109 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
4114 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
4115 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
4122 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
4125 /* Use the zero-page for reads */ in do_anonymous_page()
4126 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
4127 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
4128 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
4129 vma->vm_page_prot)); in do_anonymous_page()
4130 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
4131 vmf->address, &vmf->ptl); in do_anonymous_page()
4132 if (!vmf->pte) in do_anonymous_page()
4135 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
4138 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4143 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4152 folio = vma_alloc_zeroed_movable_folio(vma, vmf->address); in do_anonymous_page()
4156 if (mem_cgroup_charge(folio, vma->vm_mm, GFP_KERNEL)) in do_anonymous_page()
4167 entry = mk_pte(&folio->page, vma->vm_page_prot); in do_anonymous_page()
4169 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
4172 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_anonymous_page()
4173 &vmf->ptl); in do_anonymous_page()
4174 if (!vmf->pte) in do_anonymous_page()
4177 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
4181 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4187 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4192 inc_mm_counter(vma->vm_mm, MM_ANONPAGES); in do_anonymous_page()
4193 folio_add_new_anon_rmap(folio, vma, vmf->address); in do_anonymous_page()
4198 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); in do_anonymous_page()
4200 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
4201 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in do_anonymous_page()
4203 if (vmf->pte) in do_anonymous_page()
4204 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4217 * released depending on flags and vma->vm_ops->fault() return value.
4222 struct vm_area_struct *vma = vmf->vma; in __do_fault()
4240 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
4241 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
4242 if (!vmf->prealloc_pte) in __do_fault()
4246 ret = vma->vm_ops->fault(vmf); in __do_fault()
4251 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
4252 struct page *page = vmf->page; in __do_fault()
4257 page->index, 1, false); in __do_fault()
4264 vmf->page = NULL; in __do_fault()
4269 lock_page(vmf->page); in __do_fault()
4271 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); in __do_fault()
4279 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
4281 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
4286 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
4287 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
4292 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
4293 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
4294 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
4301 * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any in do_set_pmd()
4304 if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) in do_set_pmd()
4327 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
4328 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
4329 if (!vmf->prealloc_pte) in do_set_pmd()
4333 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
4334 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
4339 entry = mk_huge_pmd(page, vma->vm_page_prot); in do_set_pmd()
4343 add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); in do_set_pmd()
4352 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
4354 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
4360 spin_unlock(vmf->ptl); in do_set_pmd()
4371 * set_pte_range - Set a range of PTEs to point to pages in a folio.
4381 struct vm_area_struct *vma = vmf->vma; in set_pte_range()
4383 bool write = vmf->flags & FAULT_FLAG_WRITE; in set_pte_range()
4384 bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); in set_pte_range()
4388 entry = mk_pte(page, vma->vm_page_prot); in set_pte_range()
4399 /* copy-on-write page */ in set_pte_range()
4400 if (write && !(vma->vm_flags & VM_SHARED)) { in set_pte_range()
4401 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr); in set_pte_range()
4406 add_mm_counter(vma->vm_mm, mm_counter_file(page), nr); in set_pte_range()
4409 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr); in set_pte_range()
4411 /* no need to invalidate: a not-present page won't be cached */ in set_pte_range()
4412 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr); in set_pte_range()
4417 if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) in vmf_pte_changed()
4418 return !pte_same(ptep_get(vmf->pte), vmf->orig_pte); in vmf_pte_changed()
4420 return !pte_none(ptep_get(vmf->pte)); in vmf_pte_changed()
4424 * finish_fault - finish page fault once we have prepared the page to fault
4440 struct vm_area_struct *vma = vmf->vma; in finish_fault()
4445 if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) in finish_fault()
4446 page = vmf->cow_page; in finish_fault()
4448 page = vmf->page; in finish_fault()
4454 if (!(vma->vm_flags & VM_SHARED)) { in finish_fault()
4455 ret = check_stable_address_space(vma->vm_mm); in finish_fault()
4460 if (pmd_none(*vmf->pmd)) { in finish_fault()
4467 if (vmf->prealloc_pte) in finish_fault()
4468 pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); in finish_fault()
4469 else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) in finish_fault()
4473 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in finish_fault()
4474 vmf->address, &vmf->ptl); in finish_fault()
4475 if (!vmf->pte) in finish_fault()
4478 /* Re-check under ptl */ in finish_fault()
4482 set_pte_range(vmf, folio, page, 1, vmf->address); in finish_fault()
4485 update_mmu_tlb(vma, vmf->address, vmf->pte); in finish_fault()
4489 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
4510 return -EINVAL; in fault_around_bytes_set()
4513 * The minimum value is 1 page, however this results in no fault-around in fault_around_bytes_set()
4537 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
4538 * not ready to be mapped: not up-to-date, locked, etc.
4555 pgoff_t pte_off = pte_index(vmf->address); in do_fault_around()
4556 /* The page offset of vmf->address within the VMA. */ in do_fault_around()
4557 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in do_fault_around()
4563 pte_off - min(pte_off, vma_off)); in do_fault_around()
4567 pte_off + vma_pages(vmf->vma) - vma_off) - 1; in do_fault_around()
4569 if (pmd_none(*vmf->pmd)) { in do_fault_around()
4570 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
4571 if (!vmf->prealloc_pte) in do_fault_around()
4576 ret = vmf->vma->vm_ops->map_pages(vmf, in do_fault_around()
4577 vmf->pgoff + from_pte - pte_off, in do_fault_around()
4578 vmf->pgoff + to_pte - pte_off); in do_fault_around()
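do_fault_around() picks a window of PTEs around the faulting address and clips it so it never crosses the current page-table page or the VMA boundaries, then hands the resulting file offsets to ->map_pages(). Below is a simplified sketch of that clamping arithmetic; the helpers, parameters and the PTRS_PER_PTE = 512 value (x86-64 with 4 KiB pages) are assumptions for the illustration, not the kernel's exact code.

#include <stdio.h>

#define PTRS_PER_PTE    512UL           /* assumed: entries per PTE page */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

static unsigned long max_ul(unsigned long a, unsigned long b)
{
        return a > b ? a : b;
}

/* Widen the faulting PTE index to an aligned window of "nr" entries, then
 * clip it to the PTE table and to the VMA. */
static void fault_around_window(unsigned long pte_off, unsigned long vma_off,
                                unsigned long vma_pages, unsigned long nr,
                                unsigned long *from_pte, unsigned long *to_pte)
{
        *from_pte = max_ul(pte_off & ~(nr - 1),         /* align down to nr */
                           pte_off - min_ul(pte_off, vma_off));
        *to_pte = min_ul(min_ul(*from_pte + nr, PTRS_PER_PTE),
                         pte_off + vma_pages - vma_off) - 1;
}

int main(void)
{
        unsigned long from, to;

        /* fault at PTE index 5, VMA starts 3 pages earlier, 100 pages long */
        fault_around_window(5, 3, 100, 16, &from, &to);
        printf("map PTEs %lu..%lu\n", from, to);        /* 2..17 */
        return 0;
}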
4584 /* Return true if we should do read fault-around, false otherwise */
4587 /* No ->map_pages? No way to fault around... */ in should_fault_around()
4588 if (!vmf->vma->vm_ops->map_pages) in should_fault_around()
4591 if (uffd_disable_fault_around(vmf->vma)) in should_fault_around()
4604 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
4614 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_read_fault()
4615 vma_end_read(vmf->vma); in do_read_fault()
4624 folio = page_folio(vmf->page); in do_read_fault()
4633 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
4636 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_cow_fault()
4644 vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); in do_cow_fault()
4645 if (!vmf->cow_page) in do_cow_fault()
4648 if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm, in do_cow_fault()
4650 put_page(vmf->cow_page); in do_cow_fault()
4653 folio_throttle_swaprate(page_folio(vmf->cow_page), GFP_KERNEL); in do_cow_fault()
4661 copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); in do_cow_fault()
4662 __SetPageUptodate(vmf->cow_page); in do_cow_fault()
4665 unlock_page(vmf->page); in do_cow_fault()
4666 put_page(vmf->page); in do_cow_fault()
4671 put_page(vmf->cow_page); in do_cow_fault()
4677 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
4681 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_shared_fault()
4690 folio = page_folio(vmf->page); in do_shared_fault()
4696 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
4719 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4728 struct vm_area_struct *vma = vmf->vma; in do_fault()
4729 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
4735 if (!vma->vm_ops->fault) { in do_fault()
4736 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in do_fault()
4737 vmf->address, &vmf->ptl); in do_fault()
4738 if (unlikely(!vmf->pte)) in do_fault()
4748 if (unlikely(pte_none(ptep_get(vmf->pte)))) in do_fault()
4753 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
4755 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
4757 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
4762 /* preallocated pagetable is unused: free it */ in do_fault()
4763 if (vmf->prealloc_pte) { in do_fault()
4764 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
4765 vmf->prealloc_pte = NULL; in do_fault()
4789 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
4803 spin_lock(vmf->ptl); in do_numa_page()
4804 if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in do_numa_page()
4805 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4810 old_pte = ptep_get(vmf->pte); in do_numa_page()
4811 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
4819 can_change_pte_writable(vma, vmf->address, pte)) in do_numa_page()
4822 page = vm_normal_page(vma, vmf->address, pte); in do_numa_page()
4826 /* TODO: handle PTE-mapped THP */ in do_numa_page()
4845 if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) in do_numa_page()
4855 last_cpupid = (-1 & LAST_CPUPID_MASK); in do_numa_page()
4858 target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, in do_numa_page()
4864 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4876 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_numa_page()
4877 vmf->address, &vmf->ptl); in do_numa_page()
4878 if (unlikely(!vmf->pte)) in do_numa_page()
4880 if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in do_numa_page()
4881 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4887 * non-accessible ptes, some can allow access by kernel mode. in do_numa_page()
4889 old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); in do_numa_page()
4890 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
4894 ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); in do_numa_page()
4895 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in do_numa_page()
4896 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4905 struct vm_area_struct *vma = vmf->vma; in create_huge_pmd()
4908 if (vma->vm_ops->huge_fault) in create_huge_pmd()
4909 return vma->vm_ops->huge_fault(vmf, PMD_ORDER); in create_huge_pmd()
4916 struct vm_area_struct *vma = vmf->vma; in wp_huge_pmd()
4917 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_huge_pmd()
4922 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) in wp_huge_pmd()
4927 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pmd()
4928 if (vma->vm_ops->huge_fault) { in wp_huge_pmd()
4929 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); in wp_huge_pmd()
4935 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
4936 __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); in wp_huge_pmd()
4945 struct vm_area_struct *vma = vmf->vma; in create_huge_pud()
4949 if (vma->vm_ops->huge_fault) in create_huge_pud()
4950 return vma->vm_ops->huge_fault(vmf, PUD_ORDER); in create_huge_pud()
4959 struct vm_area_struct *vma = vmf->vma; in wp_huge_pud()
4965 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pud()
4966 if (vma->vm_ops->huge_fault) { in wp_huge_pud()
4967 ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); in wp_huge_pud()
4973 /* COW or write-notify not handled on PUD level: split pud.*/ in wp_huge_pud()
4974 __split_huge_pud(vma, vmf->pud, vmf->address); in wp_huge_pud()
4988 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
4998 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
5000 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
5005 vmf->pte = NULL; in handle_pte_fault()
5006 vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
5014 vmf->pte = pte_offset_map_nolock(vmf->vma->vm_mm, vmf->pmd, in handle_pte_fault()
5015 vmf->address, &vmf->ptl); in handle_pte_fault()
5016 if (unlikely(!vmf->pte)) in handle_pte_fault()
5018 vmf->orig_pte = ptep_get_lockless(vmf->pte); in handle_pte_fault()
5019 vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
5021 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
5022 pte_unmap(vmf->pte); in handle_pte_fault()
5023 vmf->pte = NULL; in handle_pte_fault()
5027 if (!vmf->pte) in handle_pte_fault()
5030 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
5033 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
5036 spin_lock(vmf->ptl); in handle_pte_fault()
5037 entry = vmf->orig_pte; in handle_pte_fault()
5038 if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) { in handle_pte_fault()
5039 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
5042 if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { in handle_pte_fault()
5045 else if (likely(vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
5049 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
5050 vmf->flags & FAULT_FLAG_WRITE)) { in handle_pte_fault()
5051 update_mmu_cache_range(vmf, vmf->vma, vmf->address, in handle_pte_fault()
5052 vmf->pte, 1); in handle_pte_fault()
5055 if (vmf->flags & FAULT_FLAG_TRIED) in handle_pte_fault()
5063 if (vmf->flags & FAULT_FLAG_WRITE) in handle_pte_fault()
5064 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, in handle_pte_fault()
5065 vmf->pte); in handle_pte_fault()
5068 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
5089 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
5090 unsigned long vm_flags = vma->vm_flags; in __handle_mm_fault()
5173 * mm_account_fault - Do page fault accounting
5176 * of perf event counters, but we'll still do the per-task accounting to
5185 * still be in per-arch page fault handlers at the entry of page fault.
5222 current->maj_flt++; in mm_account_fault()
5224 current->min_flt++; in mm_account_fault()
5244 current->in_lru_fault = vma_has_recency(vma); in lru_gen_enter_fault()
5249 current->in_lru_fault = false; in lru_gen_exit_fault()
5269 * just treat it like an ordinary read-fault otherwise. in sanitize_fault_flags()
5271 if (!is_cow_mapping(vma->vm_flags)) in sanitize_fault_flags()
5274 /* Write faults on read-only mappings are impossible ... */ in sanitize_fault_flags()
5275 if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) in sanitize_fault_flags()
5278 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && in sanitize_fault_flags()
5279 !is_cow_mapping(vma->vm_flags))) in sanitize_fault_flags()
5284 * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of in sanitize_fault_flags()
5306 struct mm_struct *mm = vma->vm_mm; in handle_mm_fault()
5332 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
5380 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but in mmap_upgrade_trylock()
5425 if (likely(vma && (vma->vm_start <= addr))) in lock_mm_and_find_vma()
5432 if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { in lock_mm_and_find_vma()
5443 * re-take it, and also look up the vma again, in lock_mm_and_find_vma()
5444 * re-checking it. in lock_mm_and_find_vma()
5453 if (vma->vm_start <= addr) in lock_mm_and_find_vma()
5455 if (!(vma->vm_flags & VM_GROWSDOWN)) in lock_mm_and_find_vma()
5481 MA_STATE(mas, &mm->mm_mt, address, address); in lock_vma_under_rcu()
5499 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) in lock_vma_under_rcu()
5503 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in lock_vma_under_rcu()
5507 if (vma->detached) { in lock_vma_under_rcu()
5529 * We've already handled the fast-path in-line.
5535 return -ENOMEM; in __p4d_alloc()
5537 spin_lock(&mm->page_table_lock); in __p4d_alloc()
5544 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
5552 * We've already handled the fast-path in-line.
5558 return -ENOMEM; in __pud_alloc()
5560 spin_lock(&mm->page_table_lock); in __pud_alloc()
5567 spin_unlock(&mm->page_table_lock); in __pud_alloc()
5575 * We've already handled the fast-path in-line.
5582 return -ENOMEM; in __pmd_alloc()
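__p4d_alloc(), __pud_alloc() and __pmd_alloc() all follow the same shape: allocate the new table without holding mm->page_table_lock, then take the lock, re-check that nobody else installed one in the meantime, and either publish the new table or throw it away. A generic userspace sketch of that allocate-then-publish pattern; struct dir and install_slot() are invented for the illustration.

#include <pthread.h>
#include <stdlib.h>

struct dir {                            /* stand-in for the upper-level table */
        pthread_mutex_t lock;           /* stand-in for mm->page_table_lock */
        void *slot;
};

static int install_slot(struct dir *d)
{
        void *new_tbl = calloc(1, 4096); /* allocate outside the lock */

        if (!new_tbl)
                return -1;              /* the kernel returns -ENOMEM here */

        pthread_mutex_lock(&d->lock);
        if (!d->slot)                   /* still empty: publish ours */
                d->slot = new_tbl;
        else
                free(new_tbl);          /* lost the race: discard */
        pthread_mutex_unlock(&d->lock);
        return 0;
}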
5598 * follow_pte - look up PTE at a user virtual address
5614 * it is not a good general-purpose API.
5616 * Return: zero on success, -ve otherwise.
5652 return -EINVAL; in follow_pte()
5657 * follow_pfn - look up PFN at a user virtual address
5667 * Return: zero and the pfn at @pfn on success, -ve otherwise.
5672 int ret = -EINVAL; in follow_pfn()
5676 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfn()
5679 ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); in follow_pfn()
5693 int ret = -EINVAL; in follow_phys()
5697 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_phys()
5700 if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) in follow_phys()
5722 * generic_access_phys - generic implementation for iomem mmap access
5742 int ret = -EINVAL; in generic_access_phys()
5744 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in generic_access_phys()
5745 return -EINVAL; in generic_access_phys()
5748 if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) in generic_access_phys()
5749 return -EINVAL; in generic_access_phys()
5757 return -EINVAL; in generic_access_phys()
5761 return -ENOMEM; in generic_access_phys()
5763 if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) in generic_access_phys()
5822 return buf - old_buf; in __access_remote_vm()
5835 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
5836 bytes = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
5843 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
5844 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
5845 bytes = PAGE_SIZE-offset; in __access_remote_vm()
5859 len -= bytes; in __access_remote_vm()
5865 return buf - old_buf; in __access_remote_vm()
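__access_remote_vm() copies an arbitrary (addr, len) range by clamping each step to the next page boundary, so a single pinned page is never overrun. A small sketch of just that chunking loop, assuming 4 KiB pages; walk_chunks() is an illustrative stand-in.

#include <stdio.h>

#define PAGE_SIZE 4096UL                /* assumes 4 KiB pages */

static void walk_chunks(unsigned long addr, unsigned long len)
{
        while (len) {
                unsigned long offset = addr & (PAGE_SIZE - 1);
                unsigned long bytes = len;

                if (bytes > PAGE_SIZE - offset)
                        bytes = PAGE_SIZE - offset;
                printf("copy %lu bytes at %#lx (page offset %lu)\n",
                       bytes, addr, offset);
                len -= bytes;
                addr += bytes;
        }
}

int main(void)
{
        walk_chunks(0x1ffa, 20);        /* 6 bytes, then 14 bytes */
        return 0;
}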
5869 * access_remote_vm - access another process' address space
5914 struct mm_struct *mm = current->mm; in print_vma_addr()
5924 if (vma && vma->vm_file) { in print_vma_addr()
5925 struct file *f = vma->vm_file; in print_vma_addr()
5934 vma->vm_start, in print_vma_addr()
5935 vma->vm_end - vma->vm_start); in print_vma_addr()
5949 if (current->mm) in __might_fault()
5950 might_lock_read(&current->mm->mmap_lock); in __might_fault()
5969 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in process_huge_page()
5973 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
5979 for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { in process_huge_page()
5987 base = pages_per_huge_page - 2 * (pages_per_huge_page - n); in process_huge_page()
5988 l = pages_per_huge_page - n; in process_huge_page()
5998 * Process remaining subpages in left-right-left-right pattern in process_huge_page()
6003 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
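process_huge_page(), clear_huge_page() and copy_user_large_folio() all derive the huge-page base by masking the faulting address with the huge-page size, and use the offset to pick the target subpage, which is handled last so it stays hottest in cache. A quick demonstration of that alignment arithmetic, assuming a 2 MiB huge page made of 4 KiB subpages; the sample address is arbitrary.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long pages_per_huge_page = 512;        /* 2 MiB huge page */
        unsigned long addr_hint = 0x7f0000345678UL;
        unsigned long addr = addr_hint &
                ~((pages_per_huge_page << PAGE_SHIFT) - 1);
        unsigned long n = (addr_hint - addr) / PAGE_SIZE;

        printf("base=%#lx target subpage=%lu of %lu\n",
               addr, n, pages_per_huge_page);
        return 0;
}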
6044 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in clear_huge_page()
6071 return -EHWPOISON; in copy_user_gigantic_page()
6087 if (copy_mc_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, in copy_subpage()
6088 addr, copy_arg->vma)) { in copy_subpage()
6089 memory_failure_queue(page_to_pfn(copy_arg->src + idx), 0); in copy_subpage()
6090 return -EHWPOISON; in copy_subpage()
6100 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in copy_user_large_folio()
6102 .dst = &dst->page, in copy_user_large_folio()
6103 .src = &src->page, in copy_user_large_folio()
6134 ret_val -= (PAGE_SIZE - rc); in copy_folio_from_user()
6152 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
6163 ptdesc->ptl = ptl; in ptlock_alloc()
6169 kmem_cache_free(page_ptl_cachep, ptdesc->ptl); in ptlock_free()