hugetlb.c: diff 57a30218fa25c469ed507964bbf028b7a064309a (old) -> 7d4a8be0c4b2b7ffb367929d2b352651f083806b (new)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Generic hugetlb support.
4 * (C) Nadia Yvette Chambers, April 2004
5 */
6#include <linux/list.h>
7#include <linux/init.h>
8#include <linux/mm.h>

--- 246 unchanged lines hidden (view full) ---

255static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
256{
257 return subpool_inode(file_inode(vma->vm_file));
258}
259
260/*
261 * hugetlb vma_lock helper routines
262 */
263static bool __vma_shareable_lock(struct vm_area_struct *vma)
264{
265 return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
266 vma->vm_private_data;
267}
268
269void hugetlb_vma_lock_read(struct vm_area_struct *vma)
270{
271 if (__vma_shareable_lock(vma)) {
272 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
273
274 down_read(&vma_lock->rw_sema);
275 }
276}

--- 1210 unchanged lines hidden (view full) ---

1487 for (i = 1; i < nr_pages; i++) {
1488 p = folio_page(folio, i);
1489 p->mapping = NULL;
1490 clear_compound_head(p);
1491 if (!demote)
1492 set_page_refcounted(p);
1493 }
1494
1495 folio_set_compound_order(folio, 0);
1489 folio_set_order(folio, 0);
1496 __folio_clear_head(folio);
1497}
1498
1499static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
1500 unsigned int order)
1501{
1502 __destroy_compound_gigantic_folio(folio, order, true);
1503}

--- 447 unchanged lines hidden (view full) ---

1951{
1952 int i, j;
1953 int nr_pages = 1 << order;
1954 struct page *p;
1955
1956 __folio_clear_reserved(folio);
1957 __folio_set_head(folio);
1958 /* we rely on prep_new_hugetlb_folio to set the destructor */
1959 folio_set_compound_order(folio, order);
1953 folio_set_order(folio, order);
1960 for (i = 0; i < nr_pages; i++) {
1961 p = folio_page(folio, i);
1962
1963 /*
1964 * For gigantic hugepages allocated through bootmem at
1965 * boot, it's safer to be consistent with the not-gigantic
1966 * hugepages and clear the PG_reserved bit from all tail pages
1967 * too. Otherwise drivers using get_user_pages() to access tail

--- 47 unchanged lines hidden (view full) ---

2015 clear_compound_head(p);
2016 set_page_refcounted(p);
2017 }
2018 /* need to clear PG_reserved on remaining tail pages */
2019 for (; j < nr_pages; j++) {
2020 p = folio_page(folio, j);
2021 __ClearPageReserved(p);
2022 }
2023 folio_set_compound_order(folio, 0);
2017 folio_set_order(folio, 0);
2024 __folio_clear_head(folio);
2025 return false;
2026}
2027
2028static bool prep_compound_gigantic_folio(struct folio *folio,
2029 unsigned int order)
2030{
2031 return __prep_compound_gigantic_folio(folio, order, false);

--- 2935 unchanged lines hidden (view full) ---

4967 struct hstate *h = hstate_vma(src_vma);
4968 unsigned long sz = huge_page_size(h);
4969 unsigned long npages = pages_per_huge_page(h);
4970 struct mmu_notifier_range range;
4971 unsigned long last_addr_mask;
4972 int ret = 0;
4973
4974 if (cow) {
4975 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_vma, src,
4969 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src,
4976 src_vma->vm_start,
4977 src_vma->vm_end);
4978 mmu_notifier_invalidate_range_start(&range);
4979 mmap_assert_write_locked(src);
4980 raw_write_seqcount_begin(&src->write_protect_seq);
4981 } else {
4982 /*
4983 * For shared mappings the vma lock must be held before
4984 * calling huge_pte_offset in the src vma. Otherwise, the
4978 * calling hugetlb_walk() in the src vma. Otherwise, the
4985 * returned ptep could go away if part of a shared pmd and
4986 * another thread calls huge_pmd_unshare.
4987 */
4988 hugetlb_vma_lock_read(src_vma);
4989 }
4990
4991 last_addr_mask = hugetlb_mask_last_page(h);
4992 for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
4993 spinlock_t *src_ptl, *dst_ptl;
4994 src_pte = huge_pte_offset(src, addr, sz);
4988 src_pte = hugetlb_walk(src_vma, addr, sz);
4995 if (!src_pte) {
4996 addr |= last_addr_mask;
4997 continue;
4998 }
4999 dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
5000 if (!dst_pte) {
5001 ret = -ENOMEM;
5002 break;

--- 43 unchanged lines hidden (view full) ---

5046 if (userfaultfd_wp(src_vma) && uffd_wp)
5047 entry = huge_pte_mkuffd_wp(entry);
5048 set_huge_pte_at(src, addr, src_pte, entry);
5049 }
5050 if (!userfaultfd_wp(dst_vma) && uffd_wp)
5051 entry = huge_pte_clear_uffd_wp(entry);
5052 set_huge_pte_at(dst, addr, dst_pte, entry);
5053 } else if (unlikely(is_pte_marker(entry))) {
5048 /* No swap on hugetlb */
5049 WARN_ON_ONCE(
5050 is_swapin_error_entry(pte_to_swp_entry(entry)));
5054 /*
5055 * We copy the pte marker only if the dst vma has
5056 * uffd-wp enabled.
5057 */
5058 if (userfaultfd_wp(dst_vma))
5059 set_huge_pte_at(dst, addr, dst_pte, entry);
5060 } else {
5061 entry = huge_ptep_get(src_pte);

--- 113 unchanged lines hidden (view full) ---

5175 unsigned long sz = huge_page_size(h);
5176 struct mm_struct *mm = vma->vm_mm;
5177 unsigned long old_end = old_addr + len;
5178 unsigned long last_addr_mask;
5179 pte_t *src_pte, *dst_pte;
5180 struct mmu_notifier_range range;
5181 bool shared_pmd = false;
5182
5183 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr,
5180 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, old_addr,
5184 old_end);
5185 adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
5186 /*
5187 * In case of shared PMDs, we should cover the maximum possible
5188 * range.
5189 */
5190 flush_cache_range(vma, range.start, range.end);
5191
5192 mmu_notifier_invalidate_range_start(&range);
5193 last_addr_mask = hugetlb_mask_last_page(h);
5194 /* Prevent race with file truncation */
5195 hugetlb_vma_lock_write(vma);
5196 i_mmap_lock_write(mapping);
5197 for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
5198 src_pte = huge_pte_offset(mm, old_addr, sz);
5195 src_pte = hugetlb_walk(vma, old_addr, sz);
5199 if (!src_pte) {
5200 old_addr |= last_addr_mask;
5201 new_addr |= last_addr_mask;
5202 continue;
5203 }
5204 if (huge_pte_none(huge_ptep_get(src_pte)))
5205 continue;
5206

--- 46 unchanged lines hidden (view full) ---

5253 * to huge page.
5254 */
5255 tlb_change_page_size(tlb, sz);
5256 tlb_start_vma(tlb, vma);
5257
5258 last_addr_mask = hugetlb_mask_last_page(h);
5259 address = start;
5260 for (; address < end; address += sz) {
5261 ptep = huge_pte_offset(mm, address, sz);
5258 ptep = hugetlb_walk(vma, address, sz);
5262 if (!ptep) {
5263 address |= last_addr_mask;
5264 continue;
5265 }
5266
5267 ptl = huge_pte_lock(h, mm, ptep);
5268 if (huge_pmd_unshare(mm, vma, address, ptep)) {
5269 spin_unlock(ptl);

--- 119 unchanged lines hidden (view full) ---

5389
5390void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
5391 unsigned long end, struct page *ref_page,
5392 zap_flags_t zap_flags)
5393{
5394 struct mmu_notifier_range range;
5395 struct mmu_gather tlb;
5396
5397 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
5394 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
5398 start, end);
5399 adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
5400 mmu_notifier_invalidate_range_start(&range);
5401 tlb_gather_mmu(&tlb, vma->vm_mm);
5402
5403 __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
5404
5405 mmu_notifier_invalidate_range_end(&range);

--- 160 unchanged lines hidden (view full) ---

5566 hugetlb_vma_unlock_read(vma);
5567 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
5568
5569 unmap_ref_private(mm, vma, old_page, haddr);
5570
5571 mutex_lock(&hugetlb_fault_mutex_table[hash]);
5572 hugetlb_vma_lock_read(vma);
5573 spin_lock(ptl);
5574 ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
5571 ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
5575 if (likely(ptep &&
5576 pte_same(huge_ptep_get(ptep), pte)))
5577 goto retry_avoidcopy;
5578 /*
5579 * race occurs while re-acquiring page table
5580 * lock, and our job is done.
5581 */
5582 delayacct_wpcopy_end();

--- 12 unchanged lines hidden (view full) ---

5595 ret = VM_FAULT_OOM;
5596 goto out_release_all;
5597 }
5598
5599 copy_user_huge_page(new_page, old_page, address, vma,
5600 pages_per_huge_page(h));
5601 __SetPageUptodate(new_page);
5602
5603 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, haddr,
5600 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
5604 haddr + huge_page_size(h));
5605 mmu_notifier_invalidate_range_start(&range);
5606
5607 /*
5608 * Retake the page table lock to check for racing updates
5609 * before the page tables are altered
5610 */
5611 spin_lock(ptl);
5612 ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
5609 ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
5613 if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
5614 /* Break COW or unshare */
5615 huge_ptep_clear_flush(vma, haddr, ptep);
5616 mmu_notifier_invalidate_range(mm, range.start, range.end);
5617 page_remove_rmap(old_page, vma, true);
5618 hugepage_add_new_anon_rmap(new_page, vma, haddr);
5619 set_huge_pte_at(mm, haddr, ptep,
5620 make_huge_pte(vma, new_page, !unshare));

--- 290 unchanged lines hidden (view full) ---

5911 page_dup_file_rmap(page, true);
5912 new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
5913 && (vma->vm_flags & VM_SHARED)));
5914 /*
5915 * If this pte was previously wr-protected, keep it wr-protected even
5916 * if populated.
5917 */
5918 if (unlikely(pte_marker_uffd_wp(old_pte)))
5919 new_pte = huge_pte_wrprotect(huge_pte_mkuffd_wp(new_pte));
5916 new_pte = huge_pte_mkuffd_wp(new_pte);
5920 set_huge_pte_at(mm, haddr, ptep, new_pte);
5921
5922 hugetlb_count_add(pages_per_huge_page(h), mm);
5923 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
5924 /* Optimization, do the COW without a second fault */
5925 ret = hugetlb_wp(mm, vma, address, ptep, flags, page, ptl);
5926 }
5927

--- 58 unchanged lines hidden (view full) ---

5986 pgoff_t idx;
5987 struct page *page = NULL;
5988 struct page *pagecache_page = NULL;
5989 struct hstate *h = hstate_vma(vma);
5990 struct address_space *mapping;
5991 int need_wait_lock = 0;
5992 unsigned long haddr = address & huge_page_mask(h);
5993
5994 ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
5995 if (ptep) {
5996 /*
5997 * Since we hold no locks, ptep could be stale. That is
5998 * OK as we are only making decisions based on content and
5999 * not actually modifying content here.
6000 */
6001 entry = huge_ptep_get(ptep);
6002 if (unlikely(is_hugetlb_entry_migration(entry))) {
6003 migration_entry_wait_huge(vma, ptep);
6004 return 0;
6005 } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
6006 return VM_FAULT_HWPOISON_LARGE |
6007 VM_FAULT_SET_HINDEX(hstate_index(h));
6008 }
6009
6010 /*
6011 * Serialize hugepage allocation and instantiation, so that we don't
6012 * get spurious allocation failures if two CPUs race to instantiate
6013 * the same page in the page cache.
6014 */
6015 mapping = vma->vm_file->f_mapping;
6016 idx = vma_hugecache_offset(h, vma, haddr);
6017 hash = hugetlb_fault_mutex_hash(mapping, idx);
6018 mutex_lock(&hugetlb_fault_mutex_table[hash]);
6019
6020 /*
6021 * Acquire vma lock before calling huge_pte_alloc and hold
6022 * until finished with ptep. This prevents huge_pmd_unshare from
6023 * being called elsewhere and making the ptep no longer valid.
6024 *
6025 * ptep could have already be assigned via huge_pte_offset. That
6026 * is OK, as huge_pte_alloc will return the same value unless
6027 * something has changed.
6028 */
6029 hugetlb_vma_lock_read(vma);
6030 ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h));
6031 if (!ptep) {
6032 hugetlb_vma_unlock_read(vma);
6033 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
6034 return VM_FAULT_OOM;
6035 }

--- 12 unchanged lines hidden (view full) ---

6048
6049 /*
6050 * entry could be a migration/hwpoison entry at this point, so this
6051 * check prevents the kernel from going below assuming that we have
6052 * an active hugepage in pagecache. This goto expects the 2nd page
6053 * fault, and is_hugetlb_entry_(migration|hwpoisoned) check will
6054 * properly handle it.
6055 */
6056 if (!pte_present(entry))
6033 if (!pte_present(entry)) {
6034 if (unlikely(is_hugetlb_entry_migration(entry))) {
6035 /*
6036 * Release the hugetlb fault lock now, but retain
6037 * the vma lock, because it is needed to guard the
6038 * huge_pte_lockptr() later in
6039 * migration_entry_wait_huge(). The vma lock will
6040 * be released there.
6041 */
6042 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
6043 migration_entry_wait_huge(vma, ptep);
6044 return 0;
6045 } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
6046 ret = VM_FAULT_HWPOISON_LARGE |
6047 VM_FAULT_SET_HINDEX(hstate_index(h));
6057 goto out_mutex;
6048 goto out_mutex;
6049 }
6058
6059 /*
6060 * If we are going to COW/unshare the mapping later, we examine the
6061 * pending reservations for this page now. This will ensure that any
6062 * allocations necessary to record that reservation occur outside the
6063 * spinlock. Also lookup the pagecache page now as it is used to
6064 * determine if a reservation has been consumed.
6065 */

--- 328 unchanged lines hidden (view full) ---

6394
6395 /*
6396 * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
6397 * follow_hugetlb_page().
6398 */
6399 if (WARN_ON_ONCE(flags & FOLL_PIN))
6400 return NULL;
6401
6402retry:
6403 pte = huge_pte_offset(mm, haddr, huge_page_size(h));
6394 hugetlb_vma_lock_read(vma);
6395 pte = hugetlb_walk(vma, haddr, huge_page_size(h));
6404 if (!pte)
6396 if (!pte)
6405 return NULL;
6397 goto out_unlock;
6406
6407 ptl = huge_pte_lock(h, mm, pte);
6408 entry = huge_ptep_get(pte);
6409 if (pte_present(entry)) {
6410 page = pte_page(entry) +
6411 ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
6412 /*
6413 * Note that page may be a sub-page, and with vmemmap
6414 * optimizations the page struct may be read only.
6415 * try_grab_page() will increase the ref count on the
6416 * head page, so this will be OK.
6417 *
6418 * try_grab_page() should always be able to get the page here,
6419 * because we hold the ptl lock and have verified pte_present().
6420 */
6421 if (try_grab_page(page, flags)) {
6422 page = NULL;
6423 goto out;
6424 }
6425 } else {
6426 if (is_hugetlb_entry_migration(entry)) {
6427 spin_unlock(ptl);
6428 __migration_entry_wait_huge(pte, ptl);
6429 goto retry;
6430 }
6431 /*
6432 * hwpoisoned entry is treated as no_page_table in
6433 * follow_page_mask().
6434 */
6435 }
6436out:
6437 spin_unlock(ptl);
6417 }
6418out:
6419 spin_unlock(ptl);
6420out_unlock:
6421 hugetlb_vma_unlock_read(vma);
6438 return page;
6439}
6440
6441long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
6442 struct page **pages, struct vm_area_struct **vmas,
6443 unsigned long *position, unsigned long *nr_pages,
6444 long i, unsigned int flags, int *locked)
6445{

--- 14 unchanged lines hidden (view full) ---

6460 * If we have a pending SIGKILL, don't keep faulting pages and
6461 * potentially allocating memory.
6462 */
6463 if (fatal_signal_pending(current)) {
6464 remainder = 0;
6465 break;
6466 }
6467
6452 hugetlb_vma_lock_read(vma);
6468 /*
6469 * Some archs (sparc64, sh*) have multiple pte_ts to
6470 * each hugepage. We have to make sure we get the
6471 * first, for the page indexing below to work.
6472 *
6473 * Note that page table lock is not held when pte is null.
6474 */
6475 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h),
6476 huge_page_size(h));
6460 pte = hugetlb_walk(vma, vaddr & huge_page_mask(h),
6461 huge_page_size(h));
6477 if (pte)
6478 ptl = huge_pte_lock(h, mm, pte);
6479 absent = !pte || huge_pte_none(huge_ptep_get(pte));
6480
6481 /*
6482 * When coredumping, it suits get_dump_page if we just return
6483 * an error where there's an empty slot with no huge pagecache
6484 * to back it. This way, we avoid allocating a hugepage, and
6485 * the sparse dumpfile avoids allocating disk blocks, but its
6486 * huge holes still show up with zeroes where they need to be.
6487 */
6488 if (absent && (flags & FOLL_DUMP) &&
6489 !hugetlbfs_pagecache_present(h, vma, vaddr)) {
6490 if (pte)
6491 spin_unlock(ptl);
6477 hugetlb_vma_unlock_read(vma);
6492 remainder = 0;
6493 break;
6494 }
6495
6496 /*
6497 * We need call hugetlb_fault for both hugepages under migration
6498 * (in which case hugetlb_fault waits for the migration,) and
6499 * hwpoisoned hugepages (in which case we need to prevent the

--- 5 unchanged lines hidden (view full) ---

6505 */
6506 if (absent ||
6507 __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
6508 vm_fault_t ret;
6509 unsigned int fault_flags = 0;
6510
6511 if (pte)
6512 spin_unlock(ptl);
6499 hugetlb_vma_unlock_read(vma);
6500
6513 if (flags & FOLL_WRITE)
6514 fault_flags |= FAULT_FLAG_WRITE;
6515 else if (unshare)
6516 fault_flags |= FAULT_FLAG_UNSHARE;
6517 if (locked) {
6518 fault_flags |= FAULT_FLAG_ALLOW_RETRY |
6519 FAULT_FLAG_KILLABLE;
6520 if (flags & FOLL_INTERRUPTIBLE)

--- 46 unchanged lines hidden (view full) ---

6567 */
6568 if (!pages && !vmas && !pfn_offset &&
6569 (vaddr + huge_page_size(h) < vma->vm_end) &&
6570 (remainder >= pages_per_huge_page(h))) {
6571 vaddr += huge_page_size(h);
6572 remainder -= pages_per_huge_page(h);
6573 i += pages_per_huge_page(h);
6574 spin_unlock(ptl);
6563 hugetlb_vma_unlock_read(vma);
6575 continue;
6576 }
6577
6578 /* vaddr may not be aligned to PAGE_SIZE */
6579 refs = min3(pages_per_huge_page(h) - pfn_offset, remainder,
6580 (vma->vm_end - ALIGN_DOWN(vaddr, PAGE_SIZE)) >> PAGE_SHIFT);
6581
6582 if (pages || vmas)

--- 13 unchanged lines hidden (view full) ---

6596 * any way. As this is hugetlb, the pages will never
6597 * be p2pdma or not longterm pinable. So this page
6598 * must be available at this point, unless the page
6599 * refcount overflowed:
6600 */
6601 if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
6602 flags))) {
6603 spin_unlock(ptl);
6593 hugetlb_vma_unlock_read(vma);
6604 remainder = 0;
6605 err = -ENOMEM;
6606 break;
6607 }
6608 }
6609
6610 vaddr += (refs << PAGE_SHIFT);
6611 remainder -= refs;
6612 i += refs;
6613
6614 spin_unlock(ptl);
6605 hugetlb_vma_unlock_read(vma);
6615 }
6616 *nr_pages = remainder;
6617 /*
6618 * setting position is actually required only if remainder is
6619 * not zero but it's faster not to add a "if (remainder)"
6620 * branch.
6621 */
6622 *position = vaddr;
6623
6624 return i ? i : err;
6625}
6626
6627unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
6618long hugetlb_change_protection(struct vm_area_struct *vma,
6628 unsigned long address, unsigned long end,
6629 pgprot_t newprot, unsigned long cp_flags)
6630{
6631 struct mm_struct *mm = vma->vm_mm;
6632 unsigned long start = address;
6633 pte_t *ptep;
6634 pte_t pte;
6635 struct hstate *h = hstate_vma(vma);
6636 unsigned long pages = 0, psize = huge_page_size(h);
6627 long pages = 0, psize = huge_page_size(h);
6637 bool shared_pmd = false;
6638 struct mmu_notifier_range range;
6639 unsigned long last_addr_mask;
6640 bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
6641 bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
6642
6643 /*
6644 * In the case of shared PMDs, the area to flush could be beyond
6645 * start/end. Set range.start/range.end to cover the maximum possible
6646 * range if PMD sharing is possible.
6647 */
6648 mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA,
6649 0, vma, mm, start, end);
6640 0, mm, start, end);
6650 adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
6651
6652 BUG_ON(address >= end);
6653 flush_cache_range(vma, range.start, range.end);
6654
6655 mmu_notifier_invalidate_range_start(&range);
6656 hugetlb_vma_lock_write(vma);
6657 i_mmap_lock_write(vma->vm_file->f_mapping);
6658 last_addr_mask = hugetlb_mask_last_page(h);
6659 for (; address < end; address += psize) {
6660 spinlock_t *ptl;
6661 ptep = huge_pte_offset(mm, address, psize);
6652 ptep = hugetlb_walk(vma, address, psize);
6662 if (!ptep) {
6663 if (!uffd_wp) {
6664 address |= last_addr_mask;
6665 continue;
6666 }
6667 /*
6668 * Userfaultfd wr-protect requires pgtable
6669 * pre-allocations to install pte markers.
6670 */
6671 ptep = huge_pte_alloc(mm, vma, address, psize);
6672 if (!ptep)
6663 if (!ptep) {
6664 pages = -ENOMEM;
6673 break;
6665 break;
6666 }
6674 }
6675 ptl = huge_pte_lock(h, mm, ptep);
6676 if (huge_pmd_unshare(mm, vma, address, ptep)) {
6677 /*
6678 * When uffd-wp is enabled on the vma, unshare
6679 * shouldn't happen at all. Warn about it if it
6680 * happened due to some reason.
6681 */

--- 38 unchanged lines hidden (view full) ---

6720 } else if (!huge_pte_none(pte)) {
6721 pte_t old_pte;
6722 unsigned int shift = huge_page_shift(hstate_vma(vma));
6723
6724 old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
6725 pte = huge_pte_modify(old_pte, newprot);
6726 pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
6727 if (uffd_wp)
6728 pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
6721 pte = huge_pte_mkuffd_wp(pte);
6729 else if (uffd_wp_resolve)
6730 pte = huge_pte_clear_uffd_wp(pte);
6731 huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
6732 pages++;
6733 } else {
6734 /* None pte */
6735 if (unlikely(uffd_wp))
6736 /* Safe to modify directly (none->non-present). */

--- 18 unchanged lines hidden (view full) ---

6755 * page table protection not changing it to point to a new page.
6756 *
6757 * See Documentation/mm/mmu_notifier.rst
6758 */
6759 i_mmap_unlock_write(vma->vm_file->f_mapping);
6760 hugetlb_vma_unlock_write(vma);
6761 mmu_notifier_invalidate_range_end(&range);
6762
6763 return pages << h->order;
6756 return pages > 0 ? (pages << h->order) : pages;
6764}
6765
6766/* Return true if reservation was successful, false otherwise. */
6767bool hugetlb_reserve_pages(struct inode *inode,
6768 long from, long to,
6769 struct vm_area_struct *vma,
6770 vm_flags_t vm_flags)
6771{
6772 long chg, add = -1;
6765 long chg = -1, add = -1;
6773 struct hstate *h = hstate_inode(inode);
6774 struct hugepage_subpool *spool = subpool_inode(inode);
6775 struct resv_map *resv_map;
6776 struct hugetlb_cgroup *h_cg = NULL;
6777 long gbl_reserve, regions_needed = 0;
6778
6779 /* This should never happen */
6780 if (from > to) {

--- 282 unchanged lines hidden (view full) ---

7063
7064 i_mmap_lock_read(mapping);
7065 vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
7066 if (svma == vma)
7067 continue;
7068
7069 saddr = page_table_shareable(svma, vma, addr, idx);
7070 if (saddr) {
7071 spte = huge_pte_offset(svma->vm_mm, saddr,
7072 vma_mmu_pagesize(svma));
7064 spte = hugetlb_walk(svma, saddr,
7065 vma_mmu_pagesize(svma));
7073 if (spte) {
7074 get_page(virt_to_page(spte));
7075 break;
7076 }
7077 }
7078 }
7079
7080 if (!spte)

--- 289 unchanged lines hidden (view full) ---

7370 if (start >= end)
7371 return;
7372
7373 flush_cache_range(vma, start, end);
7374 /*
7375 * No need to call adjust_range_if_pmd_sharing_possible(), because
7376 * we have already done the PUD_SIZE alignment.
7377 */
7378 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
7371 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
7379 start, end);
7380 mmu_notifier_invalidate_range_start(&range);
7381 hugetlb_vma_lock_write(vma);
7382 i_mmap_lock_write(vma->vm_file->f_mapping);
7383 for (address = start; address < end; address += PUD_SIZE) {
7384 ptep = huge_pte_offset(mm, address, sz);
7377 ptep = hugetlb_walk(vma, address, sz);
7385 if (!ptep)
7386 continue;
7387 ptl = huge_pte_lock(h, mm, ptep);
7388 huge_pmd_unshare(mm, vma, address, ptep);
7389 spin_unlock(ptl);
7390 }
7391 flush_hugetlb_tlb_range(vma, start, end);
7392 i_mmap_unlock_write(vma->vm_file->f_mapping);

--- 170 unchanged lines hidden ---
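
The change that recurs throughout the new side of this diff is that hugetlb page-table lookups go through hugetlb_walk() while the hugetlb vma lock is held (hugetlb_vma_lock_read()/hugetlb_vma_unlock_read()), so a concurrent huge_pmd_unshare() cannot free the page table that the returned ptep points into. Below is a minimal illustrative sketch of that lookup pattern, built only from helpers visible in the diff above; the wrapper function itself is hypothetical and not part of hugetlb.c.

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * Sketch only, not kernel code: walk a hugetlb VMA with the vma lock
 * held.  On success the caller must spin_unlock(*ptlp) and then
 * hugetlb_vma_unlock_read(vma) once it is done with the returned ptep.
 */
static pte_t *example_hugetlb_walk_locked(struct vm_area_struct *vma,
					  unsigned long address,
					  spinlock_t **ptlp)
{
	struct hstate *h = hstate_vma(vma);
	unsigned long haddr = address & huge_page_mask(h);
	pte_t *ptep;

	/* Take the vma lock before the walk, as the new code above does. */
	hugetlb_vma_lock_read(vma);
	ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
	if (!ptep) {
		/* No page table here; drop the lock and report that. */
		hugetlb_vma_unlock_read(vma);
		return NULL;
	}
	/* Pin down the entry under its page-table lock before use. */
	*ptlp = huge_pte_lock(h, vma->vm_mm, ptep);
	return ptep;
}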