--- huge_memory.c (6f84981772535e670e4e2df051a672af229b6694)
+++ huge_memory.c (7d4a8be0c4b2b7ffb367929d2b352651f083806b)

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2009 Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>

--- 1589 unchanged lines hidden (resumes at old 1598 / new 1598) ---

 * Return true if we do MADV_FREE successfully on entire pmd page.
 * Otherwise, return false.
 */
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		pmd_t *pmd, unsigned long addr, unsigned long next)
{
	spinlock_t *ptl;
	pmd_t orig_pmd;
-	struct page *page;
+	struct folio *folio;
	struct mm_struct *mm = tlb->mm;
	bool ret = false;

	tlb_change_page_size(tlb, HPAGE_PMD_SIZE);

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		goto out_unlocked;

	orig_pmd = *pmd;
	if (is_huge_zero_pmd(orig_pmd))
		goto out;

	if (unlikely(!pmd_present(orig_pmd))) {
		VM_BUG_ON(thp_migration_supported() &&
			  !is_pmd_migration_entry(orig_pmd));
		goto out;
	}

-	page = pmd_page(orig_pmd);
+	folio = pfn_folio(pmd_pfn(orig_pmd));
	/*
-	 * If other processes are mapping this page, we couldn't discard
-	 * the page unless they all do MADV_FREE so let's skip the page.
+	 * If other processes are mapping this folio, we couldn't discard
+	 * the folio unless they all do MADV_FREE so let's skip the folio.
	 */
-	if (total_mapcount(page) != 1)
+	if (folio_mapcount(folio) != 1)
		goto out;

-	if (!trylock_page(page))
+	if (!folio_trylock(folio))
		goto out;

	/*
	 * If user want to discard part-pages of THP, split it so MADV_FREE
	 * will deactivate only them.
	 */
	if (next - addr != HPAGE_PMD_SIZE) {
-		get_page(page);
+		folio_get(folio);
		spin_unlock(ptl);
-		split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
+		split_folio(folio);
+		folio_unlock(folio);
+		folio_put(folio);
		goto out_unlocked;
	}

-	if (PageDirty(page))
-		ClearPageDirty(page);
-	unlock_page(page);
+	if (folio_test_dirty(folio))
+		folio_clear_dirty(folio);
+	folio_unlock(folio);

	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
		pmdp_invalidate(vma, addr, pmd);
		orig_pmd = pmd_mkold(orig_pmd);
		orig_pmd = pmd_mkclean(orig_pmd);

		set_pmd_at(mm, addr, pmd, orig_pmd);
		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
	}

-	mark_page_lazyfree(page);
+	folio_mark_lazyfree(folio);
	ret = true;
out:
	spin_unlock(ptl);
out_unlocked:
	return ret;
}

static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)

--- 243 unchanged lines hidden (resumes at old 1915 / new 1915) ---
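Note: the hunk above is a mechanical page -> folio conversion of
madvise_free_huge_pmd(). The folio is derived straight from the PMD with
pfn_folio(pmd_pfn()) instead of pmd_page(), and each struct page call is
replaced by its folio counterpart. A minimal sketch of the correspondence
(the folio calls are the real kernel APIs; the wrapper function itself is
hypothetical, for illustration only):

	/* Illustration only, not kernel code: old call -> new call. */
	static bool page_to_folio_sketch(pmd_t orig_pmd)
	{
		struct folio *folio = pfn_folio(pmd_pfn(orig_pmd)); /* was pmd_page() */

		if (folio_mapcount(folio) != 1)	/* was total_mapcount(page)  */
			return false;
		if (!folio_trylock(folio))	/* was trylock_page(page)    */
			return false;
		folio_get(folio);		/* was get_page(page)        */
		split_folio(folio);		/* was split_huge_page(page) */
		folio_unlock(folio);		/* was unlock_page(page)     */
		folio_put(folio);		/* was put_page(page)        */
		return true;
	}

The dirty-bit and lazyfree calls follow the same pattern: PageDirty ->
folio_test_dirty, ClearPageDirty -> folio_clear_dirty, mark_page_lazyfree ->
folio_mark_lazyfree.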

	 * which may break userspace.
	 *
	 * pmdp_invalidate_ad() is required to make sure we don't miss
	 * dirty/young flags set by hardware.
	 */
	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);

	entry = pmd_modify(oldpmd, newprot);
-	if (uffd_wp) {
-		entry = pmd_wrprotect(entry);
-		entry = pmd_mkuffd_wp(entry);
-	} else if (uffd_wp_resolve) {
+	if (uffd_wp)
+		entry = pmd_mkuffd_wp(entry);
+	else if (uffd_wp_resolve)
		/*
		 * Leave the write bit to be handled by PF interrupt
		 * handler, then things like COW could be properly
		 * handled.
		 */
		entry = pmd_clear_uffd_wp(entry);
-	}

	/* See change_pte_range(). */
	if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
	    can_change_pmd_writable(vma, addr, entry))
		entry = pmd_mkwrite(entry);

	ret = HPAGE_PMD_NR;
	set_pmd_at(mm, addr, pmd, entry);

--- 75 unchanged lines hidden (resumes at old 2017 / new 2015) ---
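Note: the uffd-wp hunk above drops the explicit pmd_wrprotect() and the
braces around the wr-protect branch. That is only equivalent if
pmd_mkuffd_wp() write-protects the entry itself, so this diff presumably
sits on top of the change that folded the wr-protect into
pte/pmd_mkuffd_wp(). Under that assumption, the x86 helper reads roughly as
below (a sketch of the assumed arch implementation, not part of this diff):

	static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
	{
		/* Marking a PMD uffd-wp implies write-protecting it, so
		 * call sites no longer pair it with pmd_wrprotect(). */
		return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP));
	}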

}

void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
		unsigned long address)
{
	spinlock_t *ptl;
	struct mmu_notifier_range range;

-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
				address & HPAGE_PUD_MASK,
				(address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE);
	mmu_notifier_invalidate_range_start(&range);
	ptl = pud_lock(vma->vm_mm, pud);
	if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud)))
		goto out;
	__split_huge_pud_locked(vma, pud, range.start);

--- 245 unchanged lines hidden (resumes at old 2279 / new 2277) ---

}

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio)
{
	spinlock_t *ptl;
	struct mmu_notifier_range range;

-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
				address & HPAGE_PMD_MASK,
				(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
	mmu_notifier_invalidate_range_start(&range);
	ptl = pmd_lock(vma->vm_mm, pmd);

	/*
	 * If caller asks to setup a migration entry, we need a folio to check
	 * pmd against. Otherwise we can end up replacing wrong folio.

--- 536 unchanged lines hidden (resumes at old 2832 / new 2830) ---
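Note: the __split_huge_pud() and __split_huge_pmd() hunks above change only
the mmu_notifier_range_init() call sites: the vma argument is gone and the
mm is passed directly, i.e. a notifier range is now described by an mm plus
[start, end) alone. The call shape before and after, as it appears in this
diff:

	/* before */
	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
				start, end);
	/* after */
	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
				start, end);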

	 *
	 * Check PageSwapCache to determine if the page is being
	 * handled by page reclaim since THP swap would add the page into
	 * swap cache before calling try_to_unmap().
	 */
	if (PageSwapCache(page))
		return;

+	if (!list_empty(page_deferred_list(page)))
+		return;
+
	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
	if (list_empty(page_deferred_list(page))) {
		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
		list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
		ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
		if (memcg)
			set_shrinker_bit(memcg, page_to_nid(page),

--- 81 unchanged lines hidden (resumes at old 2929 / new 2930) ---
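Note: the added lines let deferred_split_huge_page() return before taking
split_queue_lock when the page is already on the deferred-split list. The
unlocked list_empty() test may race, but the original test under the lock
is kept, so a stale answer at worst costs one lock acquisition as before.
A generic sketch of this check/lock/recheck shape (types and names here are
illustrative, not kernel API):

	struct once_queue {
		spinlock_t lock;
		struct list_head head;
	};

	static void queue_once(struct once_queue *q, struct list_head *item)
	{
		unsigned long flags;

		if (!list_empty(item))	/* unlocked fast path: already queued */
			return;

		spin_lock_irqsave(&q->lock, flags);
		if (list_empty(item))	/* authoritative recheck under the lock */
			list_add_tail(item, &q->head);
		spin_unlock_irqrestore(&q->lock, flags);
	}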

	SHRINKER_NONSLAB,
};

#ifdef CONFIG_DEBUG_FS
static void split_huge_pages_all(void)
{
	struct zone *zone;
	struct page *page;
+	struct folio *folio;
	unsigned long pfn, max_zone_pfn;
	unsigned long total = 0, split = 0;

	pr_debug("Split all THPs\n");
	for_each_zone(zone) {
		if (!managed_zone(zone))
			continue;
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
			int nr_pages;

			page = pfn_to_online_page(pfn);
-			if (!page || !get_page_unless_zero(page))
+			if (!page || PageTail(page))
				continue;
+			folio = page_folio(page);
+			if (!folio_try_get(folio))
+				continue;

-			if (zone != page_zone(page))
+			if (unlikely(page_folio(page) != folio))
				goto next;

-			if (!PageHead(page) || PageHuge(page) || !PageLRU(page))
+			if (zone != folio_zone(folio))
				goto next;

+			if (!folio_test_large(folio)
+				|| folio_test_hugetlb(folio)
+				|| !folio_test_lru(folio))
+				goto next;
+
			total++;
-			lock_page(page);
-			nr_pages = thp_nr_pages(page);
-			if (!split_huge_page(page))
+			folio_lock(folio);
+			nr_pages = folio_nr_pages(folio);
+			if (!split_folio(folio))
				split++;
			pfn += nr_pages - 1;
-			unlock_page(page);
+			folio_unlock(folio);
next:
-			put_page(page);
+			folio_put(folio);
			cond_resched();
		}
	}

	pr_debug("%lu of %lu THP split\n", split, total);
}

static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)

--- 295 unchanged lines hidden (resumes at old 3270 / new 3280) ---
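Note: split_huge_pages_all() moves from get_page_unless_zero() on raw pages
to a folio-based walk: tail pages are skipped up front, a reference is
taken speculatively with folio_try_get(), and page_folio(page) is then
re-read; a mismatch means the folio was split or freed between lookup and
pinning, so the loop moves on. The head/hugetlb/LRU tests become
folio_test_large()/folio_test_hugetlb()/folio_test_lru(). The revalidation
pattern in isolation (real APIs; the helper itself is illustrative, not in
the kernel):

	static struct folio *try_pin_folio(struct page *page)
	{
		struct folio *folio = page_folio(page);

		if (!folio_try_get(folio))	/* refcount was zero: being freed */
			return NULL;
		if (unlikely(page_folio(page) != folio)) {
			/* Raced with a split or free between lookup and pin. */
			folio_put(folio);
			return NULL;
		}
		return folio;	/* caller releases with folio_put() */
	}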

	entry = pmd_to_swp_entry(*pvmw->pmd);
	get_page(new);
	pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot));
	if (pmd_swp_soft_dirty(*pvmw->pmd))
		pmde = pmd_mksoft_dirty(pmde);
	if (is_writable_migration_entry(entry))
		pmde = maybe_pmd_mkwrite(pmde, vma);
	if (pmd_swp_uffd_wp(*pvmw->pmd))
-		pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
+		pmde = pmd_mkuffd_wp(pmde);
	if (!is_migration_entry_young(entry))
		pmde = pmd_mkold(pmde);
	/* NOTE: this may contain setting soft-dirty on some archs */
	if (PageDirty(new) && is_migration_entry_dirty(entry))
		pmde = pmd_mkdirty(pmde);

	if (PageAnon(new)) {
		rmap_t rmap_flags = RMAP_COMPOUND;

--- 16 unchanged lines hidden ---
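Note: this last hunk in remove_migration_pmd() is the same uffd-wp
simplification as in change_huge_pmd() earlier: the open-coded
pmd_wrprotect() around pmd_mkuffd_wp() is dropped on the assumption that
pmd_mkuffd_wp() now write-protects the entry itself (see the sketch after
that hunk).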