// SPDX-License-Identifier: GPL-2.0
/*
 *  mm/pgtable-generic.c
 *
 *  Generic pgtable methods declared in linux/pgtable.h
 *
 *  Copyright (C) 2010  Linus Torvalds
 */

#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mm_inline.h>
#include <asm/tlb.h>

/*
 * If a p?d_bad entry is found while walking page tables, report
 * the error, before resetting entry to p?d_none.  Usually (but
 * very seldom) called out from the p?d_none_or_clear_bad macros.
 */

void pgd_clear_bad(pgd_t *pgd)
{
	pgd_ERROR(*pgd);
	pgd_clear(pgd);
}

#ifndef __PAGETABLE_P4D_FOLDED
void p4d_clear_bad(p4d_t *p4d)
{
	p4d_ERROR(*p4d);
	p4d_clear(p4d);
}
#endif

#ifndef __PAGETABLE_PUD_FOLDED
void pud_clear_bad(pud_t *pud)
{
	pud_ERROR(*pud);
	pud_clear(pud);
}
#endif

/*
 * Note that the pmd variant below can't be stubbed out just as for p4d/pud
 * above: pmd folding is special, and typically the pmd_* macros refer to the
 * upper level even when folded.
 */
void pmd_clear_bad(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	pmd_clear(pmd);
}

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
 * Only sets the access flags (dirty, accessed), as well as write
 * permission. Furthermore, we know it always gets set to a "more
 * permissive" setting, which allows most architectures to optimize
 * this. We return whether the PTE actually changed, which in turn
 * instructs the caller to do things like update_mmu_cache. This
 * used to be done in the caller, but sparc needs minor faults to
 * force that call on sun4c so we changed this macro slightly.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(ptep_get(ptep), entry);
	if (changed) {
		set_pte_at(vma->vm_mm, address, ptep, entry);
		flush_tlb_fix_spurious_fault(vma, address, ptep);
	}
	return changed;
}
#endif
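
/*
 * For illustration only (a sketch, not a specific call site): fault
 * handlers typically consume the return value above like
 *
 *	if (ptep_set_access_flags(vma, address, ptep, entry, dirty))
 *		update_mmu_cache(vma, address, ptep);
 *
 * so that cache/TLB maintenance is only paid for when the PTE
 * actually changed.
 */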

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
	int young;
	young = ptep_test_and_clear_young(vma, address, ptep);
	if (young)
		flush_tlb_page(vma, address);
	return young;
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
		       pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	if (pte_accessible(mm, pte))
		flush_tlb_page(vma, address);
	return pte;
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (changed) {
		set_pmd_at(vma->vm_mm, address, pmdp, entry);
		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	}
	return changed;
}
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return young;
}
#endif
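
/*
 * A usage sketch for the helper below, under assumptions rather than
 * taken from a specific call site: huge-pmd teardown paths clear the
 * entry and flush in one step, so no CPU keeps a stale huge TLB entry:
 *
 *	pmd_t old = pmdp_huge_clear_flush(vma, haddr, pmd);
 *	... dispose of, split, or remap the mapping described by "old" ...
 */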

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
			    pmd_t *pmdp)
{
	pmd_t pmd;
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
		  !pmd_devmap(*pmdp));
	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return pmd;
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
			    pud_t *pudp)
{
	pud_t pud;

	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
	VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp));
	pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
	flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
	return pud;
}
#endif
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(&pgtable->lru);
	else
		list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru);
	pmd_huge_pte(mm, pmdp) = pgtable;
}
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
/* no "address" argument, so this destroys the page coloring of some arches */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru,
							  struct page, lru);
	if (pmd_huge_pte(mm, pmdp))
		list_del(&pgtable->lru);
	return pgtable;
}
#endif
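
/*
 * Illustrative pairing of the deposit/withdraw helpers above (a sketch
 * under assumed names, not a real call site): a pte page table is
 * deposited when a huge pmd is installed, and withdrawn in FIFO order
 * when the huge pmd is later split or zapped; both sides run under the
 * pmd lock, as the assert_spin_locked() calls require:
 *
 *	ptl = pmd_lock(mm, pmd);
 *	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 *	...
 *	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 *	spin_unlock(ptl);
 */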

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		      pmd_t *pmdp)
{
	pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp));
	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return old;
}
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
			 pmd_t *pmdp)
{
	return pmdp_invalidate(vma, address, pmdp);
}
#endif

#ifndef pmdp_collapse_flush
pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	/*
	 * The pmd and the hugepage pte are in the same format, so we can
	 * use the same function for both.
	 */
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(pmd_trans_huge(*pmdp));
	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);

	/* collapse entails shooting down ptes, not the pmd */
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return pmd;
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
{
	pmd_t pmdval;

	/* rcu_read_lock() to be added later */
	pmdval = pmdp_get_lockless(pmd);
	if (pmdvalp)
		*pmdvalp = pmdval;
	if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval)))
		goto nomap;
	if (unlikely(pmd_trans_huge(pmdval) || pmd_devmap(pmdval)))
		goto nomap;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		goto nomap;
	}
	return __pte_map(&pmdval, addr);
nomap:
	/* rcu_read_unlock() to be added later */
	return NULL;
}

pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long addr, spinlock_t **ptlp)
{
	pmd_t pmdval;
	pte_t *pte;

	pte = __pte_offset_map(pmd, addr, &pmdval);
	if (likely(pte))
		*ptlp = pte_lockptr(mm, &pmdval);
	return pte;
}
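
/*
 * Expected use of pte_offset_map_nolock() (an assumption drawn from the
 * code above, not a formal contract): *ptlp is only looked up, never
 * taken. A caller that later takes the lock must revalidate that the
 * pmd still points at the same page table before trusting the pte:
 *
 *	pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
 *	...
 *	spin_lock(ptl);
 *	... recheck the pmd, then operate on *pte ...
 *	pte_unmap_unlock(pte, ptl);
 */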

pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long addr, spinlock_t **ptlp)
{
	spinlock_t *ptl;
	pmd_t pmdval;
	pte_t *pte;
again:
	pte = __pte_offset_map(pmd, addr, &pmdval);
	if (unlikely(!pte))
		return pte;
	ptl = pte_lockptr(mm, &pmdval);
	spin_lock(ptl);
	if (likely(pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
		*ptlp = ptl;
		return pte;
	}
	/*
	 * The pmd changed between mapping the pte table and taking its
	 * lock, so the pte and ptl found above may no longer belong
	 * together: unmap, unlock and retry against the new pmd value.
	 */
	pte_unmap_unlock(pte, ptl);
	goto again;
}
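
/*
 * For illustration, a minimal caller sketch (assuming the usual
 * pte_offset_map_lock() wrapper): the retry loop above is invisible to
 * callers, who either get a mapped and locked pte, or NULL when the
 * pmd is none, huge, or bad:
 *
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	if (!pte)
 *		return 0;
 *	... operate on *pte under ptl ...
 *	pte_unmap_unlock(pte, ptl);
 */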