// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains common routines for dealing with the freeing of page
 * tables, along with common page table handling code.
 *
 * Derived from arch/powerpc/mm/tlb_64.c:
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 */

#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/hugetlb.h>

static inline int is_exec_fault(void)
{
	return current->thread.regs && TRAP(current->thread.regs) == 0x400;
}
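/*
 * Note: 0x400 is the powerpc instruction storage interrupt vector, i.e.
 * the trap taken on an instruction fetch fault, which is why comparing
 * TRAP(regs) against it identifies an exec fault.
 */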
/* We only try to do i/d cache coherency on stuff that looks like
 * reasonably "normal" PTEs. We currently require a PTE to be present,
 * we avoid _PAGE_SPECIAL and cache inhibited PTEs, and we only do it
 * on userspace PTEs.
 */
static inline int pte_looks_normal(pte_t pte)
{
	if (pte_present(pte) && !pte_special(pte)) {
		if (pte_ci(pte))
			return 0;
		if (pte_user(pte))
			return 1;
	}
	return 0;
}

static struct page *maybe_pte_to_page(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
	struct page *page;

	if (unlikely(!pfn_valid(pfn)))
		return NULL;
	page = pfn_to_page(pfn);
	if (PageReserved(page))
		return NULL;
	return page;
}

#ifdef CONFIG_PPC_BOOK3S

/* Server-style MMUs handle coherency when hashing if HW exec permission
 * is supported per page (currently 64-bit only). If not, then we always
 * flush the cache for valid PTEs in set_pte. Embedded CPUs without HW
 * exec support fall into the same category.
 */
static pte_t set_pte_filter_hash(pte_t pte)
{
	if (radix_enabled())
		return pte;

	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
				       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
		struct page *pg = maybe_pte_to_page(pte);

		if (!pg)
			return pte;
		if (!test_bit(PG_arch_1, &pg->flags)) {
			flush_dcache_icache_page(pg);
			set_bit(PG_arch_1, &pg->flags);
		}
	}
	return pte;
}

#else /* CONFIG_PPC_BOOK3S */

static pte_t set_pte_filter_hash(pte_t pte) { return pte; }

#endif /* CONFIG_PPC_BOOK3S */
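/*
 * For example, on a hash MMU without CPU_FTR_COHERENT_ICACHE or
 * CPU_FTR_NOEXECUTE, the first set_pte() of a normal page flushes the
 * page out of the dcache and invalidates the icache, then records that
 * with PG_arch_1 ("icache clean") so that later mappings of the same
 * page can skip the flush.
 */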
/* Embedded type MMU with HW exec support. This is a bit more complicated
 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
 * instead we "filter out" the exec permission for non-clean pages.
 */
static inline pte_t set_pte_filter(pte_t pte)
{
	struct page *pg;

	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
		return set_pte_filter_hash(pte);

	/* No exec permission in the first place, move on */
	if (!pte_exec(pte) || !pte_looks_normal(pte))
		return pte;

	/* If you set _PAGE_EXEC on weird pages you're on your own */
	pg = maybe_pte_to_page(pte);
	if (unlikely(!pg))
		return pte;

	/* If the page is clean, we move on */
	if (test_bit(PG_arch_1, &pg->flags))
		return pte;

	/* If it's an exec fault, we flush the cache and make it clean */
	if (is_exec_fault()) {
		flush_dcache_icache_page(pg);
		set_bit(PG_arch_1, &pg->flags);
		return pte;
	}

	/* Else, we filter out _PAGE_EXEC */
	return pte_exprotect(pte);
}
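/*
 * The net effect of set_pte_filter() on these MMUs, case by case:
 *
 *	incoming PTE				outgoing PTE
 *	------------------------------------------------------------
 *	not executable or not "normal"		unchanged
 *	page already clean (PG_arch_1 set)	unchanged
 *	icache-dirty page, exec fault		flushed, unchanged
 *	icache-dirty page, any other fault	_PAGE_EXEC removed
 *
 * A removed _PAGE_EXEC is recovered by set_access_flags_filter() below
 * when an exec fault eventually happens on the page.
 */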
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
				     int dirty)
{
	struct page *pg;

	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
		return pte;

	/* So here, we only care about exec faults, as we use them
	 * to recover lost _PAGE_EXEC and perform I$/D$ coherency
	 * if necessary. Also if _PAGE_EXEC is already set, same deal,
	 * we just bail out.
	 */
	if (dirty || pte_exec(pte) || !is_exec_fault())
		return pte;

#ifdef CONFIG_DEBUG_VM
	/* So this is an exec fault, _PAGE_EXEC is not set. If it was
	 * an error we would have bailed out earlier in do_page_fault(),
	 * but let's make sure of it.
	 */
	if (WARN_ON(!(vma->vm_flags & VM_EXEC)))
		return pte;
#endif /* CONFIG_DEBUG_VM */

	/* If you set _PAGE_EXEC on weird pages you're on your own */
	pg = maybe_pte_to_page(pte);
	if (unlikely(!pg))
		goto bail;

	/* If the page is already clean, we move on */
	if (test_bit(PG_arch_1, &pg->flags))
		goto bail;

	/* Clean the page and set PG_arch_1 */
	flush_dcache_icache_page(pg);
	set_bit(PG_arch_1, &pg->flags);

bail:
	return pte_mkexec(pte);
}

/*
 * set_pte stores a linux PTE into the linux page table.
 */
void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
		pte_t pte)
{
	/*
	 * Make sure the hardware valid bit is not set. We don't do
	 * a tlb flush for this update.
	 */
	VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));

	/* Add the pte bit when trying to set a pte */
	pte = pte_mkpte(pte);

	/* Note: mm->context.id might not yet have been assigned as
	 * this context might not have been activated yet when this
	 * is called.
	 */
	pte = set_pte_filter(pte);

	/* Perform the setting of the PTE */
	__set_pte_at(mm, addr, ptep, pte, 0);
}
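/*
 * Note that set_pte_at() is only for installing a PTE over a slot that is
 * not currently valid to the hardware (hence the VM_WARN_ON above);
 * changing a valid PTE should instead go through a clearing step first,
 * roughly (illustrative):
 *
 *	old = ptep_get_and_clear(mm, addr, ptep);
 *	... flush the TLB as needed ...
 *	set_pte_at(mm, addr, ptep, new);
 */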
/*
 * This is called when relaxing access to a PTE. It's also called in the page
 * fault path when we don't hit any of the major fault cases, i.e. a minor
 * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc. The generic code will have
 * handled those two for us; we additionally deal with missing execute
 * permission here on some processors.
 */
int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pte_t *ptep, pte_t entry, int dirty)
{
	int changed;

	entry = set_access_flags_filter(entry, vma, dirty);
	changed = !pte_same(*(ptep), entry);
	if (changed) {
		assert_pte_locked(vma->vm_mm, address);
		__ptep_set_access_flags(vma, ptep, entry,
					address, mmu_virtual_psize);
	}
	return changed;
}
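/*
 * A typical caller sequence from the generic fault path looks roughly
 * like this (illustrative):
 *
 *	entry = pte_mkyoung(*ptep);
 *	if (write_access)
 *		entry = pte_mkdirty(entry);
 *	if (ptep_set_access_flags(vma, address, ptep, entry, write_access))
 *		update_mmu_cache(vma, address, ptep);
 *
 * The return value tells the generic code whether the PTE actually
 * changed and thus whether MMU/TLB state may need updating.
 */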
#ifdef CONFIG_HUGETLB_PAGE
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
#ifdef HUGETLB_NEED_PRELOAD
	/*
	 * The "return 1" forces a call of update_mmu_cache, which will write a
	 * TLB entry. Without this, platforms that don't do a write of the TLB
	 * entry in the TLB miss handler asm will fault ad infinitum.
	 */
	ptep_set_access_flags(vma, addr, ptep, pte, dirty);
	return 1;
#else
	int changed, psize;

	pte = set_access_flags_filter(pte, vma, dirty);
	changed = !pte_same(*(ptep), pte);
	if (changed) {

#ifdef CONFIG_PPC_BOOK3S_64
		struct hstate *h = hstate_vma(vma);

		psize = hstate_get_psize(h);
#ifdef CONFIG_DEBUG_VM
		assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep));
#endif
#else
		/*
		 * Not used on non-book3s64 platforms.
		 * 8xx compares it with mmu_virtual_psize to
		 * know if it is a huge page or not.
		 */
		psize = MMU_PAGE_COUNT;
#endif
		__ptep_set_access_flags(vma, ptep, pte, addr, psize);
	}
	return changed;
#endif
}

#if defined(CONFIG_PPC_8xx)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	pmd_t *pmd = pmd_ptr(mm, addr);
	pte_basic_t val;
	pte_basic_t *entry = &ptep->pte;
	int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K;
	int i;

	/*
	 * Make sure the hardware valid bit is not set. We don't do
	 * a tlb flush for this update.
	 */
	VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));

	pte = pte_mkpte(pte);

	pte = set_pte_filter(pte);

	val = pte_val(pte);
	for (i = 0; i < num; i++, entry++, val += SZ_4K)
		*entry = val;
}
#endif
#endif /* CONFIG_HUGETLB_PAGE */
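/*
 * In set_huge_pte_at() above, an 8xx 512K page that is not mapped via a
 * hugepd occupies SZ_512K / SZ_4K = 128 consecutive 4K PTE slots; the
 * final loop replicates the PTE into each slot, advancing the value by
 * SZ_4K each time.
 */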
#ifdef CONFIG_DEBUG_VM
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (mm == &init_mm)
		return;
	pgd = mm->pgd + pgd_index(addr);
	BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, addr);
	BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, addr);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, addr);
	/*
	 * For khugepaged to collapse normal pages into a hugepage, it first
	 * sets the pmd to none to force page fault/gup to take mmap_lock.
	 * After the pmd is set to none, we do a pte_clear which performs
	 * this assertion, so if we find the pmd none, return.
	 */
	if (pmd_none(*pmd))
		return;
	BUG_ON(!pmd_present(*pmd));
	assert_spin_locked(pte_lockptr(mm, pmd));
}
#endif /* CONFIG_DEBUG_VM */

unsigned long vmalloc_to_phys(void *va)
{
	unsigned long pfn = vmalloc_to_pfn(va);

	BUG_ON(!pfn);
	return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
}
EXPORT_SYMBOL_GPL(vmalloc_to_phys);
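/*
 * vmalloc_to_phys() is useful when a vmalloc'ed buffer must be handed to
 * something that wants a real address, e.g. (illustrative):
 *
 *	void *buf = vmalloc(PAGE_SIZE);
 *	unsigned long pa = vmalloc_to_phys(buf);
 *
 * The result is only valid within the page containing va, since vmalloc
 * memory is not physically contiguous across page boundaries.
 */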
/*
 * We have 4 cases for pgds and pmds:
 * (1) invalid (all zeroes)
 * (2) pointer to next table, as normal; bottom 6 bits == 0
 * (3) leaf pte for huge page, _PAGE_PTE set
 * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
 *
 * So long as we atomically load page table pointers we are safe against
 * teardown, and we can follow the address down to the page and take a ref
 * on it. This function needs to be called with interrupts disabled. We use
 * this variant when we have MSR[EE] = 0 but paca->irq_soft_mask = IRQS_ENABLED.
 */
pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
			bool *is_thp, unsigned *hpage_shift)
{
	pgd_t *pgdp;
	p4d_t p4d, *p4dp;
	pud_t pud, *pudp;
	pmd_t pmd, *pmdp;
	pte_t *ret_pte;
	hugepd_t *hpdp = NULL;
	unsigned pdshift;

	if (hpage_shift)
		*hpage_shift = 0;

	if (is_thp)
		*is_thp = false;

	/*
	 * Always operate on the local stack value. This makes sure the
	 * value doesn't get updated by a parallel THP split/collapse,
	 * page fault or a page unmap. The returned pte_t * is still not
	 * stable, so the caller should re-check it for the above
	 * conditions. The top level is an exception because it is folded
	 * into the p4d.
	 */
	pgdp = pgdir + pgd_index(ea);
	p4dp = p4d_offset(pgdp, ea);
	p4d  = READ_ONCE(*p4dp);
	pdshift = P4D_SHIFT;

	if (p4d_none(p4d))
		return NULL;

	if (p4d_is_leaf(p4d)) {
		ret_pte = (pte_t *)p4dp;
		goto out;
	}

	if (is_hugepd(__hugepd(p4d_val(p4d)))) {
		hpdp = (hugepd_t *)&p4d;
		goto out_huge;
	}

	/*
	 * Even if we end up with an unmap, the pgtable will not
	 * be freed, because we do an rcu free and here we have
	 * irqs disabled.
	 */
	pdshift = PUD_SHIFT;
	pudp = pud_offset(&p4d, ea);
	pud  = READ_ONCE(*pudp);

	if (pud_none(pud))
		return NULL;

	if (pud_is_leaf(pud)) {
		ret_pte = (pte_t *)pudp;
		goto out;
	}

	if (is_hugepd(__hugepd(pud_val(pud)))) {
		hpdp = (hugepd_t *)&pud;
		goto out_huge;
	}

	pdshift = PMD_SHIFT;
	pmdp = pmd_offset(&pud, ea);
	pmd  = READ_ONCE(*pmdp);

	/*
	 * A hugepage collapse is captured by this condition, see
	 * pmdp_collapse_flush.
	 */
	if (pmd_none(pmd))
		return NULL;

#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * A hugepage split is captured by this condition, see
	 * pmdp_invalidate.
	 *
	 * Huge page modification can be caught here too.
	 */
	if (pmd_is_serializing(pmd))
		return NULL;
#endif

	if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
		if (is_thp)
			*is_thp = true;
		ret_pte = (pte_t *)pmdp;
		goto out;
	}

	if (pmd_is_leaf(pmd)) {
		ret_pte = (pte_t *)pmdp;
		goto out;
	}

	if (is_hugepd(__hugepd(pmd_val(pmd)))) {
		hpdp = (hugepd_t *)&pmd;
		goto out_huge;
	}

	return pte_offset_kernel(&pmd, ea);

out_huge:
	if (!hpdp)
		return NULL;

	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
	pdshift = hugepd_shift(*hpdp);
out:
	if (hpage_shift)
		*hpage_shift = pdshift;
	return ret_pte;
}
EXPORT_SYMBOL_GPL(__find_linux_pte);
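/*
 * Illustrative use of __find_linux_pte(), honouring the requirement that
 * interrupts be disabled across the walk and any dereference of the
 * returned pointer:
 *
 *	unsigned long flags;
 *	unsigned int shift;
 *	bool is_thp;
 *	pte_t *ptep, pte;
 *
 *	local_irq_save(flags);
 *	ptep = __find_linux_pte(mm->pgd, ea, &is_thp, &shift);
 *	if (ptep)
 *		pte = READ_ONCE(*ptep);	// may still change, re-check
 *	local_irq_restore(flags);
 */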