1 /* 2 * This file contains common routines for dealing with free of page tables 3 * Along with common page table handling code 4 * 5 * Derived from arch/powerpc/mm/tlb_64.c: 6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 7 * 8 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) 9 * and Cort Dougan (PReP) (cort@cs.nmt.edu) 10 * Copyright (C) 1996 Paul Mackerras 11 * 12 * Derived from "arch/i386/mm/init.c" 13 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 14 * 15 * Dave Engebretsen <engebret@us.ibm.com> 16 * Rework for PPC64 port. 17 * 18 * This program is free software; you can redistribute it and/or 19 * modify it under the terms of the GNU General Public License 20 * as published by the Free Software Foundation; either version 21 * 2 of the License, or (at your option) any later version. 22 */ 23 24 #include <linux/kernel.h> 25 #include <linux/gfp.h> 26 #include <linux/mm.h> 27 #include <linux/percpu.h> 28 #include <linux/hardirq.h> 29 #include <linux/hugetlb.h> 30 #include <asm/pgalloc.h> 31 #include <asm/tlbflush.h> 32 #include <asm/tlb.h> 33 34 static inline int is_exec_fault(void) 35 { 36 return current->thread.regs && TRAP(current->thread.regs) == 0x400; 37 } 38 39 /* We only try to do i/d cache coherency on stuff that looks like 40 * reasonably "normal" PTEs. We currently require a PTE to be present 41 * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that 42 * on userspace PTEs 43 */ 44 static inline int pte_looks_normal(pte_t pte) 45 { 46 47 #if defined(CONFIG_PPC_BOOK3S_64) 48 if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) { 49 if (pte_ci(pte)) 50 return 0; 51 if (pte_user(pte)) 52 return 1; 53 } 54 return 0; 55 #else 56 return (pte_val(pte) & 57 (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER | 58 _PAGE_PRIVILEGED)) == 59 (_PAGE_PRESENT | _PAGE_USER); 60 #endif 61 } 62 63 static struct page *maybe_pte_to_page(pte_t pte) 64 { 65 unsigned long pfn = pte_pfn(pte); 66 struct page *page; 67 68 if (unlikely(!pfn_valid(pfn))) 69 return NULL; 70 page = pfn_to_page(pfn); 71 if (PageReserved(page)) 72 return NULL; 73 return page; 74 } 75 76 #if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 77 78 /* Server-style MMU handles coherency when hashing if HW exec permission 79 * is supposed per page (currently 64-bit only). If not, then, we always 80 * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec 81 * support falls into the same category. 82 */ 83 84 static pte_t set_pte_filter(pte_t pte) 85 { 86 if (radix_enabled()) 87 return pte; 88 89 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 90 if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || 91 cpu_has_feature(CPU_FTR_NOEXECUTE))) { 92 struct page *pg = maybe_pte_to_page(pte); 93 if (!pg) 94 return pte; 95 if (!test_bit(PG_arch_1, &pg->flags)) { 96 flush_dcache_icache_page(pg); 97 set_bit(PG_arch_1, &pg->flags); 98 } 99 } 100 return pte; 101 } 102 103 static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, 104 int dirty) 105 { 106 return pte; 107 } 108 109 #else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */ 110 111 /* Embedded type MMU with HW exec support. This is a bit more complicated 112 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so 113 * instead we "filter out" the exec permission for non clean pages. 114 */ 115 static pte_t set_pte_filter(pte_t pte) 116 { 117 struct page *pg; 118 119 /* No exec permission in the first place, move on */ 120 if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte)) 121 return pte; 122 123 /* If you set _PAGE_EXEC on weird pages you're on your own */ 124 pg = maybe_pte_to_page(pte); 125 if (unlikely(!pg)) 126 return pte; 127 128 /* If the page clean, we move on */ 129 if (test_bit(PG_arch_1, &pg->flags)) 130 return pte; 131 132 /* If it's an exec fault, we flush the cache and make it clean */ 133 if (is_exec_fault()) { 134 flush_dcache_icache_page(pg); 135 set_bit(PG_arch_1, &pg->flags); 136 return pte; 137 } 138 139 /* Else, we filter out _PAGE_EXEC */ 140 return __pte(pte_val(pte) & ~_PAGE_EXEC); 141 } 142 143 static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, 144 int dirty) 145 { 146 struct page *pg; 147 148 /* So here, we only care about exec faults, as we use them 149 * to recover lost _PAGE_EXEC and perform I$/D$ coherency 150 * if necessary. Also if _PAGE_EXEC is already set, same deal, 151 * we just bail out 152 */ 153 if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault()) 154 return pte; 155 156 #ifdef CONFIG_DEBUG_VM 157 /* So this is an exec fault, _PAGE_EXEC is not set. If it was 158 * an error we would have bailed out earlier in do_page_fault() 159 * but let's make sure of it 160 */ 161 if (WARN_ON(!(vma->vm_flags & VM_EXEC))) 162 return pte; 163 #endif /* CONFIG_DEBUG_VM */ 164 165 /* If you set _PAGE_EXEC on weird pages you're on your own */ 166 pg = maybe_pte_to_page(pte); 167 if (unlikely(!pg)) 168 goto bail; 169 170 /* If the page is already clean, we move on */ 171 if (test_bit(PG_arch_1, &pg->flags)) 172 goto bail; 173 174 /* Clean the page and set PG_arch_1 */ 175 flush_dcache_icache_page(pg); 176 set_bit(PG_arch_1, &pg->flags); 177 178 bail: 179 return __pte(pte_val(pte) | _PAGE_EXEC); 180 } 181 182 #endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */ 183 184 /* 185 * set_pte stores a linux PTE into the linux page table. 186 */ 187 void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 188 pte_t pte) 189 { 190 /* 191 * When handling numa faults, we already have the pte marked 192 * _PAGE_PRESENT, but we can be sure that it is not in hpte. 193 * Hence we can use set_pte_at for them. 194 */ 195 VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep)); 196 197 /* Add the pte bit when trying to set a pte */ 198 pte = __pte(pte_val(pte) | _PAGE_PTE); 199 200 /* Note: mm->context.id might not yet have been assigned as 201 * this context might not have been activated yet when this 202 * is called. 203 */ 204 pte = set_pte_filter(pte); 205 206 /* Perform the setting of the PTE */ 207 __set_pte_at(mm, addr, ptep, pte, 0); 208 } 209 210 /* 211 * This is called when relaxing access to a PTE. It's also called in the page 212 * fault path when we don't hit any of the major fault cases, ie, a minor 213 * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have 214 * handled those two for us, we additionally deal with missing execute 215 * permission here on some processors 216 */ 217 int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, 218 pte_t *ptep, pte_t entry, int dirty) 219 { 220 int changed; 221 entry = set_access_flags_filter(entry, vma, dirty); 222 changed = !pte_same(*(ptep), entry); 223 if (changed) { 224 assert_pte_locked(vma->vm_mm, address); 225 __ptep_set_access_flags(vma, ptep, entry, 226 address, mmu_virtual_psize); 227 } 228 return changed; 229 } 230 231 #ifdef CONFIG_HUGETLB_PAGE 232 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, 233 unsigned long addr, pte_t *ptep, 234 pte_t pte, int dirty) 235 { 236 #ifdef HUGETLB_NEED_PRELOAD 237 /* 238 * The "return 1" forces a call of update_mmu_cache, which will write a 239 * TLB entry. Without this, platforms that don't do a write of the TLB 240 * entry in the TLB miss handler asm will fault ad infinitum. 241 */ 242 ptep_set_access_flags(vma, addr, ptep, pte, dirty); 243 return 1; 244 #else 245 int changed, psize; 246 247 pte = set_access_flags_filter(pte, vma, dirty); 248 changed = !pte_same(*(ptep), pte); 249 if (changed) { 250 251 #ifdef CONFIG_PPC_BOOK3S_64 252 struct hstate *h = hstate_vma(vma); 253 254 psize = hstate_get_psize(h); 255 #ifdef CONFIG_DEBUG_VM 256 assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep)); 257 #endif 258 259 #else 260 /* 261 * Not used on non book3s64 platforms. But 8xx 262 * can possibly use tsize derived from hstate. 263 */ 264 psize = 0; 265 #endif 266 __ptep_set_access_flags(vma, ptep, pte, addr, psize); 267 } 268 return changed; 269 #endif 270 } 271 #endif /* CONFIG_HUGETLB_PAGE */ 272 273 #ifdef CONFIG_DEBUG_VM 274 void assert_pte_locked(struct mm_struct *mm, unsigned long addr) 275 { 276 pgd_t *pgd; 277 pud_t *pud; 278 pmd_t *pmd; 279 280 if (mm == &init_mm) 281 return; 282 pgd = mm->pgd + pgd_index(addr); 283 BUG_ON(pgd_none(*pgd)); 284 pud = pud_offset(pgd, addr); 285 BUG_ON(pud_none(*pud)); 286 pmd = pmd_offset(pud, addr); 287 /* 288 * khugepaged to collapse normal pages to hugepage, first set 289 * pmd to none to force page fault/gup to take mmap_sem. After 290 * pmd is set to none, we do a pte_clear which does this assertion 291 * so if we find pmd none, return. 292 */ 293 if (pmd_none(*pmd)) 294 return; 295 BUG_ON(!pmd_present(*pmd)); 296 assert_spin_locked(pte_lockptr(mm, pmd)); 297 } 298 #endif /* CONFIG_DEBUG_VM */ 299 300 unsigned long vmalloc_to_phys(void *va) 301 { 302 unsigned long pfn = vmalloc_to_pfn(va); 303 304 BUG_ON(!pfn); 305 return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); 306 } 307 EXPORT_SYMBOL_GPL(vmalloc_to_phys); 308