1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */ 21965aae3SH. Peter Anvin #ifndef _ASM_X86_MMU_CONTEXT_H 31965aae3SH. Peter Anvin #define _ASM_X86_MMU_CONTEXT_H 4bb898558SAl Viro 5bb898558SAl Viro #include <asm/desc.h> 660063497SArun Sharma #include <linux/atomic.h> 7d17d8f9dSDave Hansen #include <linux/mm_types.h> 87d06d9c9SDave Hansen #include <linux/pkeys.h> 9d17d8f9dSDave Hansen 10d17d8f9dSDave Hansen #include <trace/events/tlb.h> 11d17d8f9dSDave Hansen 12bb898558SAl Viro #include <asm/pgalloc.h> 13bb898558SAl Viro #include <asm/tlbflush.h> 14bb898558SAl Viro #include <asm/paravirt.h> 15fe3d197fSDave Hansen #include <asm/mpx.h> 16f39681edSAndy Lutomirski 17f39681edSAndy Lutomirski extern atomic64_t last_mm_ctx_id; 18f39681edSAndy Lutomirski 19fdc0269eSJuergen Gross #ifndef CONFIG_PARAVIRT_XXL 20bb898558SAl Viro static inline void paravirt_activate_mm(struct mm_struct *prev, 21bb898558SAl Viro struct mm_struct *next) 22bb898558SAl Viro { 23bb898558SAl Viro } 24fdc0269eSJuergen Gross #endif /* !CONFIG_PARAVIRT_XXL */ 25bb898558SAl Viro 267911d3f7SAndy Lutomirski #ifdef CONFIG_PERF_EVENTS 27631fe154SDavidlohr Bueso 28631fe154SDavidlohr Bueso DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); 29a6673429SAndy Lutomirski 307911d3f7SAndy Lutomirski static inline void load_mm_cr4(struct mm_struct *mm) 317911d3f7SAndy Lutomirski { 32631fe154SDavidlohr Bueso if (static_branch_unlikely(&rdpmc_always_available_key) || 33a6673429SAndy Lutomirski atomic_read(&mm->context.perf_rdpmc_allowed)) 347911d3f7SAndy Lutomirski cr4_set_bits(X86_CR4_PCE); 357911d3f7SAndy Lutomirski else 367911d3f7SAndy Lutomirski cr4_clear_bits(X86_CR4_PCE); 377911d3f7SAndy Lutomirski } 387911d3f7SAndy Lutomirski #else 397911d3f7SAndy Lutomirski static inline void load_mm_cr4(struct mm_struct *mm) {} 407911d3f7SAndy Lutomirski #endif 417911d3f7SAndy Lutomirski 42a5b9e5a2SAndy Lutomirski #ifdef CONFIG_MODIFY_LDT_SYSCALL 43bb898558SAl Viro /* 4437868fe1SAndy Lutomirski * ldt_structs can be allocated, used, and freed, but they are never 4537868fe1SAndy Lutomirski * modified while live. 4637868fe1SAndy Lutomirski */ 4737868fe1SAndy Lutomirski struct ldt_struct { 4837868fe1SAndy Lutomirski /* 4937868fe1SAndy Lutomirski * Xen requires page-aligned LDTs with special permissions. This is 5037868fe1SAndy Lutomirski * needed to prevent us from installing evil descriptors such as 5137868fe1SAndy Lutomirski * call gates. On native, we could merge the ldt_struct and LDT 5237868fe1SAndy Lutomirski * allocations, but it's not worth trying to optimize. 5337868fe1SAndy Lutomirski */ 5437868fe1SAndy Lutomirski struct desc_struct *entries; 55bbf79d21SBorislav Petkov unsigned int nr_entries; 56f55f0501SAndy Lutomirski 57f55f0501SAndy Lutomirski /* 58f55f0501SAndy Lutomirski * If PTI is in use, then the entries array is not mapped while we're 59f55f0501SAndy Lutomirski * in user mode. The whole array will be aliased at the addressed 60f55f0501SAndy Lutomirski * given by ldt_slot_va(slot). We use two slots so that we can allocate 61f55f0501SAndy Lutomirski * and map, and enable a new LDT without invalidating the mapping 62f55f0501SAndy Lutomirski * of an older, still-in-use LDT. 63f55f0501SAndy Lutomirski * 64f55f0501SAndy Lutomirski * slot will be -1 if this LDT doesn't have an alias mapping. 65f55f0501SAndy Lutomirski */ 66f55f0501SAndy Lutomirski int slot; 6737868fe1SAndy Lutomirski }; 6837868fe1SAndy Lutomirski 69f55f0501SAndy Lutomirski /* This is a multiple of PAGE_SIZE. */ 70f55f0501SAndy Lutomirski #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) 71f55f0501SAndy Lutomirski 72f55f0501SAndy Lutomirski static inline void *ldt_slot_va(int slot) 73f55f0501SAndy Lutomirski { 74f55f0501SAndy Lutomirski return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); 75f55f0501SAndy Lutomirski } 76f55f0501SAndy Lutomirski 77a5b9e5a2SAndy Lutomirski /* 78a5b9e5a2SAndy Lutomirski * Used for LDT copy/destruction. 79a5b9e5a2SAndy Lutomirski */ 80a4828f81SThomas Gleixner static inline void init_new_context_ldt(struct mm_struct *mm) 81a4828f81SThomas Gleixner { 82a4828f81SThomas Gleixner mm->context.ldt = NULL; 83a4828f81SThomas Gleixner init_rwsem(&mm->context.ldt_usr_sem); 84a4828f81SThomas Gleixner } 85a4828f81SThomas Gleixner int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); 8639a0526fSDave Hansen void destroy_context_ldt(struct mm_struct *mm); 87f55f0501SAndy Lutomirski void ldt_arch_exit_mmap(struct mm_struct *mm); 88a5b9e5a2SAndy Lutomirski #else /* CONFIG_MODIFY_LDT_SYSCALL */ 89a4828f81SThomas Gleixner static inline void init_new_context_ldt(struct mm_struct *mm) { } 90a4828f81SThomas Gleixner static inline int ldt_dup_context(struct mm_struct *oldmm, 91a5b9e5a2SAndy Lutomirski struct mm_struct *mm) 92a5b9e5a2SAndy Lutomirski { 93a5b9e5a2SAndy Lutomirski return 0; 94a5b9e5a2SAndy Lutomirski } 9539a0526fSDave Hansen static inline void destroy_context_ldt(struct mm_struct *mm) { } 96f55f0501SAndy Lutomirski static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } 97a5b9e5a2SAndy Lutomirski #endif 98a5b9e5a2SAndy Lutomirski 9937868fe1SAndy Lutomirski static inline void load_mm_ldt(struct mm_struct *mm) 10037868fe1SAndy Lutomirski { 101a5b9e5a2SAndy Lutomirski #ifdef CONFIG_MODIFY_LDT_SYSCALL 10237868fe1SAndy Lutomirski struct ldt_struct *ldt; 10337868fe1SAndy Lutomirski 1043382290eSWill Deacon /* READ_ONCE synchronizes with smp_store_release */ 1053382290eSWill Deacon ldt = READ_ONCE(mm->context.ldt); 10637868fe1SAndy Lutomirski 10737868fe1SAndy Lutomirski /* 10837868fe1SAndy Lutomirski * Any change to mm->context.ldt is followed by an IPI to all 10937868fe1SAndy Lutomirski * CPUs with the mm active. The LDT will not be freed until 11037868fe1SAndy Lutomirski * after the IPI is handled by all such CPUs. This means that, 11137868fe1SAndy Lutomirski * if the ldt_struct changes before we return, the values we see 11237868fe1SAndy Lutomirski * will be safe, and the new values will be loaded before we run 11337868fe1SAndy Lutomirski * any user code. 11437868fe1SAndy Lutomirski * 11537868fe1SAndy Lutomirski * NB: don't try to convert this to use RCU without extreme care. 11637868fe1SAndy Lutomirski * We would still need IRQs off, because we don't want to change 11737868fe1SAndy Lutomirski * the local LDT after an IPI loaded a newer value than the one 11837868fe1SAndy Lutomirski * that we can see. 11937868fe1SAndy Lutomirski */ 12037868fe1SAndy Lutomirski 121f55f0501SAndy Lutomirski if (unlikely(ldt)) { 122f55f0501SAndy Lutomirski if (static_cpu_has(X86_FEATURE_PTI)) { 123f55f0501SAndy Lutomirski if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { 124f55f0501SAndy Lutomirski /* 125f55f0501SAndy Lutomirski * Whoops -- either the new LDT isn't mapped 126f55f0501SAndy Lutomirski * (if slot == -1) or is mapped into a bogus 127f55f0501SAndy Lutomirski * slot (if slot > 1). 128f55f0501SAndy Lutomirski */ 12937868fe1SAndy Lutomirski clear_LDT(); 130f55f0501SAndy Lutomirski return; 131f55f0501SAndy Lutomirski } 132f55f0501SAndy Lutomirski 133f55f0501SAndy Lutomirski /* 134f55f0501SAndy Lutomirski * If page table isolation is enabled, ldt->entries 135f55f0501SAndy Lutomirski * will not be mapped in the userspace pagetables. 136f55f0501SAndy Lutomirski * Tell the CPU to access the LDT through the alias 137f55f0501SAndy Lutomirski * at ldt_slot_va(ldt->slot). 138f55f0501SAndy Lutomirski */ 139f55f0501SAndy Lutomirski set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); 140f55f0501SAndy Lutomirski } else { 141f55f0501SAndy Lutomirski set_ldt(ldt->entries, ldt->nr_entries); 142f55f0501SAndy Lutomirski } 143f55f0501SAndy Lutomirski } else { 144f55f0501SAndy Lutomirski clear_LDT(); 145f55f0501SAndy Lutomirski } 146a5b9e5a2SAndy Lutomirski #else 147a5b9e5a2SAndy Lutomirski clear_LDT(); 148a5b9e5a2SAndy Lutomirski #endif 14973534258SAndy Lutomirski } 15073534258SAndy Lutomirski 15173534258SAndy Lutomirski static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) 15273534258SAndy Lutomirski { 15373534258SAndy Lutomirski #ifdef CONFIG_MODIFY_LDT_SYSCALL 15473534258SAndy Lutomirski /* 15573534258SAndy Lutomirski * Load the LDT if either the old or new mm had an LDT. 15673534258SAndy Lutomirski * 15773534258SAndy Lutomirski * An mm will never go from having an LDT to not having an LDT. Two 15873534258SAndy Lutomirski * mms never share an LDT, so we don't gain anything by checking to 15973534258SAndy Lutomirski * see whether the LDT changed. There's also no guarantee that 16073534258SAndy Lutomirski * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, 16173534258SAndy Lutomirski * then prev->context.ldt will also be non-NULL. 16273534258SAndy Lutomirski * 16373534258SAndy Lutomirski * If we really cared, we could optimize the case where prev == next 16473534258SAndy Lutomirski * and we're exiting lazy mode. Most of the time, if this happens, 16573534258SAndy Lutomirski * we don't actually need to reload LDTR, but modify_ldt() is mostly 16673534258SAndy Lutomirski * used by legacy code and emulators where we don't need this level of 16773534258SAndy Lutomirski * performance. 16873534258SAndy Lutomirski * 16973534258SAndy Lutomirski * This uses | instead of || because it generates better code. 17073534258SAndy Lutomirski */ 17173534258SAndy Lutomirski if (unlikely((unsigned long)prev->context.ldt | 17273534258SAndy Lutomirski (unsigned long)next->context.ldt)) 17373534258SAndy Lutomirski load_mm_ldt(next); 17473534258SAndy Lutomirski #endif 17537868fe1SAndy Lutomirski 17637868fe1SAndy Lutomirski DEBUG_LOCKS_WARN_ON(preemptible()); 17737868fe1SAndy Lutomirski } 17837868fe1SAndy Lutomirski 179b956575bSAndy Lutomirski void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); 1806826c8ffSBrian Gerst 181a31e184eSDave Hansen /* 182a31e184eSDave Hansen * Init a new mm. Used on mm copies, like at fork() 183a31e184eSDave Hansen * and on mm's that are brand-new, like at execve(). 184a31e184eSDave Hansen */ 18539a0526fSDave Hansen static inline int init_new_context(struct task_struct *tsk, 18639a0526fSDave Hansen struct mm_struct *mm) 18739a0526fSDave Hansen { 188c2b3496bSPeter Zijlstra mutex_init(&mm->context.lock); 189c2b3496bSPeter Zijlstra 190f39681edSAndy Lutomirski mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); 191f39681edSAndy Lutomirski atomic64_set(&mm->context.tlb_gen, 0); 192f39681edSAndy Lutomirski 193e8c24d3aSDave Hansen #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 194e8c24d3aSDave Hansen if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { 1952fa9d1cfSDave Hansen /* pkey 0 is the default and allocated implicitly */ 196e8c24d3aSDave Hansen mm->context.pkey_allocation_map = 0x1; 197e8c24d3aSDave Hansen /* -1 means unallocated or invalid */ 198e8c24d3aSDave Hansen mm->context.execute_only_pkey = -1; 199e8c24d3aSDave Hansen } 200e8c24d3aSDave Hansen #endif 201a4828f81SThomas Gleixner init_new_context_ldt(mm); 202a4828f81SThomas Gleixner return 0; 20339a0526fSDave Hansen } 20439a0526fSDave Hansen static inline void destroy_context(struct mm_struct *mm) 20539a0526fSDave Hansen { 20639a0526fSDave Hansen destroy_context_ldt(mm); 20739a0526fSDave Hansen } 20839a0526fSDave Hansen 20969c0319aSAndy Lutomirski extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, 21069c0319aSAndy Lutomirski struct task_struct *tsk); 2116826c8ffSBrian Gerst 212078194f8SAndy Lutomirski extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, 213078194f8SAndy Lutomirski struct task_struct *tsk); 214078194f8SAndy Lutomirski #define switch_mm_irqs_off switch_mm_irqs_off 215bb898558SAl Viro 216bb898558SAl Viro #define activate_mm(prev, next) \ 217bb898558SAl Viro do { \ 218bb898558SAl Viro paravirt_activate_mm((prev), (next)); \ 219bb898558SAl Viro switch_mm((prev), (next), NULL); \ 220bb898558SAl Viro } while (0); 221bb898558SAl Viro 2226826c8ffSBrian Gerst #ifdef CONFIG_X86_32 2236826c8ffSBrian Gerst #define deactivate_mm(tsk, mm) \ 2246826c8ffSBrian Gerst do { \ 225ccbeed3aSTejun Heo lazy_load_gs(0); \ 2266826c8ffSBrian Gerst } while (0) 2276826c8ffSBrian Gerst #else 2286826c8ffSBrian Gerst #define deactivate_mm(tsk, mm) \ 2296826c8ffSBrian Gerst do { \ 2306826c8ffSBrian Gerst load_gs_index(0); \ 2316826c8ffSBrian Gerst loadsegment(fs, 0); \ 2326826c8ffSBrian Gerst } while (0) 2336826c8ffSBrian Gerst #endif 234bb898558SAl Viro 235a31e184eSDave Hansen static inline void arch_dup_pkeys(struct mm_struct *oldmm, 236a31e184eSDave Hansen struct mm_struct *mm) 237a31e184eSDave Hansen { 238a31e184eSDave Hansen #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 239a31e184eSDave Hansen if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) 240a31e184eSDave Hansen return; 241a31e184eSDave Hansen 242a31e184eSDave Hansen /* Duplicate the oldmm pkey state in mm: */ 243a31e184eSDave Hansen mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; 244a31e184eSDave Hansen mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; 245a31e184eSDave Hansen #endif 246a31e184eSDave Hansen } 247a31e184eSDave Hansen 248c10e83f5SThomas Gleixner static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 249a1ea1c03SDave Hansen { 250a31e184eSDave Hansen arch_dup_pkeys(oldmm, mm); 251a1ea1c03SDave Hansen paravirt_arch_dup_mmap(oldmm, mm); 252a4828f81SThomas Gleixner return ldt_dup_context(oldmm, mm); 253a1ea1c03SDave Hansen } 254a1ea1c03SDave Hansen 255a1ea1c03SDave Hansen static inline void arch_exit_mmap(struct mm_struct *mm) 256a1ea1c03SDave Hansen { 257a1ea1c03SDave Hansen paravirt_arch_exit_mmap(mm); 258f55f0501SAndy Lutomirski ldt_arch_exit_mmap(mm); 259a1ea1c03SDave Hansen } 260a1ea1c03SDave Hansen 261b0e9b09bSDave Hansen #ifdef CONFIG_X86_64 262b0e9b09bSDave Hansen static inline bool is_64bit_mm(struct mm_struct *mm) 263b0e9b09bSDave Hansen { 26497f2645fSMasahiro Yamada return !IS_ENABLED(CONFIG_IA32_EMULATION) || 265b0e9b09bSDave Hansen !(mm->context.ia32_compat == TIF_IA32); 266b0e9b09bSDave Hansen } 267b0e9b09bSDave Hansen #else 268b0e9b09bSDave Hansen static inline bool is_64bit_mm(struct mm_struct *mm) 269b0e9b09bSDave Hansen { 270b0e9b09bSDave Hansen return false; 271b0e9b09bSDave Hansen } 272b0e9b09bSDave Hansen #endif 273b0e9b09bSDave Hansen 274fe3d197fSDave Hansen static inline void arch_bprm_mm_init(struct mm_struct *mm, 275fe3d197fSDave Hansen struct vm_area_struct *vma) 276fe3d197fSDave Hansen { 277fe3d197fSDave Hansen mpx_mm_init(mm); 278fe3d197fSDave Hansen } 279fe3d197fSDave Hansen 2801de4fa14SDave Hansen static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, 2811de4fa14SDave Hansen unsigned long start, unsigned long end) 2821de4fa14SDave Hansen { 283c922228eSDave Hansen /* 284c922228eSDave Hansen * mpx_notify_unmap() goes and reads a rarely-hot 285c922228eSDave Hansen * cacheline in the mm_struct. That can be expensive 286c922228eSDave Hansen * enough to be seen in profiles. 287c922228eSDave Hansen * 288c922228eSDave Hansen * The mpx_notify_unmap() call and its contents have been 289c922228eSDave Hansen * observed to affect munmap() performance on hardware 290c922228eSDave Hansen * where MPX is not present. 291c922228eSDave Hansen * 292c922228eSDave Hansen * The unlikely() optimizes for the fast case: no MPX 293c922228eSDave Hansen * in the CPU, or no MPX use in the process. Even if 294c922228eSDave Hansen * we get this wrong (in the unlikely event that MPX 295c922228eSDave Hansen * is widely enabled on some system) the overhead of 296c922228eSDave Hansen * MPX itself (reading bounds tables) is expected to 297c922228eSDave Hansen * overwhelm the overhead of getting this unlikely() 298c922228eSDave Hansen * consistently wrong. 299c922228eSDave Hansen */ 300c922228eSDave Hansen if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) 3011de4fa14SDave Hansen mpx_notify_unmap(mm, vma, start, end); 3021de4fa14SDave Hansen } 3031de4fa14SDave Hansen 30433a709b2SDave Hansen /* 30533a709b2SDave Hansen * We only want to enforce protection keys on the current process 30633a709b2SDave Hansen * because we effectively have no access to PKRU for other 30733a709b2SDave Hansen * processes or any way to tell *which * PKRU in a threaded 30833a709b2SDave Hansen * process we could use. 30933a709b2SDave Hansen * 31033a709b2SDave Hansen * So do not enforce things if the VMA is not from the current 31133a709b2SDave Hansen * mm, or if we are in a kernel thread. 31233a709b2SDave Hansen */ 31333a709b2SDave Hansen static inline bool vma_is_foreign(struct vm_area_struct *vma) 31433a709b2SDave Hansen { 31533a709b2SDave Hansen if (!current->mm) 31633a709b2SDave Hansen return true; 31733a709b2SDave Hansen /* 31833a709b2SDave Hansen * Should PKRU be enforced on the access to this VMA? If 31933a709b2SDave Hansen * the VMA is from another process, then PKRU has no 32033a709b2SDave Hansen * relevance and should not be enforced. 32133a709b2SDave Hansen */ 32233a709b2SDave Hansen if (current->mm != vma->vm_mm) 32333a709b2SDave Hansen return true; 32433a709b2SDave Hansen 32533a709b2SDave Hansen return false; 32633a709b2SDave Hansen } 32733a709b2SDave Hansen 3281b2ee126SDave Hansen static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, 329d61172b4SDave Hansen bool write, bool execute, bool foreign) 33033a709b2SDave Hansen { 331d61172b4SDave Hansen /* pkeys never affect instruction fetches */ 332d61172b4SDave Hansen if (execute) 333d61172b4SDave Hansen return true; 33433a709b2SDave Hansen /* allow access if the VMA is not one from this process */ 3351b2ee126SDave Hansen if (foreign || vma_is_foreign(vma)) 33633a709b2SDave Hansen return true; 33733a709b2SDave Hansen return __pkru_allows_pkey(vma_pkey(vma), write); 33833a709b2SDave Hansen } 33933a709b2SDave Hansen 34052a2af40SAndy Lutomirski /* 341d6e41f11SAndy Lutomirski * This can be used from process context to figure out what the value of 3426c690ee1SAndy Lutomirski * CR3 is without needing to do a (slow) __read_cr3(). 343d6e41f11SAndy Lutomirski * 344d6e41f11SAndy Lutomirski * It's intended to be used for code like KVM that sneakily changes CR3 345d6e41f11SAndy Lutomirski * and needs to restore it. It needs to be used very carefully. 346d6e41f11SAndy Lutomirski */ 347d6e41f11SAndy Lutomirski static inline unsigned long __get_current_cr3_fast(void) 348d6e41f11SAndy Lutomirski { 34950fb83a6SDave Hansen unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, 35047061a24SAndy Lutomirski this_cpu_read(cpu_tlbstate.loaded_mm_asid)); 35110af6235SAndy Lutomirski 352d6e41f11SAndy Lutomirski /* For now, be very restrictive about when this can be called. */ 3534c07f904SRoman Kagan VM_WARN_ON(in_nmi() || preemptible()); 354d6e41f11SAndy Lutomirski 3556c690ee1SAndy Lutomirski VM_BUG_ON(cr3 != __read_cr3()); 356d6e41f11SAndy Lutomirski return cr3; 357d6e41f11SAndy Lutomirski } 358d6e41f11SAndy Lutomirski 3591965aae3SH. Peter Anvin #endif /* _ASM_X86_MMU_CONTEXT_H */ 360