#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H

#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>

#include <trace/events/tlb.h>

#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
#ifndef CONFIG_PARAVIRT
static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
}
#endif	/* !CONFIG_PARAVIRT */

#ifdef CONFIG_PERF_EVENTS
extern struct static_key rdpmc_always_available;

static inline void load_mm_cr4(struct mm_struct *mm)
{
	if (static_key_false(&rdpmc_always_available) ||
	    atomic_read(&mm->context.perf_rdpmc_allowed))
		cr4_set_bits(X86_CR4_PCE);
	else
		cr4_clear_bits(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif
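
/*
 * CR4.PCE gates user-space RDPMC: load_mm_cr4() above enables it only
 * when rdpmc is always allowed or the incoming mm has opted in via
 * perf_rdpmc_allowed, so switching to an mm without permission
 * immediately revokes user-space counter access.  Called with
 * interrupts off from switch_mm().
 */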

/*
 * ldt_structs can be allocated, used, and freed, but they are never
 * modified while live.
 */
struct ldt_struct {
	/*
	 * Xen requires page-aligned LDTs with special permissions.  This is
	 * needed to prevent us from installing evil descriptors such as
	 * call gates.  On native, we could merge the ldt_struct and LDT
	 * allocations, but it's not worth trying to optimize.
	 */
	struct desc_struct *entries;
	int size;
};

static inline void load_mm_ldt(struct mm_struct *mm)
{
	struct ldt_struct *ldt;

	/* lockless_dereference synchronizes with smp_store_release */
	ldt = lockless_dereference(mm->context.ldt);

	/*
	 * Any change to mm->context.ldt is followed by an IPI to all
	 * CPUs with the mm active.  The LDT will not be freed until
	 * after the IPI is handled by all such CPUs.  This means that,
	 * if the ldt_struct changes before we return, the values we see
	 * will be safe, and the new values will be loaded before we run
	 * any user code.
	 *
	 * NB: don't try to convert this to use RCU without extreme care.
	 * We would still need IRQs off, because we don't want to change
	 * the local LDT after an IPI loaded a newer value than the one
	 * that we can see.
	 */

	if (unlikely(ldt))
		set_ldt(ldt->entries, ldt->size);
	else
		clear_LDT();

	DEBUG_LOCKS_WARN_ON(preemptible());
}

/*
 * Used for LDT copy/destruction.
 */
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
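
/*
 * enter_lazy_tlb() marks this CPU as holding its current mm only lazily,
 * e.g. while a kernel thread runs.  A TLB flush IPI arriving in lazy mode
 * makes leave_mm() drop the mm and suppress further IPIs instead of
 * flushing repeatedly; switch_mm() below compensates by reloading CR3
 * when the "same mm" path finds this CPU cleared from mm_cpumask().
 */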

static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}

static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
			     struct task_struct *tsk)
{
	unsigned cpu = smp_processor_id();

	if (likely(prev != next)) {
#ifdef CONFIG_SMP
		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
		this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
		cpumask_set_cpu(cpu, mm_cpumask(next));

		/* Re-load page tables */
		load_cr3(next->pgd);
		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);

		/* Stop flush ipis for the previous mm */
		cpumask_clear_cpu(cpu, mm_cpumask(prev));

		/* Load per-mm CR4 state */
		load_mm_cr4(next);

		/*
		 * Load the LDT, if the LDT is different.
		 *
		 * It's possible that prev->context.ldt doesn't match
		 * the LDT register.  This can happen if leave_mm(prev)
		 * was called and then modify_ldt changed
		 * prev->context.ldt but suppressed an IPI to this CPU.
		 * In this case, prev->context.ldt != NULL, because we
		 * never set context.ldt to NULL while the mm still
		 * exists.  That means that next->context.ldt !=
		 * prev->context.ldt, because mms never share an LDT.
		 */
		if (unlikely(prev->context.ldt != next->context.ldt))
			load_mm_ldt(next);
	}
#ifdef CONFIG_SMP
	else {
		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);

		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
			/*
			 * On established mms, the mm_cpumask is only changed
			 * from irq context, from ptep_clear_flush() while in
			 * lazy tlb mode, and here.  Irqs are blocked during
			 * schedule, protecting us from simultaneous changes.
			 */
			cpumask_set_cpu(cpu, mm_cpumask(next));
			/*
			 * We were in lazy tlb mode and leave_mm disabled
			 * tlb flush IPI delivery.  We must reload CR3
			 * to make sure to use no freed page tables.
			 */
			load_cr3(next->pgd);
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
			load_mm_cr4(next);
			load_mm_ldt(next);
		}
	}
#endif
}

#define activate_mm(prev, next)			\
do {						\
	paravirt_activate_mm((prev), (next));	\
	switch_mm((prev), (next), NULL);	\
} while (0);

#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)			\
do {						\
	lazy_load_gs(0);			\
} while (0)
#else
#define deactivate_mm(tsk, mm)			\
do {						\
	load_gs_index(0);			\
	loadsegment(fs, 0);			\
} while (0)
#endif

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	paravirt_arch_dup_mmap(oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	paravirt_arch_exit_mmap(mm);
}

#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return	!config_enabled(CONFIG_IA32_EMULATION) ||
		!(mm->context.ia32_compat == TIF_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return false;
}
#endif
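
/*
 * Note that is_64bit_mm() checks the mm, not current: a 64-bit task can
 * legitimately operate on a 32-bit compat mm (and vice versa), and MPX
 * needs the mm's own word size to pick bounds-directory and bounds-table
 * entry sizes.
 */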

static inline void arch_bprm_mm_init(struct mm_struct *mm,
		struct vm_area_struct *vma)
{
	mpx_mm_init(mm);
}

static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
			      unsigned long start, unsigned long end)
{
	/*
	 * mpx_notify_unmap() goes and reads a rarely-hot
	 * cacheline in the mm_struct.  That can be expensive
	 * enough to be seen in profiles.
	 *
	 * The mpx_notify_unmap() call and its contents have been
	 * observed to affect munmap() performance on hardware
	 * where MPX is not present.
	 *
	 * The unlikely() optimizes for the fast case: no MPX
	 * in the CPU, or no MPX use in the process.  Even if
	 * we get this wrong (in the unlikely event that MPX
	 * is widely enabled on some system) the overhead of
	 * MPX itself (reading bounds tables) is expected to
	 * overwhelm the overhead of getting this unlikely()
	 * consistently wrong.
	 */
	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
		mpx_notify_unmap(mm, vma, start, end);
}

#endif /* _ASM_X86_MMU_CONTEXT_H */