1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 277e463d1SThomas Gleixner /* 377e463d1SThomas Gleixner * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds 477e463d1SThomas Gleixner * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> 577e463d1SThomas Gleixner * Copyright (C) 2002 Andi Kleen 677e463d1SThomas Gleixner * 777e463d1SThomas Gleixner * This handles calls from both 32bit and 64bit mode. 8c2b3496bSPeter Zijlstra * 9c2b3496bSPeter Zijlstra * Lock order: 10c2b3496bSPeter Zijlstra * contex.ldt_usr_sem 11c1e8d7c6SMichel Lespinasse * mmap_lock 12c2b3496bSPeter Zijlstra * context.lock 1377e463d1SThomas Gleixner */ 1477e463d1SThomas Gleixner 1577e463d1SThomas Gleixner #include <linux/errno.h> 165a0e3ad6STejun Heo #include <linux/gfp.h> 1777e463d1SThomas Gleixner #include <linux/sched.h> 1877e463d1SThomas Gleixner #include <linux/string.h> 1977e463d1SThomas Gleixner #include <linux/mm.h> 2077e463d1SThomas Gleixner #include <linux/smp.h> 21da20ab35SDave Hansen #include <linux/syscalls.h> 2237868fe1SAndy Lutomirski #include <linux/slab.h> 2377e463d1SThomas Gleixner #include <linux/vmalloc.h> 24423a5405SJaswinder Singh Rajput #include <linux/uaccess.h> 2577e463d1SThomas Gleixner 2677e463d1SThomas Gleixner #include <asm/ldt.h> 27f55f0501SAndy Lutomirski #include <asm/tlb.h> 2877e463d1SThomas Gleixner #include <asm/desc.h> 2977e463d1SThomas Gleixner #include <asm/mmu_context.h> 30186525bdSIngo Molnar #include <asm/pgtable_areas.h> 31186525bdSIngo Molnar 32cc801833SAndy Lutomirski #include <xen/xen.h> 33cc801833SAndy Lutomirski 34186525bdSIngo Molnar /* This is a multiple of PAGE_SIZE. */ 35186525bdSIngo Molnar #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) 36186525bdSIngo Molnar 37186525bdSIngo Molnar static inline void *ldt_slot_va(int slot) 38186525bdSIngo Molnar { 39186525bdSIngo Molnar return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); 40186525bdSIngo Molnar } 41186525bdSIngo Molnar 42186525bdSIngo Molnar void load_mm_ldt(struct mm_struct *mm) 43186525bdSIngo Molnar { 44186525bdSIngo Molnar struct ldt_struct *ldt; 45186525bdSIngo Molnar 46186525bdSIngo Molnar /* READ_ONCE synchronizes with smp_store_release */ 47186525bdSIngo Molnar ldt = READ_ONCE(mm->context.ldt); 48186525bdSIngo Molnar 49186525bdSIngo Molnar /* 50186525bdSIngo Molnar * Any change to mm->context.ldt is followed by an IPI to all 51186525bdSIngo Molnar * CPUs with the mm active. The LDT will not be freed until 52186525bdSIngo Molnar * after the IPI is handled by all such CPUs. This means that, 53186525bdSIngo Molnar * if the ldt_struct changes before we return, the values we see 54186525bdSIngo Molnar * will be safe, and the new values will be loaded before we run 55186525bdSIngo Molnar * any user code. 56186525bdSIngo Molnar * 57186525bdSIngo Molnar * NB: don't try to convert this to use RCU without extreme care. 58186525bdSIngo Molnar * We would still need IRQs off, because we don't want to change 59186525bdSIngo Molnar * the local LDT after an IPI loaded a newer value than the one 60186525bdSIngo Molnar * that we can see. 61186525bdSIngo Molnar */ 62186525bdSIngo Molnar 63186525bdSIngo Molnar if (unlikely(ldt)) { 64186525bdSIngo Molnar if (static_cpu_has(X86_FEATURE_PTI)) { 65186525bdSIngo Molnar if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { 66186525bdSIngo Molnar /* 67186525bdSIngo Molnar * Whoops -- either the new LDT isn't mapped 68186525bdSIngo Molnar * (if slot == -1) or is mapped into a bogus 69186525bdSIngo Molnar * slot (if slot > 1). 70186525bdSIngo Molnar */ 71186525bdSIngo Molnar clear_LDT(); 72186525bdSIngo Molnar return; 73186525bdSIngo Molnar } 74186525bdSIngo Molnar 75186525bdSIngo Molnar /* 76186525bdSIngo Molnar * If page table isolation is enabled, ldt->entries 77186525bdSIngo Molnar * will not be mapped in the userspace pagetables. 78186525bdSIngo Molnar * Tell the CPU to access the LDT through the alias 79186525bdSIngo Molnar * at ldt_slot_va(ldt->slot). 80186525bdSIngo Molnar */ 81186525bdSIngo Molnar set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); 82186525bdSIngo Molnar } else { 83186525bdSIngo Molnar set_ldt(ldt->entries, ldt->nr_entries); 84186525bdSIngo Molnar } 85186525bdSIngo Molnar } else { 86186525bdSIngo Molnar clear_LDT(); 87186525bdSIngo Molnar } 88186525bdSIngo Molnar } 89186525bdSIngo Molnar 90186525bdSIngo Molnar void switch_ldt(struct mm_struct *prev, struct mm_struct *next) 91186525bdSIngo Molnar { 92186525bdSIngo Molnar /* 93186525bdSIngo Molnar * Load the LDT if either the old or new mm had an LDT. 94186525bdSIngo Molnar * 95186525bdSIngo Molnar * An mm will never go from having an LDT to not having an LDT. Two 96186525bdSIngo Molnar * mms never share an LDT, so we don't gain anything by checking to 97186525bdSIngo Molnar * see whether the LDT changed. There's also no guarantee that 98186525bdSIngo Molnar * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, 99186525bdSIngo Molnar * then prev->context.ldt will also be non-NULL. 100186525bdSIngo Molnar * 101186525bdSIngo Molnar * If we really cared, we could optimize the case where prev == next 102186525bdSIngo Molnar * and we're exiting lazy mode. Most of the time, if this happens, 103186525bdSIngo Molnar * we don't actually need to reload LDTR, but modify_ldt() is mostly 104186525bdSIngo Molnar * used by legacy code and emulators where we don't need this level of 105186525bdSIngo Molnar * performance. 106186525bdSIngo Molnar * 107186525bdSIngo Molnar * This uses | instead of || because it generates better code. 108186525bdSIngo Molnar */ 109186525bdSIngo Molnar if (unlikely((unsigned long)prev->context.ldt | 110186525bdSIngo Molnar (unsigned long)next->context.ldt)) 111186525bdSIngo Molnar load_mm_ldt(next); 112186525bdSIngo Molnar 113186525bdSIngo Molnar DEBUG_LOCKS_WARN_ON(preemptible()); 114186525bdSIngo Molnar } 11577e463d1SThomas Gleixner 116a6323757SAndy Lutomirski static void refresh_ldt_segments(void) 117a6323757SAndy Lutomirski { 118a6323757SAndy Lutomirski #ifdef CONFIG_X86_64 119a6323757SAndy Lutomirski unsigned short sel; 120a6323757SAndy Lutomirski 121a6323757SAndy Lutomirski /* 122a6323757SAndy Lutomirski * Make sure that the cached DS and ES descriptors match the updated 123a6323757SAndy Lutomirski * LDT. 124a6323757SAndy Lutomirski */ 125a6323757SAndy Lutomirski savesegment(ds, sel); 126a6323757SAndy Lutomirski if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) 127a6323757SAndy Lutomirski loadsegment(ds, sel); 128a6323757SAndy Lutomirski 129a6323757SAndy Lutomirski savesegment(es, sel); 130a6323757SAndy Lutomirski if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) 131a6323757SAndy Lutomirski loadsegment(es, sel); 132a6323757SAndy Lutomirski #endif 133a6323757SAndy Lutomirski } 134a6323757SAndy Lutomirski 135c2b3496bSPeter Zijlstra /* context.lock is held by the task which issued the smp function call */ 1363d28ebceSAndy Lutomirski static void flush_ldt(void *__mm) 13777e463d1SThomas Gleixner { 1383d28ebceSAndy Lutomirski struct mm_struct *mm = __mm; 13977e463d1SThomas Gleixner 1403d28ebceSAndy Lutomirski if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) 14137868fe1SAndy Lutomirski return; 14237868fe1SAndy Lutomirski 143f55f0501SAndy Lutomirski load_mm_ldt(mm); 144a6323757SAndy Lutomirski 145a6323757SAndy Lutomirski refresh_ldt_segments(); 14637868fe1SAndy Lutomirski } 14737868fe1SAndy Lutomirski 14837868fe1SAndy Lutomirski /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ 149bbf79d21SBorislav Petkov static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) 15077e463d1SThomas Gleixner { 15137868fe1SAndy Lutomirski struct ldt_struct *new_ldt; 152990e9dc3SThomas Gleixner unsigned int alloc_size; 15377e463d1SThomas Gleixner 154bbf79d21SBorislav Petkov if (num_entries > LDT_ENTRIES) 15537868fe1SAndy Lutomirski return NULL; 15637868fe1SAndy Lutomirski 15737868fe1SAndy Lutomirski new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); 15837868fe1SAndy Lutomirski if (!new_ldt) 15937868fe1SAndy Lutomirski return NULL; 16037868fe1SAndy Lutomirski 16137868fe1SAndy Lutomirski BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); 162bbf79d21SBorislav Petkov alloc_size = num_entries * LDT_ENTRY_SIZE; 16337868fe1SAndy Lutomirski 16437868fe1SAndy Lutomirski /* 16537868fe1SAndy Lutomirski * Xen is very picky: it requires a page-aligned LDT that has no 16637868fe1SAndy Lutomirski * trailing nonzero bytes in any page that contains LDT descriptors. 16737868fe1SAndy Lutomirski * Keep it simple: zero the whole allocation and never allocate less 16837868fe1SAndy Lutomirski * than PAGE_SIZE. 16937868fe1SAndy Lutomirski */ 17037868fe1SAndy Lutomirski if (alloc_size > PAGE_SIZE) 17137868fe1SAndy Lutomirski new_ldt->entries = vzalloc(alloc_size); 17277e463d1SThomas Gleixner else 173f454b478SJan Beulich new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); 17477e463d1SThomas Gleixner 17537868fe1SAndy Lutomirski if (!new_ldt->entries) { 17637868fe1SAndy Lutomirski kfree(new_ldt); 17737868fe1SAndy Lutomirski return NULL; 17877e463d1SThomas Gleixner } 17977e463d1SThomas Gleixner 180f55f0501SAndy Lutomirski /* The new LDT isn't aliased for PTI yet. */ 181f55f0501SAndy Lutomirski new_ldt->slot = -1; 182f55f0501SAndy Lutomirski 183bbf79d21SBorislav Petkov new_ldt->nr_entries = num_entries; 18437868fe1SAndy Lutomirski return new_ldt; 18537868fe1SAndy Lutomirski } 18637868fe1SAndy Lutomirski 1879bae3197SJoerg Roedel #ifdef CONFIG_PAGE_TABLE_ISOLATION 1889bae3197SJoerg Roedel 1899bae3197SJoerg Roedel static void do_sanity_check(struct mm_struct *mm, 1909bae3197SJoerg Roedel bool had_kernel_mapping, 1919bae3197SJoerg Roedel bool had_user_mapping) 1929bae3197SJoerg Roedel { 1939bae3197SJoerg Roedel if (mm->context.ldt) { 1949bae3197SJoerg Roedel /* 1959bae3197SJoerg Roedel * We already had an LDT. The top-level entry should already 1969bae3197SJoerg Roedel * have been allocated and synchronized with the usermode 1979bae3197SJoerg Roedel * tables. 1989bae3197SJoerg Roedel */ 1999bae3197SJoerg Roedel WARN_ON(!had_kernel_mapping); 20067e87d43SBorislav Petkov if (boot_cpu_has(X86_FEATURE_PTI)) 2019bae3197SJoerg Roedel WARN_ON(!had_user_mapping); 2029bae3197SJoerg Roedel } else { 2039bae3197SJoerg Roedel /* 2049bae3197SJoerg Roedel * This is the first time we're mapping an LDT for this process. 2059bae3197SJoerg Roedel * Sync the pgd to the usermode tables. 2069bae3197SJoerg Roedel */ 2079bae3197SJoerg Roedel WARN_ON(had_kernel_mapping); 20867e87d43SBorislav Petkov if (boot_cpu_has(X86_FEATURE_PTI)) 2099bae3197SJoerg Roedel WARN_ON(had_user_mapping); 2109bae3197SJoerg Roedel } 2119bae3197SJoerg Roedel } 2129bae3197SJoerg Roedel 2136df934b9SJoerg Roedel #ifdef CONFIG_X86_PAE 2146df934b9SJoerg Roedel 2156df934b9SJoerg Roedel static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va) 2166df934b9SJoerg Roedel { 2176df934b9SJoerg Roedel p4d_t *p4d; 2186df934b9SJoerg Roedel pud_t *pud; 2196df934b9SJoerg Roedel 2206df934b9SJoerg Roedel if (pgd->pgd == 0) 2216df934b9SJoerg Roedel return NULL; 2226df934b9SJoerg Roedel 2236df934b9SJoerg Roedel p4d = p4d_offset(pgd, va); 2246df934b9SJoerg Roedel if (p4d_none(*p4d)) 2256df934b9SJoerg Roedel return NULL; 2266df934b9SJoerg Roedel 2276df934b9SJoerg Roedel pud = pud_offset(p4d, va); 2286df934b9SJoerg Roedel if (pud_none(*pud)) 2296df934b9SJoerg Roedel return NULL; 2306df934b9SJoerg Roedel 2316df934b9SJoerg Roedel return pmd_offset(pud, va); 2326df934b9SJoerg Roedel } 2336df934b9SJoerg Roedel 2346df934b9SJoerg Roedel static void map_ldt_struct_to_user(struct mm_struct *mm) 2356df934b9SJoerg Roedel { 2366df934b9SJoerg Roedel pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR); 2376df934b9SJoerg Roedel pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); 2386df934b9SJoerg Roedel pmd_t *k_pmd, *u_pmd; 2396df934b9SJoerg Roedel 2406df934b9SJoerg Roedel k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); 2416df934b9SJoerg Roedel u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); 2426df934b9SJoerg Roedel 24367e87d43SBorislav Petkov if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) 2446df934b9SJoerg Roedel set_pmd(u_pmd, *k_pmd); 2456df934b9SJoerg Roedel } 2466df934b9SJoerg Roedel 2476df934b9SJoerg Roedel static void sanity_check_ldt_mapping(struct mm_struct *mm) 2486df934b9SJoerg Roedel { 2496df934b9SJoerg Roedel pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR); 2506df934b9SJoerg Roedel pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); 2516df934b9SJoerg Roedel bool had_kernel, had_user; 2526df934b9SJoerg Roedel pmd_t *k_pmd, *u_pmd; 2536df934b9SJoerg Roedel 2546df934b9SJoerg Roedel k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); 2556df934b9SJoerg Roedel u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); 2566df934b9SJoerg Roedel had_kernel = (k_pmd->pmd != 0); 2576df934b9SJoerg Roedel had_user = (u_pmd->pmd != 0); 2586df934b9SJoerg Roedel 2596df934b9SJoerg Roedel do_sanity_check(mm, had_kernel, had_user); 2606df934b9SJoerg Roedel } 2616df934b9SJoerg Roedel 2626df934b9SJoerg Roedel #else /* !CONFIG_X86_PAE */ 2636df934b9SJoerg Roedel 2649bae3197SJoerg Roedel static void map_ldt_struct_to_user(struct mm_struct *mm) 2659bae3197SJoerg Roedel { 2669bae3197SJoerg Roedel pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); 2679bae3197SJoerg Roedel 26867e87d43SBorislav Petkov if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) 2699bae3197SJoerg Roedel set_pgd(kernel_to_user_pgdp(pgd), *pgd); 2709bae3197SJoerg Roedel } 2719bae3197SJoerg Roedel 2729bae3197SJoerg Roedel static void sanity_check_ldt_mapping(struct mm_struct *mm) 2739bae3197SJoerg Roedel { 2749bae3197SJoerg Roedel pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); 2759bae3197SJoerg Roedel bool had_kernel = (pgd->pgd != 0); 2769bae3197SJoerg Roedel bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0); 2779bae3197SJoerg Roedel 2789bae3197SJoerg Roedel do_sanity_check(mm, had_kernel, had_user); 2799bae3197SJoerg Roedel } 2809bae3197SJoerg Roedel 2816df934b9SJoerg Roedel #endif /* CONFIG_X86_PAE */ 2826df934b9SJoerg Roedel 283f55f0501SAndy Lutomirski /* 284f55f0501SAndy Lutomirski * If PTI is enabled, this maps the LDT into the kernelmode and 285f55f0501SAndy Lutomirski * usermode tables for the given mm. 286f55f0501SAndy Lutomirski */ 287f55f0501SAndy Lutomirski static int 288f55f0501SAndy Lutomirski map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) 289f55f0501SAndy Lutomirski { 290f55f0501SAndy Lutomirski unsigned long va; 2919bae3197SJoerg Roedel bool is_vmalloc; 292f55f0501SAndy Lutomirski spinlock_t *ptl; 293a0e6e083SKirill A. Shutemov int i, nr_pages; 294f55f0501SAndy Lutomirski 29567e87d43SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_PTI)) 296f55f0501SAndy Lutomirski return 0; 297f55f0501SAndy Lutomirski 298f55f0501SAndy Lutomirski /* 299f55f0501SAndy Lutomirski * Any given ldt_struct should have map_ldt_struct() called at most 300f55f0501SAndy Lutomirski * once. 301f55f0501SAndy Lutomirski */ 302f55f0501SAndy Lutomirski WARN_ON(ldt->slot != -1); 303f55f0501SAndy Lutomirski 3049bae3197SJoerg Roedel /* Check if the current mappings are sane */ 3059bae3197SJoerg Roedel sanity_check_ldt_mapping(mm); 3069bae3197SJoerg Roedel 307f55f0501SAndy Lutomirski is_vmalloc = is_vmalloc_addr(ldt->entries); 308f55f0501SAndy Lutomirski 309a0e6e083SKirill A. Shutemov nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); 310a0e6e083SKirill A. Shutemov 311a0e6e083SKirill A. Shutemov for (i = 0; i < nr_pages; i++) { 312f55f0501SAndy Lutomirski unsigned long offset = i << PAGE_SHIFT; 313f55f0501SAndy Lutomirski const void *src = (char *)ldt->entries + offset; 314f55f0501SAndy Lutomirski unsigned long pfn; 315fb43d6cbSDave Hansen pgprot_t pte_prot; 316f55f0501SAndy Lutomirski pte_t pte, *ptep; 317f55f0501SAndy Lutomirski 318f55f0501SAndy Lutomirski va = (unsigned long)ldt_slot_va(slot) + offset; 319f55f0501SAndy Lutomirski pfn = is_vmalloc ? vmalloc_to_pfn(src) : 320f55f0501SAndy Lutomirski page_to_pfn(virt_to_page(src)); 321f55f0501SAndy Lutomirski /* 322f55f0501SAndy Lutomirski * Treat the PTI LDT range as a *userspace* range. 323f55f0501SAndy Lutomirski * get_locked_pte() will allocate all needed pagetables 324f55f0501SAndy Lutomirski * and account for them in this mm. 325f55f0501SAndy Lutomirski */ 326f55f0501SAndy Lutomirski ptep = get_locked_pte(mm, va, &ptl); 327f55f0501SAndy Lutomirski if (!ptep) 328f55f0501SAndy Lutomirski return -ENOMEM; 3299f5cb6b3SThomas Gleixner /* 3309f5cb6b3SThomas Gleixner * Map it RO so the easy to find address is not a primary 3319f5cb6b3SThomas Gleixner * target via some kernel interface which misses a 3329f5cb6b3SThomas Gleixner * permission check. 3339f5cb6b3SThomas Gleixner */ 334fb43d6cbSDave Hansen pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); 335fb43d6cbSDave Hansen /* Filter out unsuppored __PAGE_KERNEL* bits: */ 336e6f39e87SJoerg Roedel pgprot_val(pte_prot) &= __supported_pte_mask; 337fb43d6cbSDave Hansen pte = pfn_pte(pfn, pte_prot); 338f55f0501SAndy Lutomirski set_pte_at(mm, va, ptep, pte); 339f55f0501SAndy Lutomirski pte_unmap_unlock(ptep, ptl); 340f55f0501SAndy Lutomirski } 341f55f0501SAndy Lutomirski 3429bae3197SJoerg Roedel /* Propagate LDT mapping to the user page-table */ 3439bae3197SJoerg Roedel map_ldt_struct_to_user(mm); 344f55f0501SAndy Lutomirski 345f55f0501SAndy Lutomirski ldt->slot = slot; 346f55f0501SAndy Lutomirski return 0; 347f55f0501SAndy Lutomirski } 348f55f0501SAndy Lutomirski 349a0e6e083SKirill A. Shutemov static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) 350a0e6e083SKirill A. Shutemov { 351a0e6e083SKirill A. Shutemov unsigned long va; 352a0e6e083SKirill A. Shutemov int i, nr_pages; 353a0e6e083SKirill A. Shutemov 354a0e6e083SKirill A. Shutemov if (!ldt) 355a0e6e083SKirill A. Shutemov return; 356a0e6e083SKirill A. Shutemov 357a0e6e083SKirill A. Shutemov /* LDT map/unmap is only required for PTI */ 35867e87d43SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_PTI)) 359a0e6e083SKirill A. Shutemov return; 360a0e6e083SKirill A. Shutemov 361a0e6e083SKirill A. Shutemov nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); 362a0e6e083SKirill A. Shutemov 363a0e6e083SKirill A. Shutemov for (i = 0; i < nr_pages; i++) { 364a0e6e083SKirill A. Shutemov unsigned long offset = i << PAGE_SHIFT; 365a0e6e083SKirill A. Shutemov spinlock_t *ptl; 366a0e6e083SKirill A. Shutemov pte_t *ptep; 367a0e6e083SKirill A. Shutemov 368a0e6e083SKirill A. Shutemov va = (unsigned long)ldt_slot_va(ldt->slot) + offset; 369a0e6e083SKirill A. Shutemov ptep = get_locked_pte(mm, va, &ptl); 370a0e6e083SKirill A. Shutemov pte_clear(mm, va, ptep); 371a0e6e083SKirill A. Shutemov pte_unmap_unlock(ptep, ptl); 372a0e6e083SKirill A. Shutemov } 373a0e6e083SKirill A. Shutemov 374a0e6e083SKirill A. Shutemov va = (unsigned long)ldt_slot_va(ldt->slot); 375a0e6e083SKirill A. Shutemov flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); 376a0e6e083SKirill A. Shutemov } 377a0e6e083SKirill A. Shutemov 3789bae3197SJoerg Roedel #else /* !CONFIG_PAGE_TABLE_ISOLATION */ 3799bae3197SJoerg Roedel 3809bae3197SJoerg Roedel static int 3819bae3197SJoerg Roedel map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) 3829bae3197SJoerg Roedel { 3839bae3197SJoerg Roedel return 0; 3849bae3197SJoerg Roedel } 385a0e6e083SKirill A. Shutemov 386a0e6e083SKirill A. Shutemov static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) 387a0e6e083SKirill A. Shutemov { 388a0e6e083SKirill A. Shutemov } 3899bae3197SJoerg Roedel #endif /* CONFIG_PAGE_TABLE_ISOLATION */ 3909bae3197SJoerg Roedel 391f55f0501SAndy Lutomirski static void free_ldt_pgtables(struct mm_struct *mm) 392f55f0501SAndy Lutomirski { 393f55f0501SAndy Lutomirski #ifdef CONFIG_PAGE_TABLE_ISOLATION 394f55f0501SAndy Lutomirski struct mmu_gather tlb; 395f55f0501SAndy Lutomirski unsigned long start = LDT_BASE_ADDR; 3968195d869SJoerg Roedel unsigned long end = LDT_END_ADDR; 397f55f0501SAndy Lutomirski 39867e87d43SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_PTI)) 399f55f0501SAndy Lutomirski return; 400f55f0501SAndy Lutomirski 401f55f0501SAndy Lutomirski tlb_gather_mmu(&tlb, mm, start, end); 402f55f0501SAndy Lutomirski free_pgd_range(&tlb, start, end, start, end); 403*ae8eba8bSWill Deacon tlb_finish_mmu(&tlb); 404f55f0501SAndy Lutomirski #endif 405f55f0501SAndy Lutomirski } 406f55f0501SAndy Lutomirski 40737868fe1SAndy Lutomirski /* After calling this, the LDT is immutable. */ 40837868fe1SAndy Lutomirski static void finalize_ldt_struct(struct ldt_struct *ldt) 40977e463d1SThomas Gleixner { 410bbf79d21SBorislav Petkov paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); 41137868fe1SAndy Lutomirski } 41277e463d1SThomas Gleixner 413c2b3496bSPeter Zijlstra static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) 41437868fe1SAndy Lutomirski { 415c2b3496bSPeter Zijlstra mutex_lock(&mm->context.lock); 41638ffbe66SJeremy Fitzhardinge 417c2b3496bSPeter Zijlstra /* Synchronizes with READ_ONCE in load_mm_ldt. */ 418c2b3496bSPeter Zijlstra smp_store_release(&mm->context.ldt, ldt); 419c2b3496bSPeter Zijlstra 420c2b3496bSPeter Zijlstra /* Activate the LDT for all CPUs using currents mm. */ 421c2b3496bSPeter Zijlstra on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); 422c2b3496bSPeter Zijlstra 423c2b3496bSPeter Zijlstra mutex_unlock(&mm->context.lock); 42437868fe1SAndy Lutomirski } 42537868fe1SAndy Lutomirski 42637868fe1SAndy Lutomirski static void free_ldt_struct(struct ldt_struct *ldt) 42737868fe1SAndy Lutomirski { 42837868fe1SAndy Lutomirski if (likely(!ldt)) 42937868fe1SAndy Lutomirski return; 43037868fe1SAndy Lutomirski 431bbf79d21SBorislav Petkov paravirt_free_ldt(ldt->entries, ldt->nr_entries); 432bbf79d21SBorislav Petkov if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE) 4338d5341a6SAndrey Ryabinin vfree_atomic(ldt->entries); 43437868fe1SAndy Lutomirski else 435f454b478SJan Beulich free_page((unsigned long)ldt->entries); 43637868fe1SAndy Lutomirski kfree(ldt); 43777e463d1SThomas Gleixner } 43877e463d1SThomas Gleixner 43977e463d1SThomas Gleixner /* 440a4828f81SThomas Gleixner * Called on fork from arch_dup_mmap(). Just copy the current LDT state, 441a4828f81SThomas Gleixner * the new task is not running, so nothing can be installed. 44277e463d1SThomas Gleixner */ 443a4828f81SThomas Gleixner int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) 44477e463d1SThomas Gleixner { 44537868fe1SAndy Lutomirski struct ldt_struct *new_ldt; 44677e463d1SThomas Gleixner int retval = 0; 44777e463d1SThomas Gleixner 448a4828f81SThomas Gleixner if (!old_mm) 44937868fe1SAndy Lutomirski return 0; 45037868fe1SAndy Lutomirski 45137868fe1SAndy Lutomirski mutex_lock(&old_mm->context.lock); 452a4828f81SThomas Gleixner if (!old_mm->context.ldt) 45337868fe1SAndy Lutomirski goto out_unlock; 45437868fe1SAndy Lutomirski 455bbf79d21SBorislav Petkov new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); 45637868fe1SAndy Lutomirski if (!new_ldt) { 45737868fe1SAndy Lutomirski retval = -ENOMEM; 45837868fe1SAndy Lutomirski goto out_unlock; 45937868fe1SAndy Lutomirski } 46037868fe1SAndy Lutomirski 46137868fe1SAndy Lutomirski memcpy(new_ldt->entries, old_mm->context.ldt->entries, 462bbf79d21SBorislav Petkov new_ldt->nr_entries * LDT_ENTRY_SIZE); 46337868fe1SAndy Lutomirski finalize_ldt_struct(new_ldt); 46437868fe1SAndy Lutomirski 465f55f0501SAndy Lutomirski retval = map_ldt_struct(mm, new_ldt, 0); 466f55f0501SAndy Lutomirski if (retval) { 467f55f0501SAndy Lutomirski free_ldt_pgtables(mm); 468f55f0501SAndy Lutomirski free_ldt_struct(new_ldt); 469f55f0501SAndy Lutomirski goto out_unlock; 470f55f0501SAndy Lutomirski } 47137868fe1SAndy Lutomirski mm->context.ldt = new_ldt; 47237868fe1SAndy Lutomirski 47337868fe1SAndy Lutomirski out_unlock: 47437868fe1SAndy Lutomirski mutex_unlock(&old_mm->context.lock); 47577e463d1SThomas Gleixner return retval; 47677e463d1SThomas Gleixner } 47777e463d1SThomas Gleixner 47877e463d1SThomas Gleixner /* 47977e463d1SThomas Gleixner * No need to lock the MM as we are the last user 48077e463d1SThomas Gleixner * 48177e463d1SThomas Gleixner * 64bit: Don't touch the LDT register - we're already in the next thread. 48277e463d1SThomas Gleixner */ 48339a0526fSDave Hansen void destroy_context_ldt(struct mm_struct *mm) 48477e463d1SThomas Gleixner { 48537868fe1SAndy Lutomirski free_ldt_struct(mm->context.ldt); 48637868fe1SAndy Lutomirski mm->context.ldt = NULL; 48777e463d1SThomas Gleixner } 48877e463d1SThomas Gleixner 489f55f0501SAndy Lutomirski void ldt_arch_exit_mmap(struct mm_struct *mm) 490f55f0501SAndy Lutomirski { 491f55f0501SAndy Lutomirski free_ldt_pgtables(mm); 492f55f0501SAndy Lutomirski } 493f55f0501SAndy Lutomirski 49477e463d1SThomas Gleixner static int read_ldt(void __user *ptr, unsigned long bytecount) 49577e463d1SThomas Gleixner { 49677e463d1SThomas Gleixner struct mm_struct *mm = current->mm; 497bbf79d21SBorislav Petkov unsigned long entries_size; 498bbf79d21SBorislav Petkov int retval; 49977e463d1SThomas Gleixner 500c2b3496bSPeter Zijlstra down_read(&mm->context.ldt_usr_sem); 50137868fe1SAndy Lutomirski 50237868fe1SAndy Lutomirski if (!mm->context.ldt) { 50337868fe1SAndy Lutomirski retval = 0; 50437868fe1SAndy Lutomirski goto out_unlock; 50537868fe1SAndy Lutomirski } 50637868fe1SAndy Lutomirski 50777e463d1SThomas Gleixner if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) 50877e463d1SThomas Gleixner bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; 50977e463d1SThomas Gleixner 510bbf79d21SBorislav Petkov entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE; 511bbf79d21SBorislav Petkov if (entries_size > bytecount) 512bbf79d21SBorislav Petkov entries_size = bytecount; 51377e463d1SThomas Gleixner 514bbf79d21SBorislav Petkov if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) { 51537868fe1SAndy Lutomirski retval = -EFAULT; 51637868fe1SAndy Lutomirski goto out_unlock; 51737868fe1SAndy Lutomirski } 51837868fe1SAndy Lutomirski 519bbf79d21SBorislav Petkov if (entries_size != bytecount) { 52037868fe1SAndy Lutomirski /* Zero-fill the rest and pretend we read bytecount bytes. */ 521bbf79d21SBorislav Petkov if (clear_user(ptr + entries_size, bytecount - entries_size)) { 52237868fe1SAndy Lutomirski retval = -EFAULT; 52337868fe1SAndy Lutomirski goto out_unlock; 52477e463d1SThomas Gleixner } 52577e463d1SThomas Gleixner } 52637868fe1SAndy Lutomirski retval = bytecount; 52737868fe1SAndy Lutomirski 52837868fe1SAndy Lutomirski out_unlock: 529c2b3496bSPeter Zijlstra up_read(&mm->context.ldt_usr_sem); 53037868fe1SAndy Lutomirski return retval; 53177e463d1SThomas Gleixner } 53277e463d1SThomas Gleixner 53377e463d1SThomas Gleixner static int read_default_ldt(void __user *ptr, unsigned long bytecount) 53477e463d1SThomas Gleixner { 53577e463d1SThomas Gleixner /* CHECKME: Can we use _one_ random number ? */ 53677e463d1SThomas Gleixner #ifdef CONFIG_X86_32 53777e463d1SThomas Gleixner unsigned long size = 5 * sizeof(struct desc_struct); 53877e463d1SThomas Gleixner #else 53977e463d1SThomas Gleixner unsigned long size = 128; 54077e463d1SThomas Gleixner #endif 54177e463d1SThomas Gleixner if (bytecount > size) 54277e463d1SThomas Gleixner bytecount = size; 54377e463d1SThomas Gleixner if (clear_user(ptr, bytecount)) 54477e463d1SThomas Gleixner return -EFAULT; 54577e463d1SThomas Gleixner return bytecount; 54677e463d1SThomas Gleixner } 54777e463d1SThomas Gleixner 548cc801833SAndy Lutomirski static bool allow_16bit_segments(void) 549cc801833SAndy Lutomirski { 550cc801833SAndy Lutomirski if (!IS_ENABLED(CONFIG_X86_16BIT)) 551cc801833SAndy Lutomirski return false; 552cc801833SAndy Lutomirski 553cc801833SAndy Lutomirski #ifdef CONFIG_XEN_PV 554cc801833SAndy Lutomirski /* 555cc801833SAndy Lutomirski * Xen PV does not implement ESPFIX64, which means that 16-bit 556cc801833SAndy Lutomirski * segments will not work correctly. Until either Xen PV implements 557cc801833SAndy Lutomirski * ESPFIX64 and can signal this fact to the guest or unless someone 558cc801833SAndy Lutomirski * provides compelling evidence that allowing broken 16-bit segments 559cc801833SAndy Lutomirski * is worthwhile, disallow 16-bit segments under Xen PV. 560cc801833SAndy Lutomirski */ 561cc801833SAndy Lutomirski if (xen_pv_domain()) { 562bb5a93aaSLinus Torvalds pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); 563cc801833SAndy Lutomirski return false; 564cc801833SAndy Lutomirski } 565cc801833SAndy Lutomirski #endif 566cc801833SAndy Lutomirski 567cc801833SAndy Lutomirski return true; 568cc801833SAndy Lutomirski } 569cc801833SAndy Lutomirski 57077e463d1SThomas Gleixner static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) 57177e463d1SThomas Gleixner { 57277e463d1SThomas Gleixner struct mm_struct *mm = current->mm; 573990e9dc3SThomas Gleixner struct ldt_struct *new_ldt, *old_ldt; 574bbf79d21SBorislav Petkov unsigned int old_nr_entries, new_nr_entries; 575990e9dc3SThomas Gleixner struct user_desc ldt_info; 5765af72502SGlauber de Oliveira Costa struct desc_struct ldt; 57777e463d1SThomas Gleixner int error; 57877e463d1SThomas Gleixner 57977e463d1SThomas Gleixner error = -EINVAL; 58077e463d1SThomas Gleixner if (bytecount != sizeof(ldt_info)) 58177e463d1SThomas Gleixner goto out; 58277e463d1SThomas Gleixner error = -EFAULT; 58377e463d1SThomas Gleixner if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) 58477e463d1SThomas Gleixner goto out; 58577e463d1SThomas Gleixner 58677e463d1SThomas Gleixner error = -EINVAL; 58777e463d1SThomas Gleixner if (ldt_info.entry_number >= LDT_ENTRIES) 58877e463d1SThomas Gleixner goto out; 58977e463d1SThomas Gleixner if (ldt_info.contents == 3) { 59077e463d1SThomas Gleixner if (oldmode) 59177e463d1SThomas Gleixner goto out; 59277e463d1SThomas Gleixner if (ldt_info.seg_not_present == 0) 59377e463d1SThomas Gleixner goto out; 59477e463d1SThomas Gleixner } 59577e463d1SThomas Gleixner 59637868fe1SAndy Lutomirski if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || 59737868fe1SAndy Lutomirski LDT_empty(&ldt_info)) { 59837868fe1SAndy Lutomirski /* The user wants to clear the entry. */ 5995af72502SGlauber de Oliveira Costa memset(&ldt, 0, sizeof(ldt)); 60037868fe1SAndy Lutomirski } else { 601cc801833SAndy Lutomirski if (!ldt_info.seg_32bit && !allow_16bit_segments()) { 60234273f41SH. Peter Anvin error = -EINVAL; 60337868fe1SAndy Lutomirski goto out; 60434273f41SH. Peter Anvin } 60534273f41SH. Peter Anvin 60680fbb69aSGlauber de Oliveira Costa fill_ldt(&ldt, &ldt_info); 60777e463d1SThomas Gleixner if (oldmode) 6085af72502SGlauber de Oliveira Costa ldt.avl = 0; 60937868fe1SAndy Lutomirski } 61077e463d1SThomas Gleixner 611c2b3496bSPeter Zijlstra if (down_write_killable(&mm->context.ldt_usr_sem)) 612c2b3496bSPeter Zijlstra return -EINTR; 61337868fe1SAndy Lutomirski 61437868fe1SAndy Lutomirski old_ldt = mm->context.ldt; 615bbf79d21SBorislav Petkov old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; 616bbf79d21SBorislav Petkov new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries); 61737868fe1SAndy Lutomirski 61837868fe1SAndy Lutomirski error = -ENOMEM; 619bbf79d21SBorislav Petkov new_ldt = alloc_ldt_struct(new_nr_entries); 62037868fe1SAndy Lutomirski if (!new_ldt) 62137868fe1SAndy Lutomirski goto out_unlock; 62237868fe1SAndy Lutomirski 62337868fe1SAndy Lutomirski if (old_ldt) 624bbf79d21SBorislav Petkov memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE); 625bbf79d21SBorislav Petkov 62637868fe1SAndy Lutomirski new_ldt->entries[ldt_info.entry_number] = ldt; 62737868fe1SAndy Lutomirski finalize_ldt_struct(new_ldt); 62837868fe1SAndy Lutomirski 629f55f0501SAndy Lutomirski /* 630f55f0501SAndy Lutomirski * If we are using PTI, map the new LDT into the userspace pagetables. 631f55f0501SAndy Lutomirski * If there is already an LDT, use the other slot so that other CPUs 632f55f0501SAndy Lutomirski * will continue to use the old LDT until install_ldt() switches 633f55f0501SAndy Lutomirski * them over to the new LDT. 634f55f0501SAndy Lutomirski */ 635f55f0501SAndy Lutomirski error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); 636f55f0501SAndy Lutomirski if (error) { 637a62d6985SThomas Gleixner /* 638a62d6985SThomas Gleixner * This only can fail for the first LDT setup. If an LDT is 639a62d6985SThomas Gleixner * already installed then the PTE page is already 640a62d6985SThomas Gleixner * populated. Mop up a half populated page table. 641a62d6985SThomas Gleixner */ 6427f414195SThomas Gleixner if (!WARN_ON_ONCE(old_ldt)) 643a62d6985SThomas Gleixner free_ldt_pgtables(mm); 644a62d6985SThomas Gleixner free_ldt_struct(new_ldt); 645f55f0501SAndy Lutomirski goto out_unlock; 646f55f0501SAndy Lutomirski } 647f55f0501SAndy Lutomirski 64837868fe1SAndy Lutomirski install_ldt(mm, new_ldt); 649a0e6e083SKirill A. Shutemov unmap_ldt_struct(mm, old_ldt); 65037868fe1SAndy Lutomirski free_ldt_struct(old_ldt); 65177e463d1SThomas Gleixner error = 0; 65277e463d1SThomas Gleixner 65377e463d1SThomas Gleixner out_unlock: 654c2b3496bSPeter Zijlstra up_write(&mm->context.ldt_usr_sem); 65577e463d1SThomas Gleixner out: 65677e463d1SThomas Gleixner return error; 65777e463d1SThomas Gleixner } 65877e463d1SThomas Gleixner 659da20ab35SDave Hansen SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , 660da20ab35SDave Hansen unsigned long , bytecount) 66177e463d1SThomas Gleixner { 66277e463d1SThomas Gleixner int ret = -ENOSYS; 66377e463d1SThomas Gleixner 66477e463d1SThomas Gleixner switch (func) { 66577e463d1SThomas Gleixner case 0: 66677e463d1SThomas Gleixner ret = read_ldt(ptr, bytecount); 66777e463d1SThomas Gleixner break; 66877e463d1SThomas Gleixner case 1: 66977e463d1SThomas Gleixner ret = write_ldt(ptr, bytecount, 1); 67077e463d1SThomas Gleixner break; 67177e463d1SThomas Gleixner case 2: 67277e463d1SThomas Gleixner ret = read_default_ldt(ptr, bytecount); 67377e463d1SThomas Gleixner break; 67477e463d1SThomas Gleixner case 0x11: 67577e463d1SThomas Gleixner ret = write_ldt(ptr, bytecount, 0); 67677e463d1SThomas Gleixner break; 67777e463d1SThomas Gleixner } 678da20ab35SDave Hansen /* 679da20ab35SDave Hansen * The SYSCALL_DEFINE() macros give us an 'unsigned long' 680da20ab35SDave Hansen * return type, but tht ABI for sys_modify_ldt() expects 681da20ab35SDave Hansen * 'int'. This cast gives us an int-sized value in %rax 682da20ab35SDave Hansen * for the return code. The 'unsigned' is necessary so 683da20ab35SDave Hansen * the compiler does not try to sign-extend the negative 684da20ab35SDave Hansen * return codes into the high half of the register when 685da20ab35SDave Hansen * taking the value from int->long. 686da20ab35SDave Hansen */ 687da20ab35SDave Hansen return (unsigned int)ret; 68877e463d1SThomas Gleixner } 689