1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
277e463d1SThomas Gleixner /*
377e463d1SThomas Gleixner * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
477e463d1SThomas Gleixner * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
577e463d1SThomas Gleixner * Copyright (C) 2002 Andi Kleen
677e463d1SThomas Gleixner *
777e463d1SThomas Gleixner * This handles calls from both 32bit and 64bit mode.
8c2b3496bSPeter Zijlstra *
9c2b3496bSPeter Zijlstra * Lock order:
10c2b3496bSPeter Zijlstra * context.ldt_usr_sem
11c1e8d7c6SMichel Lespinasse * mmap_lock
12c2b3496bSPeter Zijlstra * context.lock
1377e463d1SThomas Gleixner */
1477e463d1SThomas Gleixner
1577e463d1SThomas Gleixner #include <linux/errno.h>
165a0e3ad6STejun Heo #include <linux/gfp.h>
1777e463d1SThomas Gleixner #include <linux/sched.h>
1877e463d1SThomas Gleixner #include <linux/string.h>
1977e463d1SThomas Gleixner #include <linux/mm.h>
2077e463d1SThomas Gleixner #include <linux/smp.h>
21da20ab35SDave Hansen #include <linux/syscalls.h>
2237868fe1SAndy Lutomirski #include <linux/slab.h>
2377e463d1SThomas Gleixner #include <linux/vmalloc.h>
24423a5405SJaswinder Singh Rajput #include <linux/uaccess.h>
2577e463d1SThomas Gleixner
2677e463d1SThomas Gleixner #include <asm/ldt.h>
27f55f0501SAndy Lutomirski #include <asm/tlb.h>
2877e463d1SThomas Gleixner #include <asm/desc.h>
2977e463d1SThomas Gleixner #include <asm/mmu_context.h>
30186525bdSIngo Molnar #include <asm/pgtable_areas.h>
31186525bdSIngo Molnar
32cc801833SAndy Lutomirski #include <xen/xen.h>
33cc801833SAndy Lutomirski
/*
 * Distance in bytes between two consecutive LDT alias slots: room for a
 * full LDT of LDT_ENTRIES descriptors.  This is a multiple of PAGE_SIZE.
 */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
36186525bdSIngo Molnar
ldt_slot_va(int slot)37186525bdSIngo Molnar static inline void *ldt_slot_va(int slot)
38186525bdSIngo Molnar {
39186525bdSIngo Molnar return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
40186525bdSIngo Molnar }
41186525bdSIngo Molnar
load_mm_ldt(struct mm_struct * mm)42186525bdSIngo Molnar void load_mm_ldt(struct mm_struct *mm)
43186525bdSIngo Molnar {
44186525bdSIngo Molnar struct ldt_struct *ldt;
45186525bdSIngo Molnar
46186525bdSIngo Molnar /* READ_ONCE synchronizes with smp_store_release */
47186525bdSIngo Molnar ldt = READ_ONCE(mm->context.ldt);
48186525bdSIngo Molnar
49186525bdSIngo Molnar /*
50186525bdSIngo Molnar * Any change to mm->context.ldt is followed by an IPI to all
51186525bdSIngo Molnar * CPUs with the mm active. The LDT will not be freed until
52186525bdSIngo Molnar * after the IPI is handled by all such CPUs. This means that,
53186525bdSIngo Molnar * if the ldt_struct changes before we return, the values we see
54186525bdSIngo Molnar * will be safe, and the new values will be loaded before we run
55186525bdSIngo Molnar * any user code.
56186525bdSIngo Molnar *
57186525bdSIngo Molnar * NB: don't try to convert this to use RCU without extreme care.
58186525bdSIngo Molnar * We would still need IRQs off, because we don't want to change
59186525bdSIngo Molnar * the local LDT after an IPI loaded a newer value than the one
60186525bdSIngo Molnar * that we can see.
61186525bdSIngo Molnar */
62186525bdSIngo Molnar
63186525bdSIngo Molnar if (unlikely(ldt)) {
64186525bdSIngo Molnar if (static_cpu_has(X86_FEATURE_PTI)) {
65186525bdSIngo Molnar if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
66186525bdSIngo Molnar /*
67186525bdSIngo Molnar * Whoops -- either the new LDT isn't mapped
68186525bdSIngo Molnar * (if slot == -1) or is mapped into a bogus
69186525bdSIngo Molnar * slot (if slot > 1).
70186525bdSIngo Molnar */
71186525bdSIngo Molnar clear_LDT();
72186525bdSIngo Molnar return;
73186525bdSIngo Molnar }
74186525bdSIngo Molnar
75186525bdSIngo Molnar /*
76186525bdSIngo Molnar * If page table isolation is enabled, ldt->entries
77186525bdSIngo Molnar * will not be mapped in the userspace pagetables.
78186525bdSIngo Molnar * Tell the CPU to access the LDT through the alias
79186525bdSIngo Molnar * at ldt_slot_va(ldt->slot).
80186525bdSIngo Molnar */
81186525bdSIngo Molnar set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
82186525bdSIngo Molnar } else {
83186525bdSIngo Molnar set_ldt(ldt->entries, ldt->nr_entries);
84186525bdSIngo Molnar }
85186525bdSIngo Molnar } else {
86186525bdSIngo Molnar clear_LDT();
87186525bdSIngo Molnar }
88186525bdSIngo Molnar }
89186525bdSIngo Molnar
switch_ldt(struct mm_struct * prev,struct mm_struct * next)90186525bdSIngo Molnar void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
91186525bdSIngo Molnar {
92186525bdSIngo Molnar /*
93186525bdSIngo Molnar * Load the LDT if either the old or new mm had an LDT.
94186525bdSIngo Molnar *
95186525bdSIngo Molnar * An mm will never go from having an LDT to not having an LDT. Two
96186525bdSIngo Molnar * mms never share an LDT, so we don't gain anything by checking to
97186525bdSIngo Molnar * see whether the LDT changed. There's also no guarantee that
98186525bdSIngo Molnar * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
99186525bdSIngo Molnar * then prev->context.ldt will also be non-NULL.
100186525bdSIngo Molnar *
101186525bdSIngo Molnar * If we really cared, we could optimize the case where prev == next
102186525bdSIngo Molnar * and we're exiting lazy mode. Most of the time, if this happens,
103186525bdSIngo Molnar * we don't actually need to reload LDTR, but modify_ldt() is mostly
104186525bdSIngo Molnar * used by legacy code and emulators where we don't need this level of
105186525bdSIngo Molnar * performance.
106186525bdSIngo Molnar *
107186525bdSIngo Molnar * This uses | instead of || because it generates better code.
108186525bdSIngo Molnar */
109186525bdSIngo Molnar if (unlikely((unsigned long)prev->context.ldt |
110186525bdSIngo Molnar (unsigned long)next->context.ldt))
111186525bdSIngo Molnar load_mm_ldt(next);
112186525bdSIngo Molnar
113186525bdSIngo Molnar DEBUG_LOCKS_WARN_ON(preemptible());
114186525bdSIngo Molnar }
11577e463d1SThomas Gleixner
/*
 * On 64-bit, reload DS and ES when they currently hold an LDT selector so
 * that their cached descriptors match the updated LDT.  Does nothing when
 * CONFIG_X86_64 is not set.
 */
static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
	unsigned short ds_sel, es_sel;

	/* DS first: only selectors with the table-indicator set to LDT. */
	savesegment(ds, ds_sel);
	if ((ds_sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(ds, ds_sel);

	/* Then the same treatment for ES. */
	savesegment(es, es_sel);
	if ((es_sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(es, es_sel);
#endif
}
134a6323757SAndy Lutomirski
135c2b3496bSPeter Zijlstra /* context.lock is held by the task which issued the smp function call */
flush_ldt(void * __mm)1363d28ebceSAndy Lutomirski static void flush_ldt(void *__mm)
13777e463d1SThomas Gleixner {
1383d28ebceSAndy Lutomirski struct mm_struct *mm = __mm;
13977e463d1SThomas Gleixner
1403d28ebceSAndy Lutomirski if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
14137868fe1SAndy Lutomirski return;
14237868fe1SAndy Lutomirski
143f55f0501SAndy Lutomirski load_mm_ldt(mm);
144a6323757SAndy Lutomirski
145a6323757SAndy Lutomirski refresh_ldt_segments();
14637868fe1SAndy Lutomirski }
14737868fe1SAndy Lutomirski
14837868fe1SAndy Lutomirski /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
alloc_ldt_struct(unsigned int num_entries)149bbf79d21SBorislav Petkov static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
15077e463d1SThomas Gleixner {
15137868fe1SAndy Lutomirski struct ldt_struct *new_ldt;
152990e9dc3SThomas Gleixner unsigned int alloc_size;
15377e463d1SThomas Gleixner
154bbf79d21SBorislav Petkov if (num_entries > LDT_ENTRIES)
15537868fe1SAndy Lutomirski return NULL;
15637868fe1SAndy Lutomirski
157ec403e2aSVasily Averin new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT);
15837868fe1SAndy Lutomirski if (!new_ldt)
15937868fe1SAndy Lutomirski return NULL;
16037868fe1SAndy Lutomirski
16137868fe1SAndy Lutomirski BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
162bbf79d21SBorislav Petkov alloc_size = num_entries * LDT_ENTRY_SIZE;
16337868fe1SAndy Lutomirski
16437868fe1SAndy Lutomirski /*
16537868fe1SAndy Lutomirski * Xen is very picky: it requires a page-aligned LDT that has no
16637868fe1SAndy Lutomirski * trailing nonzero bytes in any page that contains LDT descriptors.
16737868fe1SAndy Lutomirski * Keep it simple: zero the whole allocation and never allocate less
16837868fe1SAndy Lutomirski * than PAGE_SIZE.
16937868fe1SAndy Lutomirski */
17037868fe1SAndy Lutomirski if (alloc_size > PAGE_SIZE)
171ec403e2aSVasily Averin new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
17277e463d1SThomas Gleixner else
173ec403e2aSVasily Averin new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
17477e463d1SThomas Gleixner
17537868fe1SAndy Lutomirski if (!new_ldt->entries) {
17637868fe1SAndy Lutomirski kfree(new_ldt);
17737868fe1SAndy Lutomirski return NULL;
17877e463d1SThomas Gleixner }
17977e463d1SThomas Gleixner
180f55f0501SAndy Lutomirski /* The new LDT isn't aliased for PTI yet. */
181f55f0501SAndy Lutomirski new_ldt->slot = -1;
182f55f0501SAndy Lutomirski
183bbf79d21SBorislav Petkov new_ldt->nr_entries = num_entries;
18437868fe1SAndy Lutomirski return new_ldt;
18537868fe1SAndy Lutomirski }
18637868fe1SAndy Lutomirski
1879bae3197SJoerg Roedel #ifdef CONFIG_PAGE_TABLE_ISOLATION
1889bae3197SJoerg Roedel
/*
 * Warn when the observed state of the LDT page-table entries disagrees
 * with whether @mm already has an LDT.  The user-side checks only apply
 * when PTI is active.
 */
static void do_sanity_check(struct mm_struct *mm,
			    bool had_kernel_mapping,
			    bool had_user_mapping)
{
	if (mm->context.ldt) {
		/*
		 * We already had an LDT.  The top-level entry should already
		 * have been allocated and synchronized with the usermode
		 * tables.
		 */
		WARN_ON(!had_kernel_mapping);
		WARN_ON(boot_cpu_has(X86_FEATURE_PTI) && !had_user_mapping);
	} else {
		/*
		 * This is the first time we're mapping an LDT for this
		 * process, so neither mapping should exist yet.
		 */
		WARN_ON(had_kernel_mapping);
		WARN_ON(boot_cpu_has(X86_FEATURE_PTI) && had_user_mapping);
	}
}
2129bae3197SJoerg Roedel
2136df934b9SJoerg Roedel #ifdef CONFIG_X86_PAE
2146df934b9SJoerg Roedel
pgd_to_pmd_walk(pgd_t * pgd,unsigned long va)2156df934b9SJoerg Roedel static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
2166df934b9SJoerg Roedel {
2176df934b9SJoerg Roedel p4d_t *p4d;
2186df934b9SJoerg Roedel pud_t *pud;
2196df934b9SJoerg Roedel
2206df934b9SJoerg Roedel if (pgd->pgd == 0)
2216df934b9SJoerg Roedel return NULL;
2226df934b9SJoerg Roedel
2236df934b9SJoerg Roedel p4d = p4d_offset(pgd, va);
2246df934b9SJoerg Roedel if (p4d_none(*p4d))
2256df934b9SJoerg Roedel return NULL;
2266df934b9SJoerg Roedel
2276df934b9SJoerg Roedel pud = pud_offset(p4d, va);
2286df934b9SJoerg Roedel if (pud_none(*pud))
2296df934b9SJoerg Roedel return NULL;
2306df934b9SJoerg Roedel
2316df934b9SJoerg Roedel return pmd_offset(pud, va);
2326df934b9SJoerg Roedel }
2336df934b9SJoerg Roedel
map_ldt_struct_to_user(struct mm_struct * mm)2346df934b9SJoerg Roedel static void map_ldt_struct_to_user(struct mm_struct *mm)
2356df934b9SJoerg Roedel {
2366df934b9SJoerg Roedel pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
2376df934b9SJoerg Roedel pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
2386df934b9SJoerg Roedel pmd_t *k_pmd, *u_pmd;
2396df934b9SJoerg Roedel
2406df934b9SJoerg Roedel k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
2416df934b9SJoerg Roedel u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
2426df934b9SJoerg Roedel
24367e87d43SBorislav Petkov if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
2446df934b9SJoerg Roedel set_pmd(u_pmd, *k_pmd);
2456df934b9SJoerg Roedel }
2466df934b9SJoerg Roedel
sanity_check_ldt_mapping(struct mm_struct * mm)2476df934b9SJoerg Roedel static void sanity_check_ldt_mapping(struct mm_struct *mm)
2486df934b9SJoerg Roedel {
2496df934b9SJoerg Roedel pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
2506df934b9SJoerg Roedel pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
2516df934b9SJoerg Roedel bool had_kernel, had_user;
2526df934b9SJoerg Roedel pmd_t *k_pmd, *u_pmd;
2536df934b9SJoerg Roedel
2546df934b9SJoerg Roedel k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
2556df934b9SJoerg Roedel u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
2566df934b9SJoerg Roedel had_kernel = (k_pmd->pmd != 0);
2576df934b9SJoerg Roedel had_user = (u_pmd->pmd != 0);
2586df934b9SJoerg Roedel
2596df934b9SJoerg Roedel do_sanity_check(mm, had_kernel, had_user);
2606df934b9SJoerg Roedel }
2616df934b9SJoerg Roedel
2626df934b9SJoerg Roedel #else /* !CONFIG_X86_PAE */
2636df934b9SJoerg Roedel
map_ldt_struct_to_user(struct mm_struct * mm)2649bae3197SJoerg Roedel static void map_ldt_struct_to_user(struct mm_struct *mm)
2659bae3197SJoerg Roedel {
2669bae3197SJoerg Roedel pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
2679bae3197SJoerg Roedel
26867e87d43SBorislav Petkov if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
2699bae3197SJoerg Roedel set_pgd(kernel_to_user_pgdp(pgd), *pgd);
2709bae3197SJoerg Roedel }
2719bae3197SJoerg Roedel
sanity_check_ldt_mapping(struct mm_struct * mm)2729bae3197SJoerg Roedel static void sanity_check_ldt_mapping(struct mm_struct *mm)
2739bae3197SJoerg Roedel {
2749bae3197SJoerg Roedel pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
2759bae3197SJoerg Roedel bool had_kernel = (pgd->pgd != 0);
2769bae3197SJoerg Roedel bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0);
2779bae3197SJoerg Roedel
2789bae3197SJoerg Roedel do_sanity_check(mm, had_kernel, had_user);
2799bae3197SJoerg Roedel }
2809bae3197SJoerg Roedel
2816df934b9SJoerg Roedel #endif /* CONFIG_X86_PAE */
2826df934b9SJoerg Roedel
283f55f0501SAndy Lutomirski /*
284f55f0501SAndy Lutomirski * If PTI is enabled, this maps the LDT into the kernelmode and
285f55f0501SAndy Lutomirski * usermode tables for the given mm.
286f55f0501SAndy Lutomirski */
287f55f0501SAndy Lutomirski static int
map_ldt_struct(struct mm_struct * mm,struct ldt_struct * ldt,int slot)288f55f0501SAndy Lutomirski map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
289f55f0501SAndy Lutomirski {
290f55f0501SAndy Lutomirski unsigned long va;
2919bae3197SJoerg Roedel bool is_vmalloc;
292f55f0501SAndy Lutomirski spinlock_t *ptl;
293a0e6e083SKirill A. Shutemov int i, nr_pages;
294f55f0501SAndy Lutomirski
29567e87d43SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_PTI))
296f55f0501SAndy Lutomirski return 0;
297f55f0501SAndy Lutomirski
298f55f0501SAndy Lutomirski /*
299f55f0501SAndy Lutomirski * Any given ldt_struct should have map_ldt_struct() called at most
300f55f0501SAndy Lutomirski * once.
301f55f0501SAndy Lutomirski */
302f55f0501SAndy Lutomirski WARN_ON(ldt->slot != -1);
303f55f0501SAndy Lutomirski
3049bae3197SJoerg Roedel /* Check if the current mappings are sane */
3059bae3197SJoerg Roedel sanity_check_ldt_mapping(mm);
3069bae3197SJoerg Roedel
307f55f0501SAndy Lutomirski is_vmalloc = is_vmalloc_addr(ldt->entries);
308f55f0501SAndy Lutomirski
309a0e6e083SKirill A. Shutemov nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
310a0e6e083SKirill A. Shutemov
311a0e6e083SKirill A. Shutemov for (i = 0; i < nr_pages; i++) {
312f55f0501SAndy Lutomirski unsigned long offset = i << PAGE_SHIFT;
313f55f0501SAndy Lutomirski const void *src = (char *)ldt->entries + offset;
314f55f0501SAndy Lutomirski unsigned long pfn;
315fb43d6cbSDave Hansen pgprot_t pte_prot;
316f55f0501SAndy Lutomirski pte_t pte, *ptep;
317f55f0501SAndy Lutomirski
318f55f0501SAndy Lutomirski va = (unsigned long)ldt_slot_va(slot) + offset;
319f55f0501SAndy Lutomirski pfn = is_vmalloc ? vmalloc_to_pfn(src) :
320f55f0501SAndy Lutomirski page_to_pfn(virt_to_page(src));
321f55f0501SAndy Lutomirski /*
322f55f0501SAndy Lutomirski * Treat the PTI LDT range as a *userspace* range.
323f55f0501SAndy Lutomirski * get_locked_pte() will allocate all needed pagetables
324f55f0501SAndy Lutomirski * and account for them in this mm.
325f55f0501SAndy Lutomirski */
326f55f0501SAndy Lutomirski ptep = get_locked_pte(mm, va, &ptl);
327f55f0501SAndy Lutomirski if (!ptep)
328f55f0501SAndy Lutomirski return -ENOMEM;
3299f5cb6b3SThomas Gleixner /*
3309f5cb6b3SThomas Gleixner * Map it RO so the easy to find address is not a primary
3319f5cb6b3SThomas Gleixner * target via some kernel interface which misses a
3329f5cb6b3SThomas Gleixner * permission check.
3339f5cb6b3SThomas Gleixner */
334fb43d6cbSDave Hansen pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
335fb43d6cbSDave Hansen /* Filter out unsuppored __PAGE_KERNEL* bits: */
336e6f39e87SJoerg Roedel pgprot_val(pte_prot) &= __supported_pte_mask;
337fb43d6cbSDave Hansen pte = pfn_pte(pfn, pte_prot);
338f55f0501SAndy Lutomirski set_pte_at(mm, va, ptep, pte);
339f55f0501SAndy Lutomirski pte_unmap_unlock(ptep, ptl);
340f55f0501SAndy Lutomirski }
341f55f0501SAndy Lutomirski
3429bae3197SJoerg Roedel /* Propagate LDT mapping to the user page-table */
3439bae3197SJoerg Roedel map_ldt_struct_to_user(mm);
344f55f0501SAndy Lutomirski
345f55f0501SAndy Lutomirski ldt->slot = slot;
346f55f0501SAndy Lutomirski return 0;
347f55f0501SAndy Lutomirski }
348f55f0501SAndy Lutomirski
unmap_ldt_struct(struct mm_struct * mm,struct ldt_struct * ldt)349a0e6e083SKirill A. Shutemov static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
350a0e6e083SKirill A. Shutemov {
351a0e6e083SKirill A. Shutemov unsigned long va;
352a0e6e083SKirill A. Shutemov int i, nr_pages;
353a0e6e083SKirill A. Shutemov
354a0e6e083SKirill A. Shutemov if (!ldt)
355a0e6e083SKirill A. Shutemov return;
356a0e6e083SKirill A. Shutemov
357a0e6e083SKirill A. Shutemov /* LDT map/unmap is only required for PTI */
35867e87d43SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_PTI))
359a0e6e083SKirill A. Shutemov return;
360a0e6e083SKirill A. Shutemov
361a0e6e083SKirill A. Shutemov nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
362a0e6e083SKirill A. Shutemov
363a0e6e083SKirill A. Shutemov for (i = 0; i < nr_pages; i++) {
364a0e6e083SKirill A. Shutemov unsigned long offset = i << PAGE_SHIFT;
365a0e6e083SKirill A. Shutemov spinlock_t *ptl;
366a0e6e083SKirill A. Shutemov pte_t *ptep;
367a0e6e083SKirill A. Shutemov
368a0e6e083SKirill A. Shutemov va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
369a0e6e083SKirill A. Shutemov ptep = get_locked_pte(mm, va, &ptl);
370*975ca398SHugh Dickins if (!WARN_ON_ONCE(!ptep)) {
371a0e6e083SKirill A. Shutemov pte_clear(mm, va, ptep);
372a0e6e083SKirill A. Shutemov pte_unmap_unlock(ptep, ptl);
373a0e6e083SKirill A. Shutemov }
374*975ca398SHugh Dickins }
375a0e6e083SKirill A. Shutemov
376a0e6e083SKirill A. Shutemov va = (unsigned long)ldt_slot_va(ldt->slot);
377a0e6e083SKirill A. Shutemov flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
378a0e6e083SKirill A. Shutemov }
379a0e6e083SKirill A. Shutemov
3809bae3197SJoerg Roedel #else /* !CONFIG_PAGE_TABLE_ISOLATION */
3819bae3197SJoerg Roedel
/*
 * Stub for kernels built without CONFIG_PAGE_TABLE_ISOLATION: there is no
 * alias to set up, so always report success.
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	return 0;
}
387a0e6e083SKirill A. Shutemov
/*
 * Stub for kernels built without CONFIG_PAGE_TABLE_ISOLATION: nothing was
 * mapped, so there is nothing to undo.
 */
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
3919bae3197SJoerg Roedel #endif /* CONFIG_PAGE_TABLE_ISOLATION */
3929bae3197SJoerg Roedel
/*
 * Free the page tables of @mm that cover the LDT alias range
 * [LDT_BASE_ADDR, LDT_END_ADDR).  Only does work when the kernel is built
 * with CONFIG_PAGE_TABLE_ISOLATION and PTI is active.
 */
static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	const unsigned long start = LDT_BASE_ADDR;
	const unsigned long end = LDT_END_ADDR;
	struct mmu_gather tlb;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * Although free_pgd_range() is intended for freeing user
	 * page-tables, it also works out for kernel mappings on x86.
	 * We use tlb_gather_mmu_fullmm() to avoid confusing the
	 * range-tracking logic in __tlb_adjust_range().
	 */
	tlb_gather_mmu_fullmm(&tlb, mm);
	free_pgd_range(&tlb, start, end, start, end);
	tlb_finish_mmu(&tlb);
#endif
}
414f55f0501SAndy Lutomirski
41537868fe1SAndy Lutomirski /* After calling this, the LDT is immutable. */
finalize_ldt_struct(struct ldt_struct * ldt)41637868fe1SAndy Lutomirski static void finalize_ldt_struct(struct ldt_struct *ldt)
41777e463d1SThomas Gleixner {
418bbf79d21SBorislav Petkov paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
41937868fe1SAndy Lutomirski }
42077e463d1SThomas Gleixner
install_ldt(struct mm_struct * mm,struct ldt_struct * ldt)421c2b3496bSPeter Zijlstra static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
42237868fe1SAndy Lutomirski {
423c2b3496bSPeter Zijlstra mutex_lock(&mm->context.lock);
42438ffbe66SJeremy Fitzhardinge
425c2b3496bSPeter Zijlstra /* Synchronizes with READ_ONCE in load_mm_ldt. */
426c2b3496bSPeter Zijlstra smp_store_release(&mm->context.ldt, ldt);
427c2b3496bSPeter Zijlstra
428c2b3496bSPeter Zijlstra /* Activate the LDT for all CPUs using currents mm. */
429c2b3496bSPeter Zijlstra on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
430c2b3496bSPeter Zijlstra
431c2b3496bSPeter Zijlstra mutex_unlock(&mm->context.lock);
43237868fe1SAndy Lutomirski }
43337868fe1SAndy Lutomirski
free_ldt_struct(struct ldt_struct * ldt)43437868fe1SAndy Lutomirski static void free_ldt_struct(struct ldt_struct *ldt)
43537868fe1SAndy Lutomirski {
43637868fe1SAndy Lutomirski if (likely(!ldt))
43737868fe1SAndy Lutomirski return;
43837868fe1SAndy Lutomirski
439bbf79d21SBorislav Petkov paravirt_free_ldt(ldt->entries, ldt->nr_entries);
440bbf79d21SBorislav Petkov if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
4418d5341a6SAndrey Ryabinin vfree_atomic(ldt->entries);
44237868fe1SAndy Lutomirski else
443f454b478SJan Beulich free_page((unsigned long)ldt->entries);
44437868fe1SAndy Lutomirski kfree(ldt);
44577e463d1SThomas Gleixner }
44677e463d1SThomas Gleixner
44777e463d1SThomas Gleixner /*
448a4828f81SThomas Gleixner * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
449a4828f81SThomas Gleixner * the new task is not running, so nothing can be installed.
45077e463d1SThomas Gleixner */
ldt_dup_context(struct mm_struct * old_mm,struct mm_struct * mm)451a4828f81SThomas Gleixner int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
45277e463d1SThomas Gleixner {
45337868fe1SAndy Lutomirski struct ldt_struct *new_ldt;
45477e463d1SThomas Gleixner int retval = 0;
45577e463d1SThomas Gleixner
456a4828f81SThomas Gleixner if (!old_mm)
45737868fe1SAndy Lutomirski return 0;
45837868fe1SAndy Lutomirski
45937868fe1SAndy Lutomirski mutex_lock(&old_mm->context.lock);
460a4828f81SThomas Gleixner if (!old_mm->context.ldt)
46137868fe1SAndy Lutomirski goto out_unlock;
46237868fe1SAndy Lutomirski
463bbf79d21SBorislav Petkov new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
46437868fe1SAndy Lutomirski if (!new_ldt) {
46537868fe1SAndy Lutomirski retval = -ENOMEM;
46637868fe1SAndy Lutomirski goto out_unlock;
46737868fe1SAndy Lutomirski }
46837868fe1SAndy Lutomirski
46937868fe1SAndy Lutomirski memcpy(new_ldt->entries, old_mm->context.ldt->entries,
470bbf79d21SBorislav Petkov new_ldt->nr_entries * LDT_ENTRY_SIZE);
47137868fe1SAndy Lutomirski finalize_ldt_struct(new_ldt);
47237868fe1SAndy Lutomirski
473f55f0501SAndy Lutomirski retval = map_ldt_struct(mm, new_ldt, 0);
474f55f0501SAndy Lutomirski if (retval) {
475f55f0501SAndy Lutomirski free_ldt_pgtables(mm);
476f55f0501SAndy Lutomirski free_ldt_struct(new_ldt);
477f55f0501SAndy Lutomirski goto out_unlock;
478f55f0501SAndy Lutomirski }
47937868fe1SAndy Lutomirski mm->context.ldt = new_ldt;
48037868fe1SAndy Lutomirski
48137868fe1SAndy Lutomirski out_unlock:
48237868fe1SAndy Lutomirski mutex_unlock(&old_mm->context.lock);
48377e463d1SThomas Gleixner return retval;
48477e463d1SThomas Gleixner }
48577e463d1SThomas Gleixner
48677e463d1SThomas Gleixner /*
48777e463d1SThomas Gleixner * No need to lock the MM as we are the last user
48877e463d1SThomas Gleixner *
48977e463d1SThomas Gleixner * 64bit: Don't touch the LDT register - we're already in the next thread.
49077e463d1SThomas Gleixner */
destroy_context_ldt(struct mm_struct * mm)49139a0526fSDave Hansen void destroy_context_ldt(struct mm_struct *mm)
49277e463d1SThomas Gleixner {
49337868fe1SAndy Lutomirski free_ldt_struct(mm->context.ldt);
49437868fe1SAndy Lutomirski mm->context.ldt = NULL;
49577e463d1SThomas Gleixner }
49677e463d1SThomas Gleixner
/* Release the page tables backing the LDT alias range of @mm. */
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}
501f55f0501SAndy Lutomirski
/*
 * Copy the current task's LDT to the user buffer @ptr.
 *
 * @bytecount is capped at the size of a full LDT.  If the LDT is shorter
 * than the (capped) request, the remainder of the buffer is zero-filled.
 * Returns the capped byte count, 0 if the mm has no LDT, or -EFAULT if the
 * user buffer cannot be written.
 */
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *ldt;
	unsigned long entries_size;
	int retval = 0;

	down_read(&mm->context.ldt_usr_sem);

	ldt = mm->context.ldt;
	if (!ldt)
		goto out_unlock;

	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

	entries_size = ldt->nr_entries * LDT_ENTRY_SIZE;
	if (entries_size > bytecount)
		entries_size = bytecount;

	/* Either user access below failing yields -EFAULT. */
	retval = -EFAULT;
	if (copy_to_user(ptr, ldt->entries, entries_size))
		goto out_unlock;

	/* Zero-fill the rest and pretend we read bytecount bytes. */
	if (entries_size != bytecount &&
	    clear_user(ptr + entries_size, bytecount - entries_size))
		goto out_unlock;

	retval = bytecount;

out_unlock:
	up_read(&mm->context.ldt_usr_sem);
	return retval;
}
54077e463d1SThomas Gleixner
/*
 * Zero-fill the user buffer @ptr with at most the size of the default LDT
 * (5 descriptors on 32-bit, 128 bytes otherwise).  Returns the number of
 * bytes cleared, or -EFAULT if the buffer cannot be written.
 */
static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
	/* CHECKME: Can we use _one_ random number ? */
#ifdef CONFIG_X86_32
	const unsigned long size = 5 * sizeof(struct desc_struct);
#else
	const unsigned long size = 128;
#endif

	if (bytecount > size)
		bytecount = size;

	return clear_user(ptr, bytecount) ? -EFAULT : bytecount;
}
55577e463d1SThomas Gleixner
allow_16bit_segments(void)556cc801833SAndy Lutomirski static bool allow_16bit_segments(void)
557cc801833SAndy Lutomirski {
558cc801833SAndy Lutomirski if (!IS_ENABLED(CONFIG_X86_16BIT))
559cc801833SAndy Lutomirski return false;
560cc801833SAndy Lutomirski
561cc801833SAndy Lutomirski #ifdef CONFIG_XEN_PV
562cc801833SAndy Lutomirski /*
563cc801833SAndy Lutomirski * Xen PV does not implement ESPFIX64, which means that 16-bit
564cc801833SAndy Lutomirski * segments will not work correctly. Until either Xen PV implements
565cc801833SAndy Lutomirski * ESPFIX64 and can signal this fact to the guest or unless someone
566cc801833SAndy Lutomirski * provides compelling evidence that allowing broken 16-bit segments
567cc801833SAndy Lutomirski * is worthwhile, disallow 16-bit segments under Xen PV.
568cc801833SAndy Lutomirski */
569cc801833SAndy Lutomirski if (xen_pv_domain()) {
570bb5a93aaSLinus Torvalds pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
571cc801833SAndy Lutomirski return false;
572cc801833SAndy Lutomirski }
573cc801833SAndy Lutomirski #endif
574cc801833SAndy Lutomirski
575cc801833SAndy Lutomirski return true;
576cc801833SAndy Lutomirski }
577cc801833SAndy Lutomirski
/*
 * write_ldt - install or clear one LDT entry for the current mm
 * @ptr:       user pointer to a struct user_desc describing the entry
 * @bytecount: size passed by the caller; must equal sizeof(struct user_desc)
 * @oldmode:   non-zero for the legacy write variant, which rejects
 *             contents == 3, treats a zero base and limit as a request
 *             to clear the entry, and forces the AVL bit to 0
 *
 * The request is validated and converted into a descriptor before any
 * lock is taken.  A new LDT is then allocated under
 * mm->context.ldt_usr_sem, populated from the old one plus the changed
 * entry, mapped, installed, and the old LDT is unmapped and freed.
 *
 * Returns 0 on success, -EINVAL for a malformed request, -EFAULT if the
 * descriptor cannot be copied from userspace, -EINTR if waiting for the
 * semaphore is interrupted, -ENOMEM if the new LDT cannot be allocated,
 * or the error reported by map_ldt_struct().
 */
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *prev, *next;
	unsigned int prev_count, next_count;
	struct user_desc info;
	struct desc_struct desc;
	int slot;
	int err;

	/* Nothing is held yet, so validation failures can return directly. */
	if (bytecount != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, ptr, sizeof(info)))
		return -EFAULT;

	if (info.entry_number >= LDT_ENTRIES)
		return -EINVAL;

	/*
	 * contents == 3 is never accepted in oldmode, and otherwise only
	 * when the segment is marked not-present.
	 */
	if (info.contents == 3 && (oldmode || info.seg_not_present == 0))
		return -EINVAL;

	if ((oldmode && !info.base_addr && !info.limit) ||
	    LDT_empty(&info)) {
		/* The caller is asking for the entry to be cleared. */
		memset(&desc, 0, sizeof(desc));
	} else {
		if (!info.seg_32bit && !allow_16bit_segments())
			return -EINVAL;

		fill_ldt(&desc, &info);
		if (oldmode)
			desc.avl = 0;
	}

	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;

	prev = mm->context.ldt;
	prev_count = prev ? prev->nr_entries : 0;
	next_count = max(info.entry_number + 1, prev_count);

	next = alloc_ldt_struct(next_count);
	if (!next) {
		err = -ENOMEM;
		goto unlock;
	}

	/* Carry over the existing entries, then overwrite the requested one. */
	if (prev)
		memcpy(next->entries, prev->entries, prev_count * LDT_ENTRY_SIZE);

	next->entries[info.entry_number] = desc;
	finalize_ldt_struct(next);

	/*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * When an LDT already exists, pick the other slot so that other CPUs
	 * keep using the old LDT until install_ldt() switches them over to
	 * the new one.
	 */
	slot = prev ? !prev->slot : 0;
	err = map_ldt_struct(mm, next, slot);
	if (err) {
		/*
		 * Mapping is only expected to fail for the first LDT setup:
		 * once an LDT is installed the PTE page is already populated.
		 * Clean up a half populated page table in that case.
		 */
		if (!WARN_ON_ONCE(prev))
			free_ldt_pgtables(mm);
		free_ldt_struct(next);
		goto unlock;
	}

	install_ldt(mm, next);
	unmap_ldt_struct(mm, prev);
	free_ldt_struct(prev);
	/* err is 0 here: map_ldt_struct() succeeded. */

unlock:
	up_write(&mm->context.ldt_usr_sem);
	return err;
}
66677e463d1SThomas Gleixner
/*
 * modify_ldt() system call: dispatch on @func.
 *
 *   0    - read_ldt(ptr, bytecount)
 *   1    - write_ldt(ptr, bytecount, 1), i.e. with oldmode set
 *   2    - read_default_ldt(ptr, bytecount)
 *   0x11 - write_ldt(ptr, bytecount, 0), i.e. with oldmode clear
 *
 * Any other @func value yields -ENOSYS.
 */
SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr,
		unsigned long, bytecount)
{
	int rc;

	switch (func) {
	case 0:
		rc = read_ldt(ptr, bytecount);
		break;
	case 1:
		rc = write_ldt(ptr, bytecount, 1);
		break;
	case 2:
		rc = read_default_ldt(ptr, bytecount);
		break;
	case 0x11:
		rc = write_ldt(ptr, bytecount, 0);
		break;
	default:
		rc = -ENOSYS;
		break;
	}

	/*
	 * The SYSCALL_DEFINE() macros give this function a long-sized
	 * return type, but the ABI for sys_modify_ldt() expects an 'int'.
	 * Casting to 'unsigned int' produces an int-sized value in %rax
	 * for the return code; the 'unsigned' is required so that the
	 * compiler does not sign-extend negative return codes into the
	 * high half of the register during the int->long conversion.
	 */
	return (unsigned int)rc;
}
697