xref: /openbmc/linux/arch/x86/kernel/ldt.c (revision ae8eba8b)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
277e463d1SThomas Gleixner /*
377e463d1SThomas Gleixner  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
477e463d1SThomas Gleixner  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
577e463d1SThomas Gleixner  * Copyright (C) 2002 Andi Kleen
677e463d1SThomas Gleixner  *
777e463d1SThomas Gleixner  * This handles calls from both 32bit and 64bit mode.
8c2b3496bSPeter Zijlstra  *
9c2b3496bSPeter Zijlstra  * Lock order:
10c2b3496bSPeter Zijlstra  *	context.ldt_usr_sem
11c1e8d7c6SMichel Lespinasse  *	  mmap_lock
12c2b3496bSPeter Zijlstra  *	    context.lock
1377e463d1SThomas Gleixner  */
1477e463d1SThomas Gleixner 
1577e463d1SThomas Gleixner #include <linux/errno.h>
165a0e3ad6STejun Heo #include <linux/gfp.h>
1777e463d1SThomas Gleixner #include <linux/sched.h>
1877e463d1SThomas Gleixner #include <linux/string.h>
1977e463d1SThomas Gleixner #include <linux/mm.h>
2077e463d1SThomas Gleixner #include <linux/smp.h>
21da20ab35SDave Hansen #include <linux/syscalls.h>
2237868fe1SAndy Lutomirski #include <linux/slab.h>
2377e463d1SThomas Gleixner #include <linux/vmalloc.h>
24423a5405SJaswinder Singh Rajput #include <linux/uaccess.h>
2577e463d1SThomas Gleixner 
2677e463d1SThomas Gleixner #include <asm/ldt.h>
27f55f0501SAndy Lutomirski #include <asm/tlb.h>
2877e463d1SThomas Gleixner #include <asm/desc.h>
2977e463d1SThomas Gleixner #include <asm/mmu_context.h>
30186525bdSIngo Molnar #include <asm/pgtable_areas.h>
31186525bdSIngo Molnar 
32cc801833SAndy Lutomirski #include <xen/xen.h>
33cc801833SAndy Lutomirski 
34186525bdSIngo Molnar /* This is a multiple of PAGE_SIZE. */
35186525bdSIngo Molnar #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
36186525bdSIngo Molnar 
/* Return the kernel virtual address of the PTI LDT alias area for @slot. */
37186525bdSIngo Molnar static inline void *ldt_slot_va(int slot)
38186525bdSIngo Molnar {
39186525bdSIngo Molnar 	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
40186525bdSIngo Molnar }
41186525bdSIngo Molnar 
/*
 * Load @mm's LDT into the CPU, or clear LDTR if @mm has no LDT.  With
 * PTI enabled the LDT is accessed through the alias at ldt_slot_va().
 */
42186525bdSIngo Molnar void load_mm_ldt(struct mm_struct *mm)
43186525bdSIngo Molnar {
44186525bdSIngo Molnar 	struct ldt_struct *ldt;
45186525bdSIngo Molnar 
46186525bdSIngo Molnar 	/* READ_ONCE synchronizes with smp_store_release */
47186525bdSIngo Molnar 	ldt = READ_ONCE(mm->context.ldt);
48186525bdSIngo Molnar 
49186525bdSIngo Molnar 	/*
50186525bdSIngo Molnar 	 * Any change to mm->context.ldt is followed by an IPI to all
51186525bdSIngo Molnar 	 * CPUs with the mm active.  The LDT will not be freed until
52186525bdSIngo Molnar 	 * after the IPI is handled by all such CPUs.  This means that,
53186525bdSIngo Molnar 	 * if the ldt_struct changes before we return, the values we see
54186525bdSIngo Molnar 	 * will be safe, and the new values will be loaded before we run
55186525bdSIngo Molnar 	 * any user code.
56186525bdSIngo Molnar 	 *
57186525bdSIngo Molnar 	 * NB: don't try to convert this to use RCU without extreme care.
58186525bdSIngo Molnar 	 * We would still need IRQs off, because we don't want to change
59186525bdSIngo Molnar 	 * the local LDT after an IPI loaded a newer value than the one
60186525bdSIngo Molnar 	 * that we can see.
61186525bdSIngo Molnar 	 */
62186525bdSIngo Molnar 
63186525bdSIngo Molnar 	if (unlikely(ldt)) {
64186525bdSIngo Molnar 		if (static_cpu_has(X86_FEATURE_PTI)) {
65186525bdSIngo Molnar 			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
66186525bdSIngo Molnar 				/*
67186525bdSIngo Molnar 				 * Whoops -- either the new LDT isn't mapped
68186525bdSIngo Molnar 				 * (if slot == -1) or is mapped into a bogus
69186525bdSIngo Molnar 				 * slot (if slot > 1).
70186525bdSIngo Molnar 				 */
71186525bdSIngo Molnar 				clear_LDT();
72186525bdSIngo Molnar 				return;
73186525bdSIngo Molnar 			}
74186525bdSIngo Molnar 
75186525bdSIngo Molnar 			/*
76186525bdSIngo Molnar 			 * If page table isolation is enabled, ldt->entries
77186525bdSIngo Molnar 			 * will not be mapped in the userspace pagetables.
78186525bdSIngo Molnar 			 * Tell the CPU to access the LDT through the alias
79186525bdSIngo Molnar 			 * at ldt_slot_va(ldt->slot).
80186525bdSIngo Molnar 			 */
81186525bdSIngo Molnar 			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
82186525bdSIngo Molnar 		} else {
83186525bdSIngo Molnar 			set_ldt(ldt->entries, ldt->nr_entries);
84186525bdSIngo Molnar 		}
85186525bdSIngo Molnar 	} else {
86186525bdSIngo Molnar 		clear_LDT();
87186525bdSIngo Molnar 	}
88186525bdSIngo Molnar }
89186525bdSIngo Molnar 
/* Context-switch hook: reload LDTR if either the old or new mm has an LDT. */
90186525bdSIngo Molnar void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
91186525bdSIngo Molnar {
92186525bdSIngo Molnar 	/*
93186525bdSIngo Molnar 	 * Load the LDT if either the old or new mm had an LDT.
94186525bdSIngo Molnar 	 *
95186525bdSIngo Molnar 	 * An mm will never go from having an LDT to not having an LDT.  Two
96186525bdSIngo Molnar 	 * mms never share an LDT, so we don't gain anything by checking to
97186525bdSIngo Molnar 	 * see whether the LDT changed.  There's also no guarantee that
98186525bdSIngo Molnar 	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
99186525bdSIngo Molnar 	 * then prev->context.ldt will also be non-NULL.
100186525bdSIngo Molnar 	 *
101186525bdSIngo Molnar 	 * If we really cared, we could optimize the case where prev == next
102186525bdSIngo Molnar 	 * and we're exiting lazy mode.  Most of the time, if this happens,
103186525bdSIngo Molnar 	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
104186525bdSIngo Molnar 	 * used by legacy code and emulators where we don't need this level of
105186525bdSIngo Molnar 	 * performance.
106186525bdSIngo Molnar 	 *
107186525bdSIngo Molnar 	 * This uses | instead of || because it generates better code.
108186525bdSIngo Molnar 	 */
109186525bdSIngo Molnar 	if (unlikely((unsigned long)prev->context.ldt |
110186525bdSIngo Molnar 		     (unsigned long)next->context.ldt))
111186525bdSIngo Molnar 		load_mm_ldt(next);
112186525bdSIngo Molnar 
113186525bdSIngo Molnar 	DEBUG_LOCKS_WARN_ON(preemptible());
114186525bdSIngo Molnar }
11577e463d1SThomas Gleixner 
/*
 * Reload DS and ES if they currently reference the LDT, so that their
 * cached (hidden) descriptors pick up the just-installed table.
 */
116a6323757SAndy Lutomirski static void refresh_ldt_segments(void)
117a6323757SAndy Lutomirski {
118a6323757SAndy Lutomirski #ifdef CONFIG_X86_64
119a6323757SAndy Lutomirski 	unsigned short sel;
120a6323757SAndy Lutomirski 
121a6323757SAndy Lutomirski 	/*
122a6323757SAndy Lutomirski 	 * Make sure that the cached DS and ES descriptors match the updated
123a6323757SAndy Lutomirski 	 * LDT.
124a6323757SAndy Lutomirski 	 */
125a6323757SAndy Lutomirski 	savesegment(ds, sel);
126a6323757SAndy Lutomirski 	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
127a6323757SAndy Lutomirski 		loadsegment(ds, sel);
128a6323757SAndy Lutomirski 
129a6323757SAndy Lutomirski 	savesegment(es, sel);
130a6323757SAndy Lutomirski 	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
131a6323757SAndy Lutomirski 		loadsegment(es, sel);
132a6323757SAndy Lutomirski #endif
133a6323757SAndy Lutomirski }
134a6323757SAndy Lutomirski 
135c2b3496bSPeter Zijlstra /* context.lock is held by the task which issued the smp function call */
/*
 * SMP function-call target (see install_ldt()): reload the LDT on this
 * CPU if it is currently running @__mm; otherwise do nothing.
 */
1363d28ebceSAndy Lutomirski static void flush_ldt(void *__mm)
13777e463d1SThomas Gleixner {
1383d28ebceSAndy Lutomirski 	struct mm_struct *mm = __mm;
13977e463d1SThomas Gleixner 
1403d28ebceSAndy Lutomirski 	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
14137868fe1SAndy Lutomirski 		return;
14237868fe1SAndy Lutomirski 
143f55f0501SAndy Lutomirski 	load_mm_ldt(mm);
144a6323757SAndy Lutomirski 
145a6323757SAndy Lutomirski 	refresh_ldt_segments();
14637868fe1SAndy Lutomirski }
14737868fe1SAndy Lutomirski 
14837868fe1SAndy Lutomirski /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
/* Returns NULL if @num_entries exceeds LDT_ENTRIES or on allocation failure. */
149bbf79d21SBorislav Petkov static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
15077e463d1SThomas Gleixner {
15137868fe1SAndy Lutomirski 	struct ldt_struct *new_ldt;
152990e9dc3SThomas Gleixner 	unsigned int alloc_size;
15377e463d1SThomas Gleixner 
154bbf79d21SBorislav Petkov 	if (num_entries > LDT_ENTRIES)
15537868fe1SAndy Lutomirski 		return NULL;
15637868fe1SAndy Lutomirski 
15737868fe1SAndy Lutomirski 	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
15837868fe1SAndy Lutomirski 	if (!new_ldt)
15937868fe1SAndy Lutomirski 		return NULL;
16037868fe1SAndy Lutomirski 
16137868fe1SAndy Lutomirski 	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
162bbf79d21SBorislav Petkov 	alloc_size = num_entries * LDT_ENTRY_SIZE;
16337868fe1SAndy Lutomirski 
16437868fe1SAndy Lutomirski 	/*
16537868fe1SAndy Lutomirski 	 * Xen is very picky: it requires a page-aligned LDT that has no
16637868fe1SAndy Lutomirski 	 * trailing nonzero bytes in any page that contains LDT descriptors.
16737868fe1SAndy Lutomirski 	 * Keep it simple: zero the whole allocation and never allocate less
16837868fe1SAndy Lutomirski 	 * than PAGE_SIZE.
16937868fe1SAndy Lutomirski 	 */
17037868fe1SAndy Lutomirski 	if (alloc_size > PAGE_SIZE)
17137868fe1SAndy Lutomirski 		new_ldt->entries = vzalloc(alloc_size);
17277e463d1SThomas Gleixner 	else
173f454b478SJan Beulich 		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
17477e463d1SThomas Gleixner 
17537868fe1SAndy Lutomirski 	if (!new_ldt->entries) {
17637868fe1SAndy Lutomirski 		kfree(new_ldt);
17737868fe1SAndy Lutomirski 		return NULL;
17877e463d1SThomas Gleixner 	}
17977e463d1SThomas Gleixner 
180f55f0501SAndy Lutomirski 	/* The new LDT isn't aliased for PTI yet. */
181f55f0501SAndy Lutomirski 	new_ldt->slot = -1;
182f55f0501SAndy Lutomirski 
183bbf79d21SBorislav Petkov 	new_ldt->nr_entries = num_entries;
18437868fe1SAndy Lutomirski 	return new_ldt;
18537868fe1SAndy Lutomirski }
18637868fe1SAndy Lutomirski 
1879bae3197SJoerg Roedel #ifdef CONFIG_PAGE_TABLE_ISOLATION
1889bae3197SJoerg Roedel 
/*
 * Warn if the observed kernel/user LDT page-table population does not
 * match what is expected for @mm's current LDT state.
 */
1899bae3197SJoerg Roedel static void do_sanity_check(struct mm_struct *mm,
1909bae3197SJoerg Roedel 			    bool had_kernel_mapping,
1919bae3197SJoerg Roedel 			    bool had_user_mapping)
1929bae3197SJoerg Roedel {
1939bae3197SJoerg Roedel 	if (mm->context.ldt) {
1949bae3197SJoerg Roedel 		/*
1959bae3197SJoerg Roedel 		 * We already had an LDT.  The top-level entry should already
1969bae3197SJoerg Roedel 		 * have been allocated and synchronized with the usermode
1979bae3197SJoerg Roedel 		 * tables.
1989bae3197SJoerg Roedel 		 */
1999bae3197SJoerg Roedel 		WARN_ON(!had_kernel_mapping);
20067e87d43SBorislav Petkov 		if (boot_cpu_has(X86_FEATURE_PTI))
2019bae3197SJoerg Roedel 			WARN_ON(!had_user_mapping);
2029bae3197SJoerg Roedel 	} else {
2039bae3197SJoerg Roedel 		/*
2049bae3197SJoerg Roedel 		 * This is the first time we're mapping an LDT for this process.
2059bae3197SJoerg Roedel 		 * Sync the pgd to the usermode tables.
2069bae3197SJoerg Roedel 		 */
2079bae3197SJoerg Roedel 		WARN_ON(had_kernel_mapping);
20867e87d43SBorislav Petkov 		if (boot_cpu_has(X86_FEATURE_PTI))
2099bae3197SJoerg Roedel 			WARN_ON(had_user_mapping);
2109bae3197SJoerg Roedel 	}
2119bae3197SJoerg Roedel }
2129bae3197SJoerg Roedel 
2136df934b9SJoerg Roedel #ifdef CONFIG_X86_PAE
2146df934b9SJoerg Roedel 
/*
 * Walk @pgd down to the PMD covering @va; returns NULL if any
 * intermediate level is not present.
 */
2156df934b9SJoerg Roedel static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
2166df934b9SJoerg Roedel {
2176df934b9SJoerg Roedel 	p4d_t *p4d;
2186df934b9SJoerg Roedel 	pud_t *pud;
2196df934b9SJoerg Roedel 
2206df934b9SJoerg Roedel 	if (pgd->pgd == 0)
2216df934b9SJoerg Roedel 		return NULL;
2226df934b9SJoerg Roedel 
2236df934b9SJoerg Roedel 	p4d = p4d_offset(pgd, va);
2246df934b9SJoerg Roedel 	if (p4d_none(*p4d))
2256df934b9SJoerg Roedel 		return NULL;
2266df934b9SJoerg Roedel 
2276df934b9SJoerg Roedel 	pud = pud_offset(p4d, va);
2286df934b9SJoerg Roedel 	if (pud_none(*pud))
2296df934b9SJoerg Roedel 		return NULL;
2306df934b9SJoerg Roedel 
2316df934b9SJoerg Roedel 	return pmd_offset(pud, va);
2326df934b9SJoerg Roedel }
2336df934b9SJoerg Roedel 
/*
 * PAE variant: copy the kernel-side LDT pmd entry into the usermode
 * page-table on the first LDT installation for @mm.
 */
2346df934b9SJoerg Roedel static void map_ldt_struct_to_user(struct mm_struct *mm)
2356df934b9SJoerg Roedel {
2366df934b9SJoerg Roedel 	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
2376df934b9SJoerg Roedel 	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
2386df934b9SJoerg Roedel 	pmd_t *k_pmd, *u_pmd;
2396df934b9SJoerg Roedel 
2406df934b9SJoerg Roedel 	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
2416df934b9SJoerg Roedel 	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
2426df934b9SJoerg Roedel 
24367e87d43SBorislav Petkov 	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
2446df934b9SJoerg Roedel 		set_pmd(u_pmd, *k_pmd);
2456df934b9SJoerg Roedel }
2466df934b9SJoerg Roedel 
/* PAE variant: check kernel/user pmd population against @mm's LDT state. */
2476df934b9SJoerg Roedel static void sanity_check_ldt_mapping(struct mm_struct *mm)
2486df934b9SJoerg Roedel {
2496df934b9SJoerg Roedel 	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
2506df934b9SJoerg Roedel 	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
2516df934b9SJoerg Roedel 	bool had_kernel, had_user;
2526df934b9SJoerg Roedel 	pmd_t *k_pmd, *u_pmd;
2536df934b9SJoerg Roedel 
2546df934b9SJoerg Roedel 	k_pmd      = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
2556df934b9SJoerg Roedel 	u_pmd      = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
2566df934b9SJoerg Roedel 	had_kernel = (k_pmd->pmd != 0);
2576df934b9SJoerg Roedel 	had_user   = (u_pmd->pmd != 0);
2586df934b9SJoerg Roedel 
2596df934b9SJoerg Roedel 	do_sanity_check(mm, had_kernel, had_user);
2606df934b9SJoerg Roedel }
2616df934b9SJoerg Roedel 
2626df934b9SJoerg Roedel #else /* !CONFIG_X86_PAE */
2636df934b9SJoerg Roedel 
/*
 * Non-PAE variant: propagate the kernel LDT pgd entry to the usermode
 * page-table on the first LDT installation for @mm.
 */
2649bae3197SJoerg Roedel static void map_ldt_struct_to_user(struct mm_struct *mm)
2659bae3197SJoerg Roedel {
2669bae3197SJoerg Roedel 	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
2679bae3197SJoerg Roedel 
26867e87d43SBorislav Petkov 	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
2699bae3197SJoerg Roedel 		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
2709bae3197SJoerg Roedel }
2719bae3197SJoerg Roedel 
/* Non-PAE variant: check kernel/user pgd population against @mm's LDT state. */
2729bae3197SJoerg Roedel static void sanity_check_ldt_mapping(struct mm_struct *mm)
2739bae3197SJoerg Roedel {
2749bae3197SJoerg Roedel 	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
2759bae3197SJoerg Roedel 	bool had_kernel = (pgd->pgd != 0);
2769bae3197SJoerg Roedel 	bool had_user   = (kernel_to_user_pgdp(pgd)->pgd != 0);
2779bae3197SJoerg Roedel 
2789bae3197SJoerg Roedel 	do_sanity_check(mm, had_kernel, had_user);
2799bae3197SJoerg Roedel }
2809bae3197SJoerg Roedel 
2816df934b9SJoerg Roedel #endif /* CONFIG_X86_PAE */
2826df934b9SJoerg Roedel 
283f55f0501SAndy Lutomirski /*
284f55f0501SAndy Lutomirski  * If PTI is enabled, this maps the LDT into the kernelmode and
285f55f0501SAndy Lutomirski  * usermode tables for the given mm.
286f55f0501SAndy Lutomirski  */
283f55f0501SAndy Lutomirski /*
284f55f0501SAndy Lutomirski  * If PTI is enabled, this maps the LDT into the kernelmode and
285f55f0501SAndy Lutomirski  * usermode tables for the given mm.
286f55f0501SAndy Lutomirski  *
 * Returns 0 on success, -ENOMEM if a page table could not be allocated.
 * On failure the alias range may be partially populated; the callers
 * clean that up with free_ldt_pgtables().
286f55f0501SAndy Lutomirski  */
287f55f0501SAndy Lutomirski static int
288f55f0501SAndy Lutomirski map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
289f55f0501SAndy Lutomirski {
290f55f0501SAndy Lutomirski 	unsigned long va;
2919bae3197SJoerg Roedel 	bool is_vmalloc;
292f55f0501SAndy Lutomirski 	spinlock_t *ptl;
293a0e6e083SKirill A. Shutemov 	int i, nr_pages;
294f55f0501SAndy Lutomirski 
29567e87d43SBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_PTI))
296f55f0501SAndy Lutomirski 		return 0;
297f55f0501SAndy Lutomirski 
298f55f0501SAndy Lutomirski 	/*
299f55f0501SAndy Lutomirski 	 * Any given ldt_struct should have map_ldt_struct() called at most
300f55f0501SAndy Lutomirski 	 * once.
301f55f0501SAndy Lutomirski 	 */
302f55f0501SAndy Lutomirski 	WARN_ON(ldt->slot != -1);
303f55f0501SAndy Lutomirski 
3049bae3197SJoerg Roedel 	/* Check if the current mappings are sane */
3059bae3197SJoerg Roedel 	sanity_check_ldt_mapping(mm);
3069bae3197SJoerg Roedel 
307f55f0501SAndy Lutomirski 	is_vmalloc = is_vmalloc_addr(ldt->entries);
308f55f0501SAndy Lutomirski 
309a0e6e083SKirill A. Shutemov 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
310a0e6e083SKirill A. Shutemov 
311a0e6e083SKirill A. Shutemov 	for (i = 0; i < nr_pages; i++) {
312f55f0501SAndy Lutomirski 		unsigned long offset = i << PAGE_SHIFT;
313f55f0501SAndy Lutomirski 		const void *src = (char *)ldt->entries + offset;
314f55f0501SAndy Lutomirski 		unsigned long pfn;
315fb43d6cbSDave Hansen 		pgprot_t pte_prot;
316f55f0501SAndy Lutomirski 		pte_t pte, *ptep;
317f55f0501SAndy Lutomirski 
318f55f0501SAndy Lutomirski 		va = (unsigned long)ldt_slot_va(slot) + offset;
319f55f0501SAndy Lutomirski 		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
320f55f0501SAndy Lutomirski 			page_to_pfn(virt_to_page(src));
321f55f0501SAndy Lutomirski 		/*
322f55f0501SAndy Lutomirski 		 * Treat the PTI LDT range as a *userspace* range.
323f55f0501SAndy Lutomirski 		 * get_locked_pte() will allocate all needed pagetables
324f55f0501SAndy Lutomirski 		 * and account for them in this mm.
325f55f0501SAndy Lutomirski 		 */
326f55f0501SAndy Lutomirski 		ptep = get_locked_pte(mm, va, &ptl);
327f55f0501SAndy Lutomirski 		if (!ptep)
328f55f0501SAndy Lutomirski 			return -ENOMEM;
3299f5cb6b3SThomas Gleixner 		/*
3309f5cb6b3SThomas Gleixner 		 * Map it RO so the easy to find address is not a primary
3319f5cb6b3SThomas Gleixner 		 * target via some kernel interface which misses a
3329f5cb6b3SThomas Gleixner 		 * permission check.
3339f5cb6b3SThomas Gleixner 		 */
334fb43d6cbSDave Hansen 		pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
335fb43d6cbSDave Hansen 		/* Filter out unsupported __PAGE_KERNEL* bits: */
336e6f39e87SJoerg Roedel 		pgprot_val(pte_prot) &= __supported_pte_mask;
337fb43d6cbSDave Hansen 		pte = pfn_pte(pfn, pte_prot);
338f55f0501SAndy Lutomirski 		set_pte_at(mm, va, ptep, pte);
339f55f0501SAndy Lutomirski 		pte_unmap_unlock(ptep, ptl);
340f55f0501SAndy Lutomirski 	}
341f55f0501SAndy Lutomirski 
3429bae3197SJoerg Roedel 	/* Propagate LDT mapping to the user page-table */
3439bae3197SJoerg Roedel 	map_ldt_struct_to_user(mm);
344f55f0501SAndy Lutomirski 
345f55f0501SAndy Lutomirski 	ldt->slot = slot;
346f55f0501SAndy Lutomirski 	return 0;
347f55f0501SAndy Lutomirski }
348f55f0501SAndy Lutomirski 
/*
 * Clear the PTI alias PTEs for @ldt in @mm and flush the corresponding
 * TLB range.  No-op without PTI or when @ldt is NULL.
 */
349a0e6e083SKirill A. Shutemov static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
350a0e6e083SKirill A. Shutemov {
351a0e6e083SKirill A. Shutemov 	unsigned long va;
352a0e6e083SKirill A. Shutemov 	int i, nr_pages;
353a0e6e083SKirill A. Shutemov 
354a0e6e083SKirill A. Shutemov 	if (!ldt)
355a0e6e083SKirill A. Shutemov 		return;
356a0e6e083SKirill A. Shutemov 
357a0e6e083SKirill A. Shutemov 	/* LDT map/unmap is only required for PTI */
35867e87d43SBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_PTI))
359a0e6e083SKirill A. Shutemov 		return;
360a0e6e083SKirill A. Shutemov 
361a0e6e083SKirill A. Shutemov 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
362a0e6e083SKirill A. Shutemov 
363a0e6e083SKirill A. Shutemov 	for (i = 0; i < nr_pages; i++) {
364a0e6e083SKirill A. Shutemov 		unsigned long offset = i << PAGE_SHIFT;
365a0e6e083SKirill A. Shutemov 		spinlock_t *ptl;
366a0e6e083SKirill A. Shutemov 		pte_t *ptep;
367a0e6e083SKirill A. Shutemov 
368a0e6e083SKirill A. Shutemov 		va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
369a0e6e083SKirill A. Shutemov 		ptep = get_locked_pte(mm, va, &ptl);
370a0e6e083SKirill A. Shutemov 		pte_clear(mm, va, ptep);
371a0e6e083SKirill A. Shutemov 		pte_unmap_unlock(ptep, ptl);
372a0e6e083SKirill A. Shutemov 	}
373a0e6e083SKirill A. Shutemov 
374a0e6e083SKirill A. Shutemov 	va = (unsigned long)ldt_slot_va(ldt->slot);
375a0e6e083SKirill A. Shutemov 	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
376a0e6e083SKirill A. Shutemov }
377a0e6e083SKirill A. Shutemov 
3789bae3197SJoerg Roedel #else /* !CONFIG_PAGE_TABLE_ISOLATION */
3799bae3197SJoerg Roedel 
/* !CONFIG_PAGE_TABLE_ISOLATION stub: no alias mapping is needed. */
3809bae3197SJoerg Roedel static int
3819bae3197SJoerg Roedel map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
3829bae3197SJoerg Roedel {
3839bae3197SJoerg Roedel 	return 0;
3849bae3197SJoerg Roedel }
385a0e6e083SKirill A. Shutemov 
/* !CONFIG_PAGE_TABLE_ISOLATION stub: nothing to unmap. */
386a0e6e083SKirill A. Shutemov static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
387a0e6e083SKirill A. Shutemov {
388a0e6e083SKirill A. Shutemov }
3899bae3197SJoerg Roedel #endif /* CONFIG_PAGE_TABLE_ISOLATION */
3909bae3197SJoerg Roedel 
/* Tear down the page-tables backing the PTI LDT alias range of @mm. */
391f55f0501SAndy Lutomirski static void free_ldt_pgtables(struct mm_struct *mm)
392f55f0501SAndy Lutomirski {
393f55f0501SAndy Lutomirski #ifdef CONFIG_PAGE_TABLE_ISOLATION
394f55f0501SAndy Lutomirski 	struct mmu_gather tlb;
395f55f0501SAndy Lutomirski 	unsigned long start = LDT_BASE_ADDR;
3968195d869SJoerg Roedel 	unsigned long end = LDT_END_ADDR;
397f55f0501SAndy Lutomirski 
39867e87d43SBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_PTI))
399f55f0501SAndy Lutomirski 		return;
400f55f0501SAndy Lutomirski 
401f55f0501SAndy Lutomirski 	tlb_gather_mmu(&tlb, mm, start, end);
402f55f0501SAndy Lutomirski 	free_pgd_range(&tlb, start, end, start, end);
403*ae8eba8bSWill Deacon 	tlb_finish_mmu(&tlb);
404f55f0501SAndy Lutomirski #endif
405f55f0501SAndy Lutomirski }
406f55f0501SAndy Lutomirski 
40737868fe1SAndy Lutomirski /* After calling this, the LDT is immutable. */
40837868fe1SAndy Lutomirski /* After calling this, the LDT is immutable. */
40937868fe1SAndy Lutomirski static void finalize_ldt_struct(struct ldt_struct *ldt)
41077e463d1SThomas Gleixner {
	/* NOTE(review): presumably the paravirt hook (e.g. Xen) write-protects
	 * the entries here — confirm against paravirt_alloc_ldt(). */
410bbf79d21SBorislav Petkov 	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
41137868fe1SAndy Lutomirski }
41277e463d1SThomas Gleixner 
/* Publish @ldt as @mm's LDT and force every CPU using @mm to reload it. */
413c2b3496bSPeter Zijlstra static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
41437868fe1SAndy Lutomirski {
415c2b3496bSPeter Zijlstra 	mutex_lock(&mm->context.lock);
41638ffbe66SJeremy Fitzhardinge 
417c2b3496bSPeter Zijlstra 	/* Synchronizes with READ_ONCE in load_mm_ldt. */
418c2b3496bSPeter Zijlstra 	smp_store_release(&mm->context.ldt, ldt);
419c2b3496bSPeter Zijlstra 
420c2b3496bSPeter Zijlstra 	/* Activate the LDT for all CPUs using currents mm. */
421c2b3496bSPeter Zijlstra 	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
422c2b3496bSPeter Zijlstra 
423c2b3496bSPeter Zijlstra 	mutex_unlock(&mm->context.lock);
42437868fe1SAndy Lutomirski }
42537868fe1SAndy Lutomirski 
/* Free an LDT and its entry array (vmalloc'd or a single zeroed page). */
42637868fe1SAndy Lutomirski static void free_ldt_struct(struct ldt_struct *ldt)
42737868fe1SAndy Lutomirski {
42837868fe1SAndy Lutomirski 	if (likely(!ldt))
42937868fe1SAndy Lutomirski 		return;
43037868fe1SAndy Lutomirski 
431bbf79d21SBorislav Petkov 	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
432bbf79d21SBorislav Petkov 	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
4338d5341a6SAndrey Ryabinin 		vfree_atomic(ldt->entries);
43437868fe1SAndy Lutomirski 	else
435f454b478SJan Beulich 		free_page((unsigned long)ldt->entries);
43637868fe1SAndy Lutomirski 	kfree(ldt);
43777e463d1SThomas Gleixner }
43877e463d1SThomas Gleixner 
43977e463d1SThomas Gleixner /*
440a4828f81SThomas Gleixner  * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
441a4828f81SThomas Gleixner  * the new task is not running, so nothing can be installed.
44277e463d1SThomas Gleixner  */
443a4828f81SThomas Gleixner int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
44477e463d1SThomas Gleixner {
44537868fe1SAndy Lutomirski 	struct ldt_struct *new_ldt;
44677e463d1SThomas Gleixner 	int retval = 0;
44777e463d1SThomas Gleixner 
448a4828f81SThomas Gleixner 	if (!old_mm)
44937868fe1SAndy Lutomirski 		return 0;
45037868fe1SAndy Lutomirski 
45137868fe1SAndy Lutomirski 	mutex_lock(&old_mm->context.lock);
452a4828f81SThomas Gleixner 	if (!old_mm->context.ldt)
45337868fe1SAndy Lutomirski 		goto out_unlock;
45437868fe1SAndy Lutomirski 
455bbf79d21SBorislav Petkov 	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
45637868fe1SAndy Lutomirski 	if (!new_ldt) {
45737868fe1SAndy Lutomirski 		retval = -ENOMEM;
45837868fe1SAndy Lutomirski 		goto out_unlock;
45937868fe1SAndy Lutomirski 	}
46037868fe1SAndy Lutomirski 
46137868fe1SAndy Lutomirski 	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
462bbf79d21SBorislav Petkov 	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
46337868fe1SAndy Lutomirski 	finalize_ldt_struct(new_ldt);
46437868fe1SAndy Lutomirski 
	/* A fresh mm has no prior LDT alias, so the copy always uses slot 0. */
465f55f0501SAndy Lutomirski 	retval = map_ldt_struct(mm, new_ldt, 0);
466f55f0501SAndy Lutomirski 	if (retval) {
467f55f0501SAndy Lutomirski 		free_ldt_pgtables(mm);
468f55f0501SAndy Lutomirski 		free_ldt_struct(new_ldt);
469f55f0501SAndy Lutomirski 		goto out_unlock;
470f55f0501SAndy Lutomirski 	}
47137868fe1SAndy Lutomirski 	mm->context.ldt = new_ldt;
47237868fe1SAndy Lutomirski 
47337868fe1SAndy Lutomirski out_unlock:
47437868fe1SAndy Lutomirski 	mutex_unlock(&old_mm->context.lock);
47577e463d1SThomas Gleixner 	return retval;
47677e463d1SThomas Gleixner }
47777e463d1SThomas Gleixner 
47877e463d1SThomas Gleixner /*
47977e463d1SThomas Gleixner  * No need to lock the MM as we are the last user
48077e463d1SThomas Gleixner  *
48177e463d1SThomas Gleixner  * 64bit: Don't touch the LDT register - we're already in the next thread.
48277e463d1SThomas Gleixner  */
48339a0526fSDave Hansen void destroy_context_ldt(struct mm_struct *mm)
48477e463d1SThomas Gleixner {
	/* Last user of @mm (see comment above): no locking required. */
48537868fe1SAndy Lutomirski 	free_ldt_struct(mm->context.ldt);
48637868fe1SAndy Lutomirski 	mm->context.ldt = NULL;
48777e463d1SThomas Gleixner }
48877e463d1SThomas Gleixner 
/* exit_mmap() hook: release the PTI LDT alias page-tables of @mm. */
489f55f0501SAndy Lutomirski void ldt_arch_exit_mmap(struct mm_struct *mm)
490f55f0501SAndy Lutomirski {
491f55f0501SAndy Lutomirski 	free_ldt_pgtables(mm);
492f55f0501SAndy Lutomirski }
493f55f0501SAndy Lutomirski 
/*
 * modify_ldt(0): copy the current mm's LDT to userspace, zero-filling
 * any remainder up to @bytecount.  Returns the number of bytes written
 * (0 if there is no LDT) or -EFAULT.
 */
49477e463d1SThomas Gleixner static int read_ldt(void __user *ptr, unsigned long bytecount)
49577e463d1SThomas Gleixner {
49677e463d1SThomas Gleixner 	struct mm_struct *mm = current->mm;
497bbf79d21SBorislav Petkov 	unsigned long entries_size;
498bbf79d21SBorislav Petkov 	int retval;
49977e463d1SThomas Gleixner 
500c2b3496bSPeter Zijlstra 	down_read(&mm->context.ldt_usr_sem);
50137868fe1SAndy Lutomirski 
50237868fe1SAndy Lutomirski 	if (!mm->context.ldt) {
50337868fe1SAndy Lutomirski 		retval = 0;
50437868fe1SAndy Lutomirski 		goto out_unlock;
50537868fe1SAndy Lutomirski 	}
50637868fe1SAndy Lutomirski 
50777e463d1SThomas Gleixner 	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
50877e463d1SThomas Gleixner 		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
50977e463d1SThomas Gleixner 
510bbf79d21SBorislav Petkov 	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
511bbf79d21SBorislav Petkov 	if (entries_size > bytecount)
512bbf79d21SBorislav Petkov 		entries_size = bytecount;
51377e463d1SThomas Gleixner 
514bbf79d21SBorislav Petkov 	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
51537868fe1SAndy Lutomirski 		retval = -EFAULT;
51637868fe1SAndy Lutomirski 		goto out_unlock;
51737868fe1SAndy Lutomirski 	}
51837868fe1SAndy Lutomirski 
519bbf79d21SBorislav Petkov 	if (entries_size != bytecount) {
52037868fe1SAndy Lutomirski 		/* Zero-fill the rest and pretend we read bytecount bytes. */
521bbf79d21SBorislav Petkov 		if (clear_user(ptr + entries_size, bytecount - entries_size)) {
52237868fe1SAndy Lutomirski 			retval = -EFAULT;
52337868fe1SAndy Lutomirski 			goto out_unlock;
52477e463d1SThomas Gleixner 		}
52577e463d1SThomas Gleixner 	}
52637868fe1SAndy Lutomirski 	retval = bytecount;
52737868fe1SAndy Lutomirski 
52837868fe1SAndy Lutomirski out_unlock:
529c2b3496bSPeter Zijlstra 	up_read(&mm->context.ldt_usr_sem);
53037868fe1SAndy Lutomirski 	return retval;
53177e463d1SThomas Gleixner }
53277e463d1SThomas Gleixner 
/*
 * modify_ldt(2): the "default" LDT is all zeros, so just clear up to a
 * fixed per-arch size and report how many bytes were "read".
 */
53377e463d1SThomas Gleixner static int read_default_ldt(void __user *ptr, unsigned long bytecount)
53477e463d1SThomas Gleixner {
53577e463d1SThomas Gleixner 	/* CHECKME: Can we use _one_ random number ? */
53677e463d1SThomas Gleixner #ifdef CONFIG_X86_32
53777e463d1SThomas Gleixner 	unsigned long size = 5 * sizeof(struct desc_struct);
53877e463d1SThomas Gleixner #else
53977e463d1SThomas Gleixner 	unsigned long size = 128;
54077e463d1SThomas Gleixner #endif
54177e463d1SThomas Gleixner 	if (bytecount > size)
54277e463d1SThomas Gleixner 		bytecount = size;
54377e463d1SThomas Gleixner 	if (clear_user(ptr, bytecount))
54477e463d1SThomas Gleixner 		return -EFAULT;
54577e463d1SThomas Gleixner 	return bytecount;
54677e463d1SThomas Gleixner }
54777e463d1SThomas Gleixner 
/* Whether 16-bit segments may be installed in the LDT on this system. */
548cc801833SAndy Lutomirski static bool allow_16bit_segments(void)
549cc801833SAndy Lutomirski {
550cc801833SAndy Lutomirski 	if (!IS_ENABLED(CONFIG_X86_16BIT))
551cc801833SAndy Lutomirski 		return false;
552cc801833SAndy Lutomirski 
553cc801833SAndy Lutomirski #ifdef CONFIG_XEN_PV
554cc801833SAndy Lutomirski 	/*
555cc801833SAndy Lutomirski 	 * Xen PV does not implement ESPFIX64, which means that 16-bit
556cc801833SAndy Lutomirski 	 * segments will not work correctly.  Until either Xen PV implements
557cc801833SAndy Lutomirski 	 * ESPFIX64 and can signal this fact to the guest or unless someone
558cc801833SAndy Lutomirski 	 * provides compelling evidence that allowing broken 16-bit segments
559cc801833SAndy Lutomirski 	 * is worthwhile, disallow 16-bit segments under Xen PV.
560cc801833SAndy Lutomirski 	 */
561cc801833SAndy Lutomirski 	if (xen_pv_domain()) {
562bb5a93aaSLinus Torvalds 		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
563cc801833SAndy Lutomirski 		return false;
564cc801833SAndy Lutomirski 	}
565cc801833SAndy Lutomirski #endif
566cc801833SAndy Lutomirski 
567cc801833SAndy Lutomirski 	return true;
568cc801833SAndy Lutomirski }
569cc801833SAndy Lutomirski 
/*
 * modify_ldt(1)/modify_ldt(0x11): install one LDT entry from userspace.
 * A new, larger LDT is allocated, the old entries copied over, the new
 * entry written, and the result atomically installed for all CPUs.
 * @oldmode selects the legacy semantics: contents == 3 is rejected and
 * the AVL bit is forced to 0.
 */
57077e463d1SThomas Gleixner static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
57177e463d1SThomas Gleixner {
57277e463d1SThomas Gleixner 	struct mm_struct *mm = current->mm;
573990e9dc3SThomas Gleixner 	struct ldt_struct *new_ldt, *old_ldt;
574bbf79d21SBorislav Petkov 	unsigned int old_nr_entries, new_nr_entries;
575990e9dc3SThomas Gleixner 	struct user_desc ldt_info;
5765af72502SGlauber de Oliveira Costa 	struct desc_struct ldt;
57777e463d1SThomas Gleixner 	int error;
57877e463d1SThomas Gleixner 
57977e463d1SThomas Gleixner 	error = -EINVAL;
58077e463d1SThomas Gleixner 	if (bytecount != sizeof(ldt_info))
58177e463d1SThomas Gleixner 		goto out;
58277e463d1SThomas Gleixner 	error = -EFAULT;
58377e463d1SThomas Gleixner 	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
58477e463d1SThomas Gleixner 		goto out;
58577e463d1SThomas Gleixner 
58677e463d1SThomas Gleixner 	error = -EINVAL;
58777e463d1SThomas Gleixner 	if (ldt_info.entry_number >= LDT_ENTRIES)
58877e463d1SThomas Gleixner 		goto out;
58977e463d1SThomas Gleixner 	if (ldt_info.contents == 3) {
59077e463d1SThomas Gleixner 		if (oldmode)
59177e463d1SThomas Gleixner 			goto out;
59277e463d1SThomas Gleixner 		if (ldt_info.seg_not_present == 0)
59377e463d1SThomas Gleixner 			goto out;
59477e463d1SThomas Gleixner 	}
59577e463d1SThomas Gleixner 
59637868fe1SAndy Lutomirski 	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
59737868fe1SAndy Lutomirski 	    LDT_empty(&ldt_info)) {
59837868fe1SAndy Lutomirski 		/* The user wants to clear the entry. */
5995af72502SGlauber de Oliveira Costa 		memset(&ldt, 0, sizeof(ldt));
60037868fe1SAndy Lutomirski 	} else {
601cc801833SAndy Lutomirski 		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
60234273f41SH. Peter Anvin 			error = -EINVAL;
60337868fe1SAndy Lutomirski 			goto out;
60434273f41SH. Peter Anvin 		}
60534273f41SH. Peter Anvin 
60680fbb69aSGlauber de Oliveira Costa 		fill_ldt(&ldt, &ldt_info);
60777e463d1SThomas Gleixner 		if (oldmode)
6085af72502SGlauber de Oliveira Costa 			ldt.avl = 0;
60937868fe1SAndy Lutomirski 	}
61077e463d1SThomas Gleixner 
611c2b3496bSPeter Zijlstra 	if (down_write_killable(&mm->context.ldt_usr_sem))
612c2b3496bSPeter Zijlstra 		return -EINTR;
61337868fe1SAndy Lutomirski 
61437868fe1SAndy Lutomirski 	old_ldt       = mm->context.ldt;
615bbf79d21SBorislav Petkov 	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
616bbf79d21SBorislav Petkov 	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
61737868fe1SAndy Lutomirski 
61737868fe1SAndy Lutomirski 	error = -ENOMEM;
619bbf79d21SBorislav Petkov 	new_ldt = alloc_ldt_struct(new_nr_entries);
62037868fe1SAndy Lutomirski 	if (!new_ldt)
62137868fe1SAndy Lutomirski 		goto out_unlock;
62237868fe1SAndy Lutomirski 
62337868fe1SAndy Lutomirski 	if (old_ldt)
624bbf79d21SBorislav Petkov 		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
625bbf79d21SBorislav Petkov 
62637868fe1SAndy Lutomirski 	new_ldt->entries[ldt_info.entry_number] = ldt;
62737868fe1SAndy Lutomirski 	finalize_ldt_struct(new_ldt);
62837868fe1SAndy Lutomirski 
629f55f0501SAndy Lutomirski 	/*
630f55f0501SAndy Lutomirski 	 * If we are using PTI, map the new LDT into the userspace pagetables.
631f55f0501SAndy Lutomirski 	 * If there is already an LDT, use the other slot so that other CPUs
632f55f0501SAndy Lutomirski 	 * will continue to use the old LDT until install_ldt() switches
633f55f0501SAndy Lutomirski 	 * them over to the new LDT.
634f55f0501SAndy Lutomirski 	 */
635f55f0501SAndy Lutomirski 	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
636f55f0501SAndy Lutomirski 	if (error) {
637a62d6985SThomas Gleixner 		/*
638a62d6985SThomas Gleixner 		 * This only can fail for the first LDT setup. If an LDT is
639a62d6985SThomas Gleixner 		 * already installed then the PTE page is already
640a62d6985SThomas Gleixner 		 * populated. Mop up a half populated page table.
641a62d6985SThomas Gleixner 		 */
6427f414195SThomas Gleixner 		if (!WARN_ON_ONCE(old_ldt))
643a62d6985SThomas Gleixner 			free_ldt_pgtables(mm);
644a62d6985SThomas Gleixner 		free_ldt_struct(new_ldt);
645f55f0501SAndy Lutomirski 		goto out_unlock;
646f55f0501SAndy Lutomirski 	}
647f55f0501SAndy Lutomirski 
64837868fe1SAndy Lutomirski 	install_ldt(mm, new_ldt);
649a0e6e083SKirill A. Shutemov 	unmap_ldt_struct(mm, old_ldt);
65037868fe1SAndy Lutomirski 	free_ldt_struct(old_ldt);
65177e463d1SThomas Gleixner 	error = 0;
65277e463d1SThomas Gleixner 
65377e463d1SThomas Gleixner out_unlock:
654c2b3496bSPeter Zijlstra 	up_write(&mm->context.ldt_usr_sem);
65577e463d1SThomas Gleixner out:
65677e463d1SThomas Gleixner 	return error;
65777e463d1SThomas Gleixner }
65877e463d1SThomas Gleixner 
/* modify_ldt(2) syscall entry: dispatch on @func (read/write/old-write). */
659da20ab35SDave Hansen SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
660da20ab35SDave Hansen 		unsigned long , bytecount)
66177e463d1SThomas Gleixner {
66277e463d1SThomas Gleixner 	int ret = -ENOSYS;
66377e463d1SThomas Gleixner 
66477e463d1SThomas Gleixner 	switch (func) {
66577e463d1SThomas Gleixner 	case 0:
66677e463d1SThomas Gleixner 		ret = read_ldt(ptr, bytecount);
66777e463d1SThomas Gleixner 		break;
66877e463d1SThomas Gleixner 	case 1:
66977e463d1SThomas Gleixner 		ret = write_ldt(ptr, bytecount, 1);
67077e463d1SThomas Gleixner 		break;
67177e463d1SThomas Gleixner 	case 2:
67277e463d1SThomas Gleixner 		ret = read_default_ldt(ptr, bytecount);
67377e463d1SThomas Gleixner 		break;
67477e463d1SThomas Gleixner 	case 0x11:
67577e463d1SThomas Gleixner 		ret = write_ldt(ptr, bytecount, 0);
67677e463d1SThomas Gleixner 		break;
67777e463d1SThomas Gleixner 	}
678da20ab35SDave Hansen 	/*
679da20ab35SDave Hansen 	 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
680da20ab35SDave Hansen 	 * return type, but the ABI for sys_modify_ldt() expects
681da20ab35SDave Hansen 	 * 'int'.  This cast gives us an int-sized value in %rax
682da20ab35SDave Hansen 	 * for the return code.  The 'unsigned' is necessary so
683da20ab35SDave Hansen 	 * the compiler does not try to sign-extend the negative
684da20ab35SDave Hansen 	 * return codes into the high half of the register when
685da20ab35SDave Hansen 	 * taking the value from int->long.
686da20ab35SDave Hansen 	 */
687da20ab35SDave Hansen 	return (unsigned int)ret;
68877e463d1SThomas Gleixner }
689