xref: /openbmc/linux/arch/x86/kernel/fpu/core.c (revision fbb6b31a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  Copyright (C) 1994 Linus Torvalds
4  *
5  *  Pentium III FXSR, SSE support
6  *  General FPU state handling cleanups
7  *	Gareth Hughes <gareth@valinux.com>, May 2000
8  */
9 #include <asm/fpu/api.h>
10 #include <asm/fpu/regset.h>
11 #include <asm/fpu/sched.h>
12 #include <asm/fpu/signal.h>
13 #include <asm/fpu/types.h>
14 #include <asm/traps.h>
15 #include <asm/irq_regs.h>
16 
17 #include <linux/hardirq.h>
18 #include <linux/pkeys.h>
19 #include <linux/vmalloc.h>
20 
21 #include "context.h"
22 #include "internal.h"
23 #include "legacy.h"
24 #include "xstate.h"
25 
26 #define CREATE_TRACE_POINTS
27 #include <asm/trace/fpu.h>
28 
29 #ifdef CONFIG_X86_64
30 DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
31 DEFINE_PER_CPU(u64, xfd_state);
32 #endif
33 
34 /* The FPU state configuration data for kernel and user space */
35 struct fpu_state_config	fpu_kernel_cfg __ro_after_init;
36 struct fpu_state_config fpu_user_cfg __ro_after_init;
37 
38 /*
39  * Represents the initial FPU state. It's mostly (but not completely) zeroes,
40  * depending on the FPU hardware format:
41  */
42 struct fpstate init_fpstate __ro_after_init;
43 
44 /* Track in-kernel FPU usage */
45 static DEFINE_PER_CPU(bool, in_kernel_fpu);
46 
47 /*
48  * Track which context is using the FPU on the CPU:
49  */
50 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
51 
52 /*
53  * Can we use the FPU in kernel mode with the
54  * whole "kernel_fpu_begin/end()" sequence?
55  */
56 bool irq_fpu_usable(void)
57 {
58 	if (WARN_ON_ONCE(in_nmi()))
59 		return false;
60 
61 	/* In kernel FPU usage already active? */
62 	if (this_cpu_read(in_kernel_fpu))
63 		return false;
64 
65 	/*
66 	 * When not in NMI or hard interrupt context, the FPU can be used in:
67 	 *
68 	 * - Task context except from within fpregs_lock()'ed critical
69 	 *   regions.
70 	 *
71 	 * - Soft interrupt processing context, which cannot happen
72 	 *   while in an fpregs_lock()'ed critical region.
73 	 */
74 	if (!in_hardirq())
75 		return true;
76 
77 	/*
78 	 * In hard interrupt context it's safe when soft interrupts
79 	 * are enabled, which means the interrupt did not hit in
80 	 * a fpregs_lock()'ed critical region.
81 	 * an fpregs_lock()'ed critical region.
82 	return !softirq_count();
83 }
84 EXPORT_SYMBOL(irq_fpu_usable);
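/*
 * Illustrative sketch (not part of this file): the guard pattern the helper
 * above is meant to support. A driver using SIMD opportunistically would
 * typically check irq_fpu_usable() and fall back to a scalar path otherwise.
 * The crc_* names below are hypothetical, not an existing API.
 *
 *	static void crc_update(struct crc_ctx *ctx, const u8 *data, size_t len)
 *	{
 *		if (irq_fpu_usable()) {
 *			kernel_fpu_begin();
 *			crc_update_simd(ctx, data, len);
 *			kernel_fpu_end();
 *		} else {
 *			crc_update_generic(ctx, data, len);
 *		}
 *	}
 */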
85 
86 /*
87  * Track AVX512 state use because it is known to reduce the maximum
88  * clock speed of the core.
89  */
90 static void update_avx_timestamp(struct fpu *fpu)
91 {
92 
93 #define AVX512_TRACKING_MASK	(XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)
94 
95 	if (fpu->fpstate->regs.xsave.header.xfeatures & AVX512_TRACKING_MASK)
96 		fpu->avx512_timestamp = jiffies;
97 }
98 
99 /*
100  * Save the FPU register state in fpu->fpstate->regs. The register state is
101  * preserved.
102  *
103  * Must be called with fpregs_lock() held.
104  *
105  * The legacy FNSAVE instruction clears all FPU state unconditionally, so
106  * register state has to be reloaded. That might be a pointless exercise
107  * when the FPU is going to be used by another task right after that. But
108  * this only affects 20+ year old 32-bit systems and avoids conditionals all
109  * over the place.
110  *
111  * FXSAVE and all XSAVE variants preserve the FPU register state.
112  */
113 void save_fpregs_to_fpstate(struct fpu *fpu)
114 {
115 	if (likely(use_xsave())) {
116 		os_xsave(fpu->fpstate);
117 		update_avx_timestamp(fpu);
118 		return;
119 	}
120 
121 	if (likely(use_fxsr())) {
122 		fxsave(&fpu->fpstate->regs.fxsave);
123 		return;
124 	}
125 
126 	/*
127 	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
128 	 * so we have to reload them from the memory state.
129 	 */
130 	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));
131 	frstor(&fpu->fpstate->regs.fsave);
132 }
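/*
 * Illustrative sketch (not part of this file): a minimal caller honoring the
 * fpregs_lock() requirement documented above. It mirrors the pattern used by
 * fpu_sync_fpstate() further down in this file.
 *
 *	fpregs_lock();
 *	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
 *		save_fpregs_to_fpstate(&current->thread.fpu);
 *	fpregs_unlock();
 */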
133 
134 void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
135 {
136 	/*
137 	 * AMD K7/K8 and later CPUs up to Zen don't save/restore
138 	 * FDP/FIP/FOP unless an exception is pending. Clear the x87 state
139 	 * here by setting it to fixed values.  "m" is an arbitrary memory
140 	 * operand that is expected to be in the L1 cache.
141 	 */
142 	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
143 		asm volatile(
144 			"fnclex\n\t"
145 			"emms\n\t"
146 			"fildl %P[addr]"	/* set F?P to defined value */
147 			: : [addr] "m" (fpstate));
148 	}
149 
150 	if (use_xsave()) {
151 		/*
152 		 * Dynamically enabled features are enabled in XCR0, but using
153 		 * them also requires the corresponding bits in XFD to be
154 		 * cleared.  If the bits are set, a related instruction raises
155 		 * #NM. This allows the larger FPU buffer to be allocated
156 		 * lazily from the #NM handler, or the task to be killed there
157 		 * if it has no permission, which would otherwise happen via
158 		 * #UD if the feature were disabled in XCR0.
159 		 *
160 		 * XFD state follows the same lifetime rules as XSTATE. To
161 		 * restore state correctly, XFD has to be updated before
162 		 * XRSTORS, otherwise the component would stay in or go into
163 		 * init state even if the bits are set in
164 		 * fpstate::regs::xsave::xfeatures.
165 		 */
166 		xfd_update_state(fpstate);
167 
168 		/*
169 		 * Restoring state always needs to modify all features
170 		 * which are in @mask even if the current task cannot use
171 		 * extended features.
172 		 *
173 		 * So fpstate->xfeatures cannot be used here, because then
174 		 * a feature for which the task has no permission but was
175 		 * used by the previous task would not go into init state.
176 		 */
177 		mask = fpu_kernel_cfg.max_features & mask;
178 
179 		os_xrstor(fpstate, mask);
180 	} else {
181 		if (use_fxsr())
182 			fxrstor(&fpstate->regs.fxsave);
183 		else
184 			frstor(&fpstate->regs.fsave);
185 	}
186 }
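/*
 * Worked example for the mask handling above (hypothetical scenario,
 * assuming the platform supports AMX and @mask covers XTILE_DATA): the
 * previous task had live AMX tile data in the registers, while the incoming
 * task's fpstate neither contains nor permits XTILE_DATA. Because @mask is
 * clamped against fpu_kernel_cfg.max_features rather than against the
 * incoming fpstate->xfeatures, XTILE_DATA stays in the requested mask and
 * XRSTOR(S) moves that component to its init state instead of leaking the
 * previous task's tile registers.
 */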
187 
188 void fpu_reset_from_exception_fixup(void)
189 {
190 	restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE);
191 }
192 
193 #if IS_ENABLED(CONFIG_KVM)
194 static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
195 
196 static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
197 {
198 	struct fpu_state_perm *fpuperm;
199 	u64 perm;
200 
201 	if (!IS_ENABLED(CONFIG_X86_64))
202 		return;
203 
204 	spin_lock_irq(&current->sighand->siglock);
205 	fpuperm = &current->group_leader->thread.fpu.guest_perm;
206 	perm = fpuperm->__state_perm;
207 
208 	/* First fpstate allocation locks down permissions. */
209 	WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
210 
211 	spin_unlock_irq(&current->sighand->siglock);
212 
213 	gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
214 }
215 
216 bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
217 {
218 	struct fpstate *fpstate;
219 	unsigned int size;
220 
221 	size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
222 	fpstate = vzalloc(size);
223 	if (!fpstate)
224 		return false;
225 
226 	/* Leave xfd at 0 (the reset value defined by the spec) */
227 	__fpstate_reset(fpstate, 0);
228 	fpstate_init_user(fpstate);
229 	fpstate->is_valloc	= true;
230 	fpstate->is_guest	= true;
231 
232 	gfpu->fpstate		= fpstate;
233 	gfpu->xfeatures		= fpu_user_cfg.default_features;
234 	gfpu->perm		= fpu_user_cfg.default_features;
235 	gfpu->uabi_size		= fpu_user_cfg.default_size;
236 	fpu_init_guest_permissions(gfpu);
237 
238 	return true;
239 }
240 EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
241 
242 void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
243 {
244 	struct fpstate *fps = gfpu->fpstate;
245 
246 	if (!fps)
247 		return;
248 
249 	if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
250 		return;
251 
252 	gfpu->fpstate = NULL;
253 	vfree(fps);
254 }
255 EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
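/*
 * Illustrative sketch (not part of this file): the alloc/free pair above is
 * meant to bracket the lifetime of a virtual CPU. The call sites below are
 * hypothetical, not KVM's actual code.
 *
 *	if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu))
 *		return -ENOMEM;
 *	...
 *	fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
 */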
256 
257 /**
258  * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable
259  * @guest_fpu:         Pointer to the guest FPU container
260  * @xfeatures:         Features requested by guest CPUID
261  *
262  * Enable all dynamic xfeatures according to guest perm and requested CPUID.
263  *
264  * Return: 0 on success, error code otherwise
265  */
266 int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)
267 {
268 	lockdep_assert_preemption_enabled();
269 
270 	/* Nothing to do if all requested features are already enabled. */
271 	xfeatures &= ~guest_fpu->xfeatures;
272 	if (!xfeatures)
273 		return 0;
274 
275 	return __xfd_enable_feature(xfeatures, guest_fpu);
276 }
277 EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features);
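/*
 * Illustrative sketch (not part of this file): how a hypervisor-side caller
 * might typically use the helper above when userspace sets guest CPUID. The
 * guest_supported_xcr0 variable and the call site are hypothetical, not
 * KVM's actual code.
 *
 *	u64 requested = guest_supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC;
 *
 *	if (requested) {
 *		int err = fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu,
 *							requested);
 *		if (err)
 *			return err;
 *	}
 */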
278 
279 #ifdef CONFIG_X86_64
280 void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
281 {
282 	fpregs_lock();
283 	guest_fpu->fpstate->xfd = xfd;
284 	if (guest_fpu->fpstate->in_use)
285 		xfd_update_state(guest_fpu->fpstate);
286 	fpregs_unlock();
287 }
288 EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
289 
290 /**
291  * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state
292  *
293  * Must be invoked from KVM after a VMEXIT before enabling interrupts when
294  * XFD write emulation is disabled. This is required because the guest can
295  * freely modify XFD and the state at VMEXIT is not guaranteed to be the
296  * same as the state on VMENTER. So the software state has to be updated
297  * before any operation which depends on it can take place.
298  *
299  * Note: It can be invoked unconditionally even when write emulation is
300  * enabled, at the price of a then-pointless MSR read.
301  */
302 void fpu_sync_guest_vmexit_xfd_state(void)
303 {
304 	struct fpstate *fps = current->thread.fpu.fpstate;
305 
306 	lockdep_assert_irqs_disabled();
307 	if (fpu_state_size_dynamic()) {
308 		rdmsrl(MSR_IA32_XFD, fps->xfd);
309 		__this_cpu_write(xfd_state, fps->xfd);
310 	}
311 }
312 EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
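/*
 * Illustrative sketch (not part of this file): per the comment above, a
 * VMEXIT path that does not intercept XFD writes would typically resync
 * before interrupts are reenabled. The xfd_write_intercepted flag is a
 * hypothetical placeholder:
 *
 *	// still with interrupts disabled, right after the hardware VMEXIT
 *	if (!xfd_write_intercepted)
 *		fpu_sync_guest_vmexit_xfd_state();
 */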
313 #endif /* CONFIG_X86_64 */
314 
315 int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
316 {
317 	struct fpstate *guest_fps = guest_fpu->fpstate;
318 	struct fpu *fpu = &current->thread.fpu;
319 	struct fpstate *cur_fps = fpu->fpstate;
320 
321 	fpregs_lock();
322 	if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD))
323 		save_fpregs_to_fpstate(fpu);
324 
325 	/* Swap fpstate */
326 	if (enter_guest) {
327 		fpu->__task_fpstate = cur_fps;
328 		fpu->fpstate = guest_fps;
329 		guest_fps->in_use = true;
330 	} else {
331 		guest_fps->in_use = false;
332 		fpu->fpstate = fpu->__task_fpstate;
333 		fpu->__task_fpstate = NULL;
334 	}
335 
336 	cur_fps = fpu->fpstate;
337 
338 	if (!cur_fps->is_confidential) {
339 		/* Includes XFD update */
340 		restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE);
341 	} else {
342 		/*
343 		 * XSTATE is restored by firmware from encrypted
344 		 * memory. Make sure XFD state is correct while
345 		 * running with the guest fpstate.
346 		 */
347 		xfd_update_state(cur_fps);
348 	}
349 
350 	fpregs_mark_activate();
351 	fpregs_unlock();
352 	return 0;
353 }
354 EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
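/*
 * Illustrative sketch (not part of this file): the intended pairing of the
 * helper above around guest execution. The names are hypothetical, not
 * KVM's actual entry/exit code.
 *
 *	fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);	// load guest FPU
 *	run_vcpu(vcpu);						// guest run loop
 *	fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);	// back to task FPU
 */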
355 
356 void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
357 				    unsigned int size, u32 pkru)
358 {
359 	struct fpstate *kstate = gfpu->fpstate;
360 	union fpregs_state *ustate = buf;
361 	struct membuf mb = { .p = buf, .left = size };
362 
363 	if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
364 		__copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
365 	} else {
366 		memcpy(&ustate->fxsave, &kstate->regs.fxsave,
367 		       sizeof(ustate->fxsave));
368 		/* Make it restorable on an XSAVE-enabled host */
369 		ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;
370 	}
371 }
372 EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi);
373 
374 int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
375 				   u64 xcr0, u32 *vpkru)
376 {
377 	struct fpstate *kstate = gfpu->fpstate;
378 	const union fpregs_state *ustate = buf;
379 	struct pkru_state *xpkru;
380 	int ret;
381 
382 	if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
383 		if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
384 			return -EINVAL;
385 		if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)
386 			return -EINVAL;
387 		memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));
388 		return 0;
389 	}
390 
391 	if (ustate->xsave.header.xfeatures & ~xcr0)
392 		return -EINVAL;
393 
394 	ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
395 	if (ret)
396 		return ret;
397 
398 	/* Retrieve PKRU if not in init state */
399 	if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
400 		xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
401 		*vpkru = xpkru->pkru;
402 	}
403 	return 0;
404 }
405 EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
406 #endif /* CONFIG_KVM */
407 
408 void kernel_fpu_begin_mask(unsigned int kfpu_mask)
409 {
410 	preempt_disable();
411 
412 	WARN_ON_FPU(!irq_fpu_usable());
413 	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
414 
415 	this_cpu_write(in_kernel_fpu, true);
416 
417 	if (!(current->flags & PF_KTHREAD) &&
418 	    !test_thread_flag(TIF_NEED_FPU_LOAD)) {
419 		set_thread_flag(TIF_NEED_FPU_LOAD);
420 		save_fpregs_to_fpstate(&current->thread.fpu);
421 	}
422 	__cpu_invalidate_fpregs_state();
423 
424 	/* Put sane initial values into the control registers. */
425 	if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
426 		ldmxcsr(MXCSR_DEFAULT);
427 
428 	if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
429 		asm volatile ("fninit");
430 }
431 EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
432 
433 void kernel_fpu_end(void)
434 {
435 	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
436 
437 	this_cpu_write(in_kernel_fpu, false);
438 	preempt_enable();
439 }
440 EXPORT_SYMBOL_GPL(kernel_fpu_end);
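/*
 * Note on the mask argument of kernel_fpu_begin_mask(): as the code above
 * shows, KFPU_MXCSR requests a sane MXCSR via LDMXCSR and KFPU_387 requests
 * a sane x87 state via FNINIT. The usage below is an illustrative sketch,
 * not a statement about existing callers: code that only touches SSE/AVX
 * registers can pass KFPU_MXCSR alone and skip the FNINIT.
 *
 *	kernel_fpu_begin_mask(KFPU_MXCSR);
 *	// ...SSE/AVX-only work...
 *	kernel_fpu_end();
 */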
441 
442 /*
443  * Sync the FPU register state to current's in-memory fpstate when the
444  * current task owns the FPU. The hardware register state is preserved.
445  */
446 void fpu_sync_fpstate(struct fpu *fpu)
447 {
448 	WARN_ON_FPU(fpu != &current->thread.fpu);
449 
450 	fpregs_lock();
451 	trace_x86_fpu_before_save(fpu);
452 
453 	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
454 		save_fpregs_to_fpstate(fpu);
455 
456 	trace_x86_fpu_after_save(fpu);
457 	fpregs_unlock();
458 }
459 
460 static inline unsigned int init_fpstate_copy_size(void)
461 {
462 	if (!use_xsave())
463 		return fpu_kernel_cfg.default_size;
464 
465 	/* XSAVE(S) just needs the legacy and the xstate header part */
466 	return sizeof(init_fpstate.regs.xsave);
467 }
468 
469 static inline void fpstate_init_fxstate(struct fpstate *fpstate)
470 {
471 	fpstate->regs.fxsave.cwd = 0x37f;
472 	fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;
473 }
474 
475 /*
476  * Legacy x87 fpstate state init:
477  */
478 static inline void fpstate_init_fstate(struct fpstate *fpstate)
479 {
480 	fpstate->regs.fsave.cwd = 0xffff037fu;
481 	fpstate->regs.fsave.swd = 0xffff0000u;
482 	fpstate->regs.fsave.twd = 0xffffffffu;
483 	fpstate->regs.fsave.fos = 0xffff0000u;
484 }
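/*
 * Decoding of the init values above: a control word of 0x37f masks all x87
 * exceptions and selects extended precision with round-to-nearest, a status
 * word of 0 (in the low 16 bits) means no pending exceptions, and a tag
 * word of 0xffff marks every register as empty. MXCSR_DEFAULT (0x1f80)
 * likewise masks all SSE exceptions with round-to-nearest. The 0xffff upper
 * halves sit outside the architectural 16-bit registers in the 32-bit
 * fields of the legacy save image.
 */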
485 
486 /*
487  * Used in two places:
488  * 1) Early boot to setup init_fpstate for non XSAVE systems
489  * 2) fpu_init_fpstate_user() which is invoked from KVM
490  */
491 void fpstate_init_user(struct fpstate *fpstate)
492 {
493 	if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
494 		fpstate_init_soft(&fpstate->regs.soft);
495 		return;
496 	}
497 
498 	xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures);
499 
500 	if (cpu_feature_enabled(X86_FEATURE_FXSR))
501 		fpstate_init_fxstate(fpstate);
502 	else
503 		fpstate_init_fstate(fpstate);
504 }
505 
506 static void __fpstate_reset(struct fpstate *fpstate, u64 xfd)
507 {
508 	/* Initialize sizes and feature masks */
509 	fpstate->size		= fpu_kernel_cfg.default_size;
510 	fpstate->user_size	= fpu_user_cfg.default_size;
511 	fpstate->xfeatures	= fpu_kernel_cfg.default_features;
512 	fpstate->user_xfeatures	= fpu_user_cfg.default_features;
513 	fpstate->xfd		= xfd;
514 }
515 
516 void fpstate_reset(struct fpu *fpu)
517 {
518 	/* Set the fpstate pointer to the default fpstate */
519 	fpu->fpstate = &fpu->__fpstate;
520 	__fpstate_reset(fpu->fpstate, init_fpstate.xfd);
521 
522 	/* Initialize the permission related info in fpu */
523 	fpu->perm.__state_perm		= fpu_kernel_cfg.default_features;
524 	fpu->perm.__state_size		= fpu_kernel_cfg.default_size;
525 	fpu->perm.__user_state_size	= fpu_user_cfg.default_size;
526 	/* Same defaults for guests */
527 	fpu->guest_perm = fpu->perm;
528 }
529 
530 static inline void fpu_inherit_perms(struct fpu *dst_fpu)
531 {
532 	if (fpu_state_size_dynamic()) {
533 		struct fpu *src_fpu = &current->group_leader->thread.fpu;
534 
535 		spin_lock_irq(&current->sighand->siglock);
536 		/* Fork also inherits the permissions of the parent */
537 		dst_fpu->perm = src_fpu->perm;
538 		dst_fpu->guest_perm = src_fpu->guest_perm;
539 		spin_unlock_irq(&current->sighand->siglock);
540 	}
541 }
542 
543 /* Clone current's FPU state on fork */
544 int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
545 {
546 	struct fpu *src_fpu = &current->thread.fpu;
547 	struct fpu *dst_fpu = &dst->thread.fpu;
548 
549 	/* The new task's FPU state cannot be valid in the hardware. */
550 	dst_fpu->last_cpu = -1;
551 
552 	fpstate_reset(dst_fpu);
553 
554 	if (!cpu_feature_enabled(X86_FEATURE_FPU))
555 		return 0;
556 
557 	/*
558 	 * Enforce reload for user space tasks and prevent kernel threads
559 	 * from trying to save the FPU registers on context switch.
560 	 */
561 	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
562 
563 	/*
564 	 * No FPU state inheritance for kernel threads and IO
565 	 * worker threads.
566 	 */
567 	if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
568 		/* Clear out the minimal state */
569 		memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
570 		       init_fpstate_copy_size());
571 		return 0;
572 	}
573 
574 	/*
575 	 * If a new feature is added, ensure all dynamic features are
576 	 * caller-saved from here!
577 	 */
578 	BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
579 
580 	/*
581 	 * Save the default portion of the current FPU state into the
582 	 * clone. Assume all dynamic features to be defined as caller-
583 	 * saved, which enables skipping both the expansion of fpstate
584 	 * and the copying of any dynamic state.
585 	 *
586 	 * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because
587 	 * copying is not valid when current uses non-default states.
588 	 */
589 	fpregs_lock();
590 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
591 		fpregs_restore_userregs();
592 	save_fpregs_to_fpstate(dst_fpu);
593 	if (!(clone_flags & CLONE_THREAD))
594 		fpu_inherit_perms(dst_fpu);
595 	fpregs_unlock();
596 
597 	/*
598 	 * Children never inherit PASID state.
599 	 * Force it to have its init value:
600 	 */
601 	if (use_xsave())
602 		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
603 
604 	trace_x86_fpu_copy_src(src_fpu);
605 	trace_x86_fpu_copy_dst(dst_fpu);
606 
607 	return 0;
608 }
609 
610 /*
611  * Whitelist the FPU register state embedded into task_struct for hardened
612  * usercopy.
613  */
614 void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
615 {
616 	*offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
617 	*size = fpu_kernel_cfg.default_size;
618 }
619 
620 /*
621  * Drops current FPU state: deactivates the fpregs and
622  * the fpstate. NOTE: it still leaves previous contents
623  * in the fpregs in the eager-FPU case.
624  *
625  * This function can be used in cases where we know that
626  * a state-restore is coming: either an explicit one,
627  * or a reschedule.
628  */
629 void fpu__drop(struct fpu *fpu)
630 {
631 	preempt_disable();
632 
633 	if (fpu == &current->thread.fpu) {
634 		/* Ignore delayed exceptions from user space */
635 		asm volatile("1: fwait\n"
636 			     "2:\n"
637 			     _ASM_EXTABLE(1b, 2b));
638 		fpregs_deactivate(fpu);
639 	}
640 
641 	trace_x86_fpu_dropped(fpu);
642 
643 	preempt_enable();
644 }
645 
646 /*
647  * Clear FPU registers by setting them up from the init fpstate.
648  * Caller must do fpregs_[un]lock() around it.
649  */
650 static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
651 {
652 	if (use_xsave())
653 		os_xrstor(&init_fpstate, features_mask);
654 	else if (use_fxsr())
655 		fxrstor(&init_fpstate.regs.fxsave);
656 	else
657 		frstor(&init_fpstate.regs.fsave);
658 
659 	pkru_write_default();
660 }
661 
662 /*
663  * Reset current->fpu memory state to the init values.
664  */
665 static void fpu_reset_fpregs(void)
666 {
667 	struct fpu *fpu = &current->thread.fpu;
668 
669 	fpregs_lock();
670 	fpu__drop(fpu);
671 	/*
672 	 * This does not change the actual hardware registers. It just
673 	 * resets the memory image and sets TIF_NEED_FPU_LOAD so a
674 	 * subsequent return to usermode will reload the registers from the
675 	 * task's memory image.
676 	 *
677 	 * Do not use fpstate_init() here. Just copy init_fpstate which has
678 	 * the correct content already except for PKRU.
679 	 *
680 	 * PKRU handling does not rely on the xstate when restoring for
681 	 * user space as PKRU is eagerly written in switch_to() and
682 	 * flush_thread().
683 	 */
684 	memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());
685 	set_thread_flag(TIF_NEED_FPU_LOAD);
686 	fpregs_unlock();
687 }
688 
689 /*
690  * Reset current's user FPU states to the init states.  current's
691  * supervisor states, if any, are not modified by this function.  The
692  * caller guarantees that the XSTATE header in memory is intact.
693  */
694 void fpu__clear_user_states(struct fpu *fpu)
695 {
696 	WARN_ON_FPU(fpu != &current->thread.fpu);
697 
698 	fpregs_lock();
699 	if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
700 		fpu_reset_fpregs();
701 		fpregs_unlock();
702 		return;
703 	}
704 
705 	/*
706 	 * Ensure that current's supervisor states are loaded into their
707 	 * corresponding registers.
708 	 */
709 	if (xfeatures_mask_supervisor() &&
710 	    !fpregs_state_valid(fpu, smp_processor_id()))
711 		os_xrstor_supervisor(fpu->fpstate);
712 
713 	/* Reset user states in registers. */
714 	restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
715 
716 	/*
717 	 * Now all FPU registers have their desired values.  Inform the FPU
718 	 * state machine that current's FPU registers are in the hardware
719 	 * registers. The memory image does not need to be updated because
720 	 * any operation relying on it has to save the registers first when
721 	 * current's FPU is marked active.
722 	 */
723 	fpregs_mark_activate();
724 	fpregs_unlock();
725 }
726 
727 void fpu_flush_thread(void)
728 {
729 	fpstate_reset(&current->thread.fpu);
730 	fpu_reset_fpregs();
731 }
732 /*
733  * Load FPU context before returning to userspace.
734  */
735 void switch_fpu_return(void)
736 {
737 	if (!static_cpu_has(X86_FEATURE_FPU))
738 		return;
739 
740 	fpregs_restore_userregs();
741 }
742 EXPORT_SYMBOL_GPL(switch_fpu_return);
743 
744 #ifdef CONFIG_X86_DEBUG_FPU
745 /*
746  * If current FPU state according to its tracking (loaded FPU context on this
747  * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
748  * loaded on return to userland.
749  */
750 void fpregs_assert_state_consistent(void)
751 {
752 	struct fpu *fpu = &current->thread.fpu;
753 
754 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
755 		return;
756 
757 	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
758 }
759 EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
760 #endif
761 
762 void fpregs_mark_activate(void)
763 {
764 	struct fpu *fpu = &current->thread.fpu;
765 
766 	fpregs_activate(fpu);
767 	fpu->last_cpu = smp_processor_id();
768 	clear_thread_flag(TIF_NEED_FPU_LOAD);
769 }
770 
771 /*
772  * x87 math exception handling:
773  */
774 
775 int fpu__exception_code(struct fpu *fpu, int trap_nr)
776 {
777 	int err;
778 
779 	if (trap_nr == X86_TRAP_MF) {
780 		unsigned short cwd, swd;
781 		/*
782 		 * (~cwd & swd) keeps only the exceptions that are pending in the
783 		 * status word and unmasked in the control word.  0x3f are the
784 		 * exception bits in these regs, 0x200 is the C1 flag needed in
785 		 * case of a stack fault, and 0x040 is the stack fault bit.  Only
786 		 * one exception should be taken at a time, so if this combination
787 		 * doesn't produce any single exception, the program is not
788 		 * synchronizing its FPU usage and will suffer the consequences,
789 		 * since the context of the exception cannot be fully reproduced.
790 		 */
791 		if (boot_cpu_has(X86_FEATURE_FXSR)) {
792 			cwd = fpu->fpstate->regs.fxsave.cwd;
793 			swd = fpu->fpstate->regs.fxsave.swd;
794 		} else {
795 			cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;
796 			swd = (unsigned short)fpu->fpstate->regs.fsave.swd;
797 		}
798 
799 		err = swd & ~cwd;
800 	} else {
801 		/*
802 		 * The SIMD FPU exceptions are handled a little differently, as there
803 		 * is only a single status/control register.  Thus, to determine which
804 		 * unmasked exception was caught we must shift down the exception
805 		 * mask bits at 0x1f80 and use them to mask the exception flags at 0x3f.
806 		 */
807 		unsigned short mxcsr = MXCSR_DEFAULT;
808 
809 		if (boot_cpu_has(X86_FEATURE_XMM))
810 			mxcsr = fpu->fpstate->regs.fxsave.mxcsr;
811 
812 		err = ~(mxcsr >> 7) & mxcsr;
813 	}
814 
815 	if (err & 0x001) {	/* Invalid op */
816 		/*
817 		 * swd & 0x240 == 0x040: Stack Underflow
818 		 * swd & 0x240 == 0x240: Stack Overflow
819 		 * User must clear the SF bit (0x40) if set
820 		 */
821 		return FPE_FLTINV;
822 	} else if (err & 0x004) { /* Divide by Zero */
823 		return FPE_FLTDIV;
824 	} else if (err & 0x008) { /* Overflow */
825 		return FPE_FLTOVF;
826 	} else if (err & 0x012) { /* Denormal, Underflow */
827 		return FPE_FLTUND;
828 	} else if (err & 0x020) { /* Precision */
829 		return FPE_FLTRES;
830 	}
831 
832 	/*
833 	 * If we're using IRQ 13, or supposedly even some X86_TRAP_MF
834 	 * implementations, it's possible we get a spurious trap,
835 	 * which is not an error.
836 	 */
837 	return 0;
838 }
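/*
 * Worked example for the SIMD branch above (hypothetical register value):
 * with only the divide-by-zero mask (ZM, bit 9) cleared and the ZE flag
 * (bit 2) set, mxcsr is 0x1d84. Then mxcsr >> 7 is 0x3b, and ~0x3b & 0x1d84
 * leaves bit 2 set in the low six bits, so err & 0x004 is true and the
 * function returns FPE_FLTDIV.
 */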
839