xref: /openbmc/linux/arch/arm64/kernel/fpsimd.c (revision 24f68eb5bf14a74027946970a18bc902e19d986a)
1caab277bSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
253631b54SCatalin Marinas /*
353631b54SCatalin Marinas  * FP/SIMD context switching and fault handling
453631b54SCatalin Marinas  *
553631b54SCatalin Marinas  * Copyright (C) 2012 ARM Ltd.
653631b54SCatalin Marinas  * Author: Catalin Marinas <catalin.marinas@arm.com>
753631b54SCatalin Marinas  */
853631b54SCatalin Marinas 
97582e220SDave Martin #include <linux/bitmap.h>
10d06b76beSDave Martin #include <linux/bitops.h>
11cb84d11eSDave Martin #include <linux/bottom_half.h>
12bc0ee476SDave Martin #include <linux/bug.h>
137582e220SDave Martin #include <linux/cache.h>
14bc0ee476SDave Martin #include <linux/compat.h>
151e570f51SDave Martin #include <linux/compiler.h>
1632365e64SJanet Liu #include <linux/cpu.h>
17fb1ab1abSLorenzo Pieralisi #include <linux/cpu_pm.h>
1897bcbee4SMark Brown #include <linux/ctype.h>
1953631b54SCatalin Marinas #include <linux/kernel.h>
2094ef7ecbSDave Martin #include <linux/linkage.h>
21bc0ee476SDave Martin #include <linux/irqflags.h>
2253631b54SCatalin Marinas #include <linux/init.h>
23cb84d11eSDave Martin #include <linux/percpu.h>
242d2123bcSDave Martin #include <linux/prctl.h>
254328825dSDave Martin #include <linux/preempt.h>
26bc0ee476SDave Martin #include <linux/ptrace.h>
273f07c014SIngo Molnar #include <linux/sched/signal.h>
28bc0ee476SDave Martin #include <linux/sched/task_stack.h>
2953631b54SCatalin Marinas #include <linux/signal.h>
30bc0ee476SDave Martin #include <linux/slab.h>
3131dc52b3SDave Martin #include <linux/stddef.h>
324ffa09a9SDave Martin #include <linux/sysctl.h>
3341040cf7SDave Martin #include <linux/swab.h>
3453631b54SCatalin Marinas 
35af4a81b9SDave Martin #include <asm/esr.h>
36c6b90d5cSTian Tao #include <asm/exception.h>
3753631b54SCatalin Marinas #include <asm/fpsimd.h>
38c0cda3b8SDave Martin #include <asm/cpufeature.h>
3953631b54SCatalin Marinas #include <asm/cputype.h>
40c6b90d5cSTian Tao #include <asm/neon.h>
412cf97d46SDave Martin #include <asm/processor.h>
424328825dSDave Martin #include <asm/simd.h>
43bc0ee476SDave Martin #include <asm/sigcontext.h>
44bc0ee476SDave Martin #include <asm/sysreg.h>
45bc0ee476SDave Martin #include <asm/traps.h>
46d06b76beSDave Martin #include <asm/virt.h>
4753631b54SCatalin Marinas 
4853631b54SCatalin Marinas #define FPEXC_IOF	(1 << 0)
4953631b54SCatalin Marinas #define FPEXC_DZF	(1 << 1)
5053631b54SCatalin Marinas #define FPEXC_OFF	(1 << 2)
5153631b54SCatalin Marinas #define FPEXC_UFF	(1 << 3)
5253631b54SCatalin Marinas #define FPEXC_IXF	(1 << 4)
5353631b54SCatalin Marinas #define FPEXC_IDF	(1 << 7)
5453631b54SCatalin Marinas 
5553631b54SCatalin Marinas /*
56bc0ee476SDave Martin  * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
57bc0ee476SDave Martin  *
58005f78cdSArd Biesheuvel  * In order to reduce the number of times the FPSIMD state is needlessly saved
59005f78cdSArd Biesheuvel  * and restored, we need to keep track of two things:
60005f78cdSArd Biesheuvel  * (a) for each task, we need to remember which CPU was the last one to have
61005f78cdSArd Biesheuvel  *     the task's FPSIMD state loaded into its FPSIMD registers;
62005f78cdSArd Biesheuvel  * (b) for each CPU, we need to remember which task's userland FPSIMD state has
63005f78cdSArd Biesheuvel  *     been loaded into its FPSIMD registers most recently, or whether it has
64005f78cdSArd Biesheuvel  *     been used to perform kernel mode NEON in the meantime.
65005f78cdSArd Biesheuvel  *
6620b85472SDave Martin  * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
67005f78cdSArd Biesheuvel  * the id of the current CPU every time the state is loaded onto a CPU. For (b),
68005f78cdSArd Biesheuvel  * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
69005f78cdSArd Biesheuvel  * address of the userland FPSIMD state of the task most recently loaded
70005f78cdSArd Biesheuvel  * onto the CPU, or NULL if kernel mode NEON has been performed after that.
71005f78cdSArd Biesheuvel  *
72005f78cdSArd Biesheuvel  * With this in place, we no longer have to restore the next FPSIMD state right
73005f78cdSArd Biesheuvel  * when switching between tasks. Instead, we can defer this check to userland
74005f78cdSArd Biesheuvel  * resume, at which time we verify whether the CPU's fpsimd_last_state and the
7520b85472SDave Martin  * task's fpsimd_cpu are still mutually in sync. If this is the case, we
76005f78cdSArd Biesheuvel  * can omit the FPSIMD restore.
77005f78cdSArd Biesheuvel  *
78005f78cdSArd Biesheuvel  * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
79005f78cdSArd Biesheuvel  * indicate whether or not the userland FPSIMD state of the current task is
80005f78cdSArd Biesheuvel  * present in the registers. The flag is set unless the FPSIMD registers of this
81005f78cdSArd Biesheuvel  * CPU currently contain the most recent userland FPSIMD state of the current
8231aa126dSMarc Zyngier  * task. If the task is behaving as a VMM, then this will be managed by
8331aa126dSMarc Zyngier  * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
8431aa126dSMarc Zyngier  * loaded on the CPU, allowing the state to be saved if an FPSIMD-aware
8531aa126dSMarc Zyngier  * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
8631aa126dSMarc Zyngier  * flag the register state as invalid.
87005f78cdSArd Biesheuvel  *
88cb84d11eSDave Martin  * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
89cb84d11eSDave Martin  * save the task's FPSIMD context back to task_struct from softirq context.
90cb84d11eSDave Martin  * To prevent this from racing with the manipulation of the task's FPSIMD state
91cb84d11eSDave Martin  * from task context and thereby corrupting the state, it is necessary to
92cb84d11eSDave Martin  * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
936dcdefcdSJulien Grall  * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
946dcdefcdSJulien Grall  * run but prevent them from using FPSIMD.
95cb84d11eSDave Martin  *
96005f78cdSArd Biesheuvel  * For a certain task, the sequence may look something like this:
9720b85472SDave Martin  * - the task gets scheduled in; if the task's fpsimd_cpu field contains the
98005f78cdSArd Biesheuvel  *   id of the current CPU and the CPU's fpsimd_last_state per-cpu variable
99005f78cdSArd Biesheuvel  *   points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
100005f78cdSArd Biesheuvel  *   cleared; otherwise it is set;
101005f78cdSArd Biesheuvel  *
102005f78cdSArd Biesheuvel  * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
103005f78cdSArd Biesheuvel  *   userland FPSIMD state is copied from memory to the registers, the task's
10420b85472SDave Martin  *   fpsimd_cpu field is set to the id of the current CPU, the current
105005f78cdSArd Biesheuvel  *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
106005f78cdSArd Biesheuvel  *   TIF_FOREIGN_FPSTATE flag is cleared;
107005f78cdSArd Biesheuvel  *
108005f78cdSArd Biesheuvel  * - the task executes an ordinary syscall; upon return to userland, the
109005f78cdSArd Biesheuvel  *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
110005f78cdSArd Biesheuvel  *   restored;
111005f78cdSArd Biesheuvel  *
112005f78cdSArd Biesheuvel  * - the task executes a syscall which executes some NEON instructions; this is
113005f78cdSArd Biesheuvel  *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
114005f78cdSArd Biesheuvel  *   register contents to memory, clears the fpsimd_last_state per-cpu variable
115005f78cdSArd Biesheuvel  *   and sets the TIF_FOREIGN_FPSTATE flag;
116005f78cdSArd Biesheuvel  *
117005f78cdSArd Biesheuvel  * - the task gets preempted after kernel_neon_end() is called; as we have not
118005f78cdSArd Biesheuvel  *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
119005f78cdSArd Biesheuvel  *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
120005f78cdSArd Biesheuvel  */
121cb968afcSDave Martin 
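/*
 * Illustrative sketch of the check described above, as performed when a
 * task is scheduled in (cf. fpsimd_thread_switch(), later in this file;
 * this is a simplified rendering, not the exact code):
 *
 *	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
 *			&next->thread.uw.fpsimd_state;
 *	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
 *
 *	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
 *			       wrong_task || wrong_cpu);
 */
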
1221192b93bSMark Brown static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
123005f78cdSArd Biesheuvel 
124b5bc00ffSMark Brown __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
125b5bc00ffSMark Brown #ifdef CONFIG_ARM64_SVE
126b5bc00ffSMark Brown 	[ARM64_VEC_SVE] = {
127b5bc00ffSMark Brown 		.type			= ARM64_VEC_SVE,
128b5bc00ffSMark Brown 		.name			= "SVE",
129b5bc00ffSMark Brown 		.min_vl			= SVE_VL_MIN,
130b5bc00ffSMark Brown 		.max_vl			= SVE_VL_MIN,
131b5bc00ffSMark Brown 		.max_virtualisable_vl	= SVE_VL_MIN,
132b5bc00ffSMark Brown 	},
133b5bc00ffSMark Brown #endif
134b42990d3SMark Brown #ifdef CONFIG_ARM64_SME
135b42990d3SMark Brown 	[ARM64_VEC_SME] = {
136b42990d3SMark Brown 		.type			= ARM64_VEC_SME,
137b42990d3SMark Brown 		.name			= "SME",
138b42990d3SMark Brown 	},
139b42990d3SMark Brown #endif
140b5bc00ffSMark Brown };
141b5bc00ffSMark Brown 
1425838a155SMark Brown static unsigned int vec_vl_inherit_flag(enum vec_type type)
1435838a155SMark Brown {
1445838a155SMark Brown 	switch (type) {
1455838a155SMark Brown 	case ARM64_VEC_SVE:
1465838a155SMark Brown 		return TIF_SVE_VL_INHERIT;
1479e4ab6c8SMark Brown 	case ARM64_VEC_SME:
1489e4ab6c8SMark Brown 		return TIF_SME_VL_INHERIT;
1495838a155SMark Brown 	default:
1505838a155SMark Brown 		WARN_ON_ONCE(1);
1515838a155SMark Brown 		return 0;
1525838a155SMark Brown 	}
1535838a155SMark Brown }
1545838a155SMark Brown 
155b5bc00ffSMark Brown struct vl_config {
156b5bc00ffSMark Brown 	int __default_vl;		/* Default VL for tasks */
157b5bc00ffSMark Brown };
158b5bc00ffSMark Brown 
159b5bc00ffSMark Brown static struct vl_config vl_config[ARM64_VEC_MAX];
160b5bc00ffSMark Brown 
16104ee53a5SMark Brown static inline int get_default_vl(enum vec_type type)
162b5bc00ffSMark Brown {
163b5bc00ffSMark Brown 	return READ_ONCE(vl_config[type].__default_vl);
164b5bc00ffSMark Brown }
1651e570f51SDave Martin 
16604ee53a5SMark Brown #ifdef CONFIG_ARM64_SVE
16704ee53a5SMark Brown 
16804ee53a5SMark Brown static inline int get_sve_default_vl(void)
1691e570f51SDave Martin {
170b5bc00ffSMark Brown 	return get_default_vl(ARM64_VEC_SVE);
1711e570f51SDave Martin }
17279ab047cSDave Martin 
17304ee53a5SMark Brown static inline void set_default_vl(enum vec_type type, int val)
1741e570f51SDave Martin {
175b5bc00ffSMark Brown 	WRITE_ONCE(vl_config[type].__default_vl, val);
1761e570f51SDave Martin }
1771e570f51SDave Martin 
17804ee53a5SMark Brown static inline void set_sve_default_vl(int val)
179b5bc00ffSMark Brown {
180b5bc00ffSMark Brown 	set_default_vl(ARM64_VEC_SVE, val);
181b5bc00ffSMark Brown }
182624835abSDave Martin 
183fdfa976cSDave Martin static void __percpu *efi_sve_state;
1847582e220SDave Martin 
1857582e220SDave Martin #else /* ! CONFIG_ARM64_SVE */
1867582e220SDave Martin 
1877582e220SDave Martin /* Dummy declaration for code that will be optimised out: */
188fdfa976cSDave Martin extern void __percpu *efi_sve_state;
1897582e220SDave Martin 
1907582e220SDave Martin #endif /* ! CONFIG_ARM64_SVE */
1917582e220SDave Martin 
192b42990d3SMark Brown #ifdef CONFIG_ARM64_SME
193b42990d3SMark Brown 
194b42990d3SMark Brown static int get_sme_default_vl(void)
195b42990d3SMark Brown {
196b42990d3SMark Brown 	return get_default_vl(ARM64_VEC_SME);
197b42990d3SMark Brown }
198b42990d3SMark Brown 
199b42990d3SMark Brown static void set_sme_default_vl(int val)
200b42990d3SMark Brown {
201b42990d3SMark Brown 	set_default_vl(ARM64_VEC_SME, val);
202b42990d3SMark Brown }
203b42990d3SMark Brown 
2048bd7f91cSMark Brown static void sme_free(struct task_struct *);
2058bd7f91cSMark Brown 
2068bd7f91cSMark Brown #else
2078bd7f91cSMark Brown 
2088bd7f91cSMark Brown static inline void sme_free(struct task_struct *t) { }
2098bd7f91cSMark Brown 
210b42990d3SMark Brown #endif
211b42990d3SMark Brown 
2126dcdefcdSJulien Grall DEFINE_PER_CPU(bool, fpsimd_context_busy);
2136dcdefcdSJulien Grall EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
2146dcdefcdSJulien Grall 
215b24b5205SMark Brown static void fpsimd_bind_task_to_cpu(void);
216b24b5205SMark Brown 
2176dcdefcdSJulien Grall static void __get_cpu_fpsimd_context(void)
2186dcdefcdSJulien Grall {
2196dcdefcdSJulien Grall 	bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
2206dcdefcdSJulien Grall 
2216dcdefcdSJulien Grall 	WARN_ON(busy);
2226dcdefcdSJulien Grall }
2236dcdefcdSJulien Grall 
2246dcdefcdSJulien Grall /*
2256dcdefcdSJulien Grall  * Claim ownership of the CPU FPSIMD context for use by the calling context.
2266dcdefcdSJulien Grall  *
2276dcdefcdSJulien Grall  * The caller may freely manipulate the FPSIMD context metadata until
2286dcdefcdSJulien Grall  * put_cpu_fpsimd_context() is called.
2296dcdefcdSJulien Grall  *
2306dcdefcdSJulien Grall  * The double-underscore version must only be called if you know the task
2316dcdefcdSJulien Grall  * can't be preempted.
232696207d4SSebastian Andrzej Siewior  *
233696207d4SSebastian Andrzej Siewior  * On RT kernels local_bh_disable() is not sufficient because it only
234696207d4SSebastian Andrzej Siewior  * serializes soft interrupt related sections via a local lock, but stays
235696207d4SSebastian Andrzej Siewior  * preemptible. Disabling preemption is the right choice here as bottom
236696207d4SSebastian Andrzej Siewior  * half processing is always in thread context on RT kernels so it
237696207d4SSebastian Andrzej Siewior  * implicitly prevents bottom half processing as well.
2386dcdefcdSJulien Grall  */
2396dcdefcdSJulien Grall static void get_cpu_fpsimd_context(void)
2406dcdefcdSJulien Grall {
241696207d4SSebastian Andrzej Siewior 	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
24213150149SArd Biesheuvel 		local_bh_disable();
243696207d4SSebastian Andrzej Siewior 	else
244696207d4SSebastian Andrzej Siewior 		preempt_disable();
2456dcdefcdSJulien Grall 	__get_cpu_fpsimd_context();
2466dcdefcdSJulien Grall }
2476dcdefcdSJulien Grall 
2486dcdefcdSJulien Grall static void __put_cpu_fpsimd_context(void)
2496dcdefcdSJulien Grall {
2506dcdefcdSJulien Grall 	bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
2516dcdefcdSJulien Grall 
2526dcdefcdSJulien Grall 	WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
2536dcdefcdSJulien Grall }
2546dcdefcdSJulien Grall 
2556dcdefcdSJulien Grall /*
2566dcdefcdSJulien Grall  * Release the CPU FPSIMD context.
2576dcdefcdSJulien Grall  *
2586dcdefcdSJulien Grall  * Must be called from a context in which get_cpu_fpsimd_context() was
2596dcdefcdSJulien Grall  * previously called, with no call to put_cpu_fpsimd_context() in the
2606dcdefcdSJulien Grall  * meantime.
2616dcdefcdSJulien Grall  */
2626dcdefcdSJulien Grall static void put_cpu_fpsimd_context(void)
2636dcdefcdSJulien Grall {
2646dcdefcdSJulien Grall 	__put_cpu_fpsimd_context();
265696207d4SSebastian Andrzej Siewior 	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
26613150149SArd Biesheuvel 		local_bh_enable();
267696207d4SSebastian Andrzej Siewior 	else
268696207d4SSebastian Andrzej Siewior 		preempt_enable();
2696dcdefcdSJulien Grall }
2706dcdefcdSJulien Grall 
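/*
 * Typical usage pattern (a sketch based on callers in this file, e.g.
 * vec_set_vector_length() below):
 *
 *	get_cpu_fpsimd_context();
 *	fpsimd_save();		// registers -> task_struct
 *	// ... manipulate the task's FPSIMD/SVE state ...
 *	put_cpu_fpsimd_context();
 */
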
2716dcdefcdSJulien Grall static bool have_cpu_fpsimd_context(void)
2726dcdefcdSJulien Grall {
2736dcdefcdSJulien Grall 	return !preemptible() && __this_cpu_read(fpsimd_context_busy);
2746dcdefcdSJulien Grall }
2756dcdefcdSJulien Grall 
2765838a155SMark Brown unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
2770423eedcSMark Brown {
2785838a155SMark Brown 	return task->thread.vl[type];
2790423eedcSMark Brown }
2800423eedcSMark Brown 
2815838a155SMark Brown void task_set_vl(struct task_struct *task, enum vec_type type,
2825838a155SMark Brown 		 unsigned long vl)
2830423eedcSMark Brown {
2845838a155SMark Brown 	task->thread.vl[type] = vl;
2850423eedcSMark Brown }
2860423eedcSMark Brown 
2875838a155SMark Brown unsigned int task_get_vl_onexec(const struct task_struct *task,
2885838a155SMark Brown 				enum vec_type type)
2890423eedcSMark Brown {
2905838a155SMark Brown 	return task->thread.vl_onexec[type];
2910423eedcSMark Brown }
2920423eedcSMark Brown 
2935838a155SMark Brown void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
2945838a155SMark Brown 			unsigned long vl)
2950423eedcSMark Brown {
2965838a155SMark Brown 	task->thread.vl_onexec[type] = vl;
2970423eedcSMark Brown }
2980423eedcSMark Brown 
299bc0ee476SDave Martin /*
300af7167d6SMark Brown  * TIF_SME controls whether a task can use SME without trapping while
301af7167d6SMark Brown  * in userspace; when TIF_SME is set we must have storage
30297b5576bSDongxu Sun  * allocated in sve_state and sme_state to store the contents of both ZA
303af7167d6SMark Brown  * and the SVE registers for both streaming and non-streaming modes.
304af7167d6SMark Brown  *
305af7167d6SMark Brown  * If both SVCR.ZA and SVCR.SM are disabled then at any point we
306af7167d6SMark Brown  * may disable TIF_SME and reenable traps.
307af7167d6SMark Brown  */
308af7167d6SMark Brown 
309af7167d6SMark Brown 
310af7167d6SMark Brown /*
311bc0ee476SDave Martin  * TIF_SVE controls whether a task can use SVE without trapping while
312af7167d6SMark Brown  * in userspace, and also (together with TIF_SME) the way a task's
313af7167d6SMark Brown  * FPSIMD/SVE state is stored in thread_struct.
314bc0ee476SDave Martin  *
315bc0ee476SDave Martin  * The kernel uses this flag to track whether a user task is actively
316bc0ee476SDave Martin  * using SVE, and therefore whether full SVE register state needs to
317bc0ee476SDave Martin  * be tracked.  If not, the cheaper FPSIMD context handling code can
318bc0ee476SDave Martin  * be used instead of the more costly SVE equivalents.
319bc0ee476SDave Martin  *
320af7167d6SMark Brown  *  * TIF_SVE or SVCR.SM set:
321bc0ee476SDave Martin  *
322bc0ee476SDave Martin  *    The task can execute SVE instructions while in userspace without
323bc0ee476SDave Martin  *    trapping to the kernel.
324bc0ee476SDave Martin  *
325bc0ee476SDave Martin  *    During any syscall, the kernel may optionally clear TIF_SVE and
326bc0ee476SDave Martin  *    discard the vector state except for the FPSIMD subset.
327bc0ee476SDave Martin  *
328bc0ee476SDave Martin  *  * TIF_SVE clear:
329bc0ee476SDave Martin  *
330bc0ee476SDave Martin  *    An attempt by the user task to execute an SVE instruction causes
331bc0ee476SDave Martin  *    do_sve_acc() to be called, which does some preparation and then
332bc0ee476SDave Martin  *    sets TIF_SVE.
333bc0ee476SDave Martin  *
337baa85152SMark Brown  * The data will be stored in one of two formats:
338baa85152SMark Brown  *
339baa85152SMark Brown  *  * FPSIMD only - FP_STATE_FPSIMD:
340baa85152SMark Brown  *
341baa85152SMark Brown  *    When only the FPSIMD state is stored, task->thread.fp_type is set to
342baa85152SMark Brown  *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
34365896545SDave Martin  *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
344bc0ee476SDave Martin  *    logically zero but not stored anywhere; P0-P15 and FFR are not
345bc0ee476SDave Martin  *    stored and have unspecified values from userspace's point of
346bc0ee476SDave Martin  *    view.  For hygiene purposes, the kernel zeroes them on next use,
347bc0ee476SDave Martin  *    but userspace is discouraged from relying on this.
348bc0ee476SDave Martin  *
349bc0ee476SDave Martin  *    task->thread.sve_state does not need to be non-NULL, valid or any
350deeb8f9aSMark Brown  *    particular size: it must not be dereferenced and any data stored
351deeb8f9aSMark Brown  *    there should be considered stale and not referenced.
352bc0ee476SDave Martin  *
353baa85152SMark Brown  *  * SVE state - FP_STATE_SVE:
354baa85152SMark Brown  *
355baa85152SMark Brown  *    When the full SVE state is stored, task->thread.fp_type is set to
356baa85152SMark Brown  *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
357baa85152SMark Brown  *    corresponding Zn), P0-P15 and FFR are encoded in
358baa85152SMark Brown  *    task->thread.sve_state, formatted appropriately for vector
359baa85152SMark Brown  *    length task->thread.sve_vl or, if SVCR.SM is set,
360baa85152SMark Brown  *    task->thread.sme_vl. The storage for the vector registers in
361baa85152SMark Brown  *    task->thread.uw.fpsimd_state should be ignored.
362baa85152SMark Brown  *
363baa85152SMark Brown  *    task->thread.sve_state must point to a valid buffer at least
364deeb8f9aSMark Brown  *    sve_state_size(task) bytes in size. The data stored in
365deeb8f9aSMark Brown  *    task->thread.uw.fpsimd_state.vregs should be considered stale
366deeb8f9aSMark Brown  *    and not referenced.
367bc0ee476SDave Martin  *
36865896545SDave Martin  *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
36965896545SDave Martin  *    irrespective of whether TIF_SVE is clear or set, since these are
37065896545SDave Martin  *    not vector length dependent.
371bc0ee476SDave Martin  */
372bc0ee476SDave Martin 
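/*
 * Condensed sketch of the two storage formats described above (see
 * task_fpsimd_load() below for the real decision logic):
 *
 *	if (task->thread.fp_type == FP_STATE_SVE)
 *		// Z0-Z31, P0-P15 and (where applicable) FFR live in
 *		// task->thread.sve_state; uw.fpsimd_state.vregs is stale
 *	else	// FP_STATE_FPSIMD
 *		// V0-V31, FPSR and FPCR live in
 *		// task->thread.uw.fpsimd_state; sve_state is stale
 */
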
373bc0ee476SDave Martin /*
374bc0ee476SDave Martin  * Update current's FPSIMD/SVE registers from thread_struct.
375bc0ee476SDave Martin  *
376bc0ee476SDave Martin  * This function should be called only when the FPSIMD/SVE state in
377bc0ee476SDave Martin  * thread_struct is known to be up to date, when preparing to enter
378bc0ee476SDave Martin  * userspace.
379bc0ee476SDave Martin  */
380bc0ee476SDave Martin static void task_fpsimd_load(void)
381bc0ee476SDave Martin {
382af7167d6SMark Brown 	bool restore_sve_regs = false;
383af7167d6SMark Brown 	bool restore_ffr;
384af7167d6SMark Brown 
38552f73c38SSuzuki K Poulose 	WARN_ON(!system_supports_fpsimd());
3866dcdefcdSJulien Grall 	WARN_ON(!have_cpu_fpsimd_context());
387bc0ee476SDave Martin 
3880cab5b49SMark Brown 	if (system_supports_sve() || system_supports_sme()) {
389a0136be4SMark Brown 		switch (current->thread.fp_type) {
390a0136be4SMark Brown 		case FP_STATE_FPSIMD:
391a0136be4SMark Brown 			/* Stop tracking SVE for this task until next use. */
392a0136be4SMark Brown 			if (test_and_clear_thread_flag(TIF_SVE))
393a0136be4SMark Brown 				sve_user_disable();
394a0136be4SMark Brown 			break;
395a0136be4SMark Brown 		case FP_STATE_SVE:
396a0136be4SMark Brown 			if (!thread_sm_enabled(&current->thread) &&
397a0136be4SMark Brown 			    !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
398a0136be4SMark Brown 				sve_user_enable();
399a0136be4SMark Brown 
400a0136be4SMark Brown 			if (test_thread_flag(TIF_SVE))
401ddc806b5SMark Brown 				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
402a0136be4SMark Brown 
403af7167d6SMark Brown 			restore_sve_regs = true;
404af7167d6SMark Brown 			restore_ffr = true;
405a0136be4SMark Brown 			break;
406a0136be4SMark Brown 		default:
407a0136be4SMark Brown 			/*
408a0136be4SMark Brown 			 * This indicates either a bug in
409a0136be4SMark Brown 			 * fpsimd_save() or memory corruption; we
410a0136be4SMark Brown 			 * should always record an explicit format
411a0136be4SMark Brown 			 * when we save. We always at least have the
412a0136be4SMark Brown 			 * memory allocated for FPSIMD registers so
413a0136be4SMark Brown 			 * try that and hope for the best.
414a0136be4SMark Brown 			 */
415a0136be4SMark Brown 			WARN_ON_ONCE(1);
416a0136be4SMark Brown 			clear_thread_flag(TIF_SVE);
417a0136be4SMark Brown 			break;
418a0136be4SMark Brown 		}
419bc0ee476SDave Martin 	}
420af7167d6SMark Brown 
421af7167d6SMark Brown 	/* Restore SME, override SVE register configuration if needed */
422af7167d6SMark Brown 	if (system_supports_sme()) {
423af7167d6SMark Brown 		unsigned long sme_vl = task_get_sme_vl(current);
424af7167d6SMark Brown 
4250033cd93SMark Brown 		/* Ensure VL is set up for restoring data */
426af7167d6SMark Brown 		if (test_thread_flag(TIF_SME))
427af7167d6SMark Brown 			sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
428af7167d6SMark Brown 
429ec0067a6SMark Brown 		write_sysreg_s(current->thread.svcr, SYS_SVCR);
430af7167d6SMark Brown 
4310033cd93SMark Brown 		if (thread_za_enabled(&current->thread))
43295fcec71SMark Brown 			sme_load_state(current->thread.sme_state,
43395fcec71SMark Brown 				       system_supports_sme2());
4340033cd93SMark Brown 
435a0136be4SMark Brown 		if (thread_sm_enabled(&current->thread))
436af7167d6SMark Brown 			restore_ffr = system_supports_fa64();
437af7167d6SMark Brown 	}
438af7167d6SMark Brown 
439baa85152SMark Brown 	if (restore_sve_regs) {
440baa85152SMark Brown 		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
441af7167d6SMark Brown 		sve_load_state(sve_pffr(&current->thread),
442af7167d6SMark Brown 			       &current->thread.uw.fpsimd_state.fpsr,
443af7167d6SMark Brown 			       restore_ffr);
444baa85152SMark Brown 	} else {
445baa85152SMark Brown 		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
446af7167d6SMark Brown 		fpsimd_load_state(&current->thread.uw.fpsimd_state);
447ddc806b5SMark Brown 	}
448baa85152SMark Brown }
449bc0ee476SDave Martin 
450bc0ee476SDave Martin /*
451d1797615SDave Martin  * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
452432110cdSMark Brown  * date with respect to the CPU registers. Note carefully that the
453432110cdSMark Brown  * current context is the context last bound to the CPU, stored in
454432110cdSMark Brown  * 'last'; if KVM is involved this may be the guest VM context rather
455432110cdSMark Brown  * than the host thread for the VM pointed to by current. This means
456432110cdSMark Brown  * that we must always reference the state storage via last rather
45762021cc3SMark Brown  * than via current; if we are saving KVM state then it will have
45862021cc3SMark Brown  * ensured that the type of registers to save is set in last->to_save.
459bc0ee476SDave Martin  */
46054b8c7cbSJulien Grall static void fpsimd_save(void)
461bc0ee476SDave Martin {
4621192b93bSMark Brown 	struct cpu_fp_state const *last =
46304950674SDave Martin 		this_cpu_ptr(&fpsimd_last_state);
464e6b673b7SDave Martin 	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
465af7167d6SMark Brown 	bool save_sve_regs = false;
466af7167d6SMark Brown 	bool save_ffr;
467af7167d6SMark Brown 	unsigned int vl;
468d1797615SDave Martin 
46952f73c38SSuzuki K Poulose 	WARN_ON(!system_supports_fpsimd());
4706dcdefcdSJulien Grall 	WARN_ON(!have_cpu_fpsimd_context());
471bc0ee476SDave Martin 
4722d481bd3SMark Brown 	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
4732d481bd3SMark Brown 		return;
4742d481bd3SMark Brown 
4758c845e27SMark Brown 	/*
4768c845e27SMark Brown 	 * If a task is in a syscall, the ABI allows us to preserve
4778c845e27SMark Brown 	 * only the state shared with FPSIMD, so don't bother
4788c845e27SMark Brown 	 * saving the full SVE state in that case.
4798c845e27SMark Brown 	 */
4808c845e27SMark Brown 	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
4818c845e27SMark Brown 	     !in_syscall(current_pt_regs())) ||
48262021cc3SMark Brown 	    last->to_save == FP_STATE_SVE) {
483af7167d6SMark Brown 		save_sve_regs = true;
484af7167d6SMark Brown 		save_ffr = true;
485af7167d6SMark Brown 		vl = last->sve_vl;
486b40c559bSMark Brown 	}
487b40c559bSMark Brown 
488af7167d6SMark Brown 	if (system_supports_sme()) {
489af7167d6SMark Brown 		u64 *svcr = last->svcr;
490af7167d6SMark Brown 
491ec0067a6SMark Brown 		*svcr = read_sysreg_s(SYS_SVCR);
4920033cd93SMark Brown 
493ec0067a6SMark Brown 		if (*svcr & SVCR_ZA_MASK)
49495fcec71SMark Brown 			sme_save_state(last->sme_state,
49595fcec71SMark Brown 				       system_supports_sme2());
496af7167d6SMark Brown 
497af7167d6SMark Brown 		/* If we are in streaming mode, override regular SVE. */
498ec0067a6SMark Brown 		if (*svcr & SVCR_SM_MASK) {
499af7167d6SMark Brown 			save_sve_regs = true;
500af7167d6SMark Brown 			save_ffr = system_supports_fa64();
501af7167d6SMark Brown 			vl = last->sme_vl;
502af7167d6SMark Brown 		}
503af7167d6SMark Brown 	}
504af7167d6SMark Brown 
505af7167d6SMark Brown 	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
506af7167d6SMark Brown 		/* Get the configured VL from RDVL, will account for SM */
507af7167d6SMark Brown 		if (WARN_ON(sve_get_vl() != vl)) {
508bc0ee476SDave Martin 			/*
509bc0ee476SDave Martin 			 * Can't save the user regs, so current would
510bc0ee476SDave Martin 			 * re-enter user with corrupt state.
511bc0ee476SDave Martin 			 * There's no way to recover, so kill it:
512bc0ee476SDave Martin 			 */
5134ef333b2SAmit Daniel Kachhap 			force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
514bc0ee476SDave Martin 			return;
515bc0ee476SDave Martin 		}
516bc0ee476SDave Martin 
51704950674SDave Martin 		sve_save_state((char *)last->sve_state +
518af7167d6SMark Brown 					sve_ffr_offset(vl),
519af7167d6SMark Brown 			       &last->st->fpsr, save_ffr);
520baa85152SMark Brown 		*last->fp_type = FP_STATE_SVE;
5212d481bd3SMark Brown 	} else {
52204950674SDave Martin 		fpsimd_save_state(last->st);
523baa85152SMark Brown 		*last->fp_type = FP_STATE_FPSIMD;
524bc0ee476SDave Martin 	}
525bc0ee476SDave Martin }
526bc0ee476SDave Martin 
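/*
 * Illustrative sequence for the KVM case mentioned above (a hedged
 * sketch, not the actual hypervisor code; 'vcpu_fp_state' is a
 * stand-in name): once KVM has bound the guest's cpu_fp_state to this
 * CPU, a later fpsimd_save() writes the guest registers out via 'last'
 * rather than via current:
 *
 *	fpsimd_bind_state_to_cpu(&vcpu_fp_state);
 *	// ... guest runs and may dirty the FP/SIMD registers ...
 *	fpsimd_save();	// saves via last->st as directed by last->to_save
 */
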
5277582e220SDave Martin /*
5287582e220SDave Martin  * All vector length selection from userspace comes through here.
5297582e220SDave Martin  * We're on a slow path, so some sanity-checks are included.
5307582e220SDave Martin  * If things go wrong there's a bug somewhere, but try to fall back to a
5317582e220SDave Martin  * safe choice.
5327582e220SDave Martin  */
533b5bc00ffSMark Brown static unsigned int find_supported_vector_length(enum vec_type type,
534b5bc00ffSMark Brown 						 unsigned int vl)
5357582e220SDave Martin {
536b5bc00ffSMark Brown 	struct vl_info *info = &vl_info[type];
5377582e220SDave Martin 	int bit;
538b5bc00ffSMark Brown 	int max_vl = info->max_vl;
5397582e220SDave Martin 
5407582e220SDave Martin 	if (WARN_ON(!sve_vl_valid(vl)))
541b5bc00ffSMark Brown 		vl = info->min_vl;
5427582e220SDave Martin 
5437582e220SDave Martin 	if (WARN_ON(!sve_vl_valid(max_vl)))
544b5bc00ffSMark Brown 		max_vl = info->min_vl;
5457582e220SDave Martin 
5467582e220SDave Martin 	if (vl > max_vl)
5477582e220SDave Martin 		vl = max_vl;
548b42990d3SMark Brown 	if (vl < info->min_vl)
549b42990d3SMark Brown 		vl = info->min_vl;
5507582e220SDave Martin 
551b5bc00ffSMark Brown 	bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
552ead9e430SDave Martin 			    __vq_to_bit(sve_vq_from_vl(vl)));
553ead9e430SDave Martin 	return sve_vl_from_vq(__bit_to_vq(bit));
5547582e220SDave Martin }
5557582e220SDave Martin 
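/*
 * Note on the lookup above (assuming the mainline encoding of vq_map):
 * VQs are recorded at reversed bit positions, with smaller bit indices
 * denoting larger VQs, so the find_next_bit() walk yields the largest
 * supported vector length not exceeding the request.  For example, on
 * hardware supporting only 16- and 32-byte VLs:
 *
 *	find_supported_vector_length(ARM64_VEC_SVE, 64);	// -> 32
 *	find_supported_vector_length(ARM64_VEC_SVE, 32);	// -> 32
 *	find_supported_vector_length(ARM64_VEC_SVE, 16);	// -> 16
 */
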
556e575fb9eSWill Deacon #if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
5574ffa09a9SDave Martin 
55897bcbee4SMark Brown static int vec_proc_do_default_vl(struct ctl_table *table, int write,
55932927393SChristoph Hellwig 				  void *buffer, size_t *lenp, loff_t *ppos)
5604ffa09a9SDave Martin {
56197bcbee4SMark Brown 	struct vl_info *info = table->extra1;
56297bcbee4SMark Brown 	enum vec_type type = info->type;
5634ffa09a9SDave Martin 	int ret;
56497bcbee4SMark Brown 	int vl = get_default_vl(type);
5654ffa09a9SDave Martin 	struct ctl_table tmp_table = {
5664ffa09a9SDave Martin 		.data = &vl,
5674ffa09a9SDave Martin 		.maxlen = sizeof(vl),
5684ffa09a9SDave Martin 	};
5694ffa09a9SDave Martin 
5704ffa09a9SDave Martin 	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
5714ffa09a9SDave Martin 	if (ret || !write)
5724ffa09a9SDave Martin 		return ret;
5734ffa09a9SDave Martin 
5744ffa09a9SDave Martin 	/* Writing -1 has the special meaning "set to max": */
57587c021a8SDave Martin 	if (vl == -1)
576b5bc00ffSMark Brown 		vl = info->max_vl;
5774ffa09a9SDave Martin 
5784ffa09a9SDave Martin 	if (!sve_vl_valid(vl))
5794ffa09a9SDave Martin 		return -EINVAL;
5804ffa09a9SDave Martin 
58197bcbee4SMark Brown 	set_default_vl(type, find_supported_vector_length(type, vl));
5824ffa09a9SDave Martin 	return 0;
5834ffa09a9SDave Martin }
5844ffa09a9SDave Martin 
5854ffa09a9SDave Martin static struct ctl_table sve_default_vl_table[] = {
5864ffa09a9SDave Martin 	{
5874ffa09a9SDave Martin 		.procname	= "sve_default_vector_length",
5884ffa09a9SDave Martin 		.mode		= 0644,
58997bcbee4SMark Brown 		.proc_handler	= vec_proc_do_default_vl,
59097bcbee4SMark Brown 		.extra1		= &vl_info[ARM64_VEC_SVE],
5914ffa09a9SDave Martin 	},
5924ffa09a9SDave Martin 	{ }
5934ffa09a9SDave Martin };
5944ffa09a9SDave Martin 
5954ffa09a9SDave Martin static int __init sve_sysctl_init(void)
5964ffa09a9SDave Martin {
5974ffa09a9SDave Martin 	if (system_supports_sve())
5984ffa09a9SDave Martin 		if (!register_sysctl("abi", sve_default_vl_table))
5994ffa09a9SDave Martin 			return -EINVAL;
6004ffa09a9SDave Martin 
6014ffa09a9SDave Martin 	return 0;
6024ffa09a9SDave Martin }
6034ffa09a9SDave Martin 
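/*
 * Resulting userspace interface (sketch): the default VL for new
 * processes is exposed via procfs, with -1 selecting the maximum
 * supported length as handled in vec_proc_do_default_vl() above:
 *
 *	# cat /proc/sys/abi/sve_default_vector_length
 *	# echo 32 > /proc/sys/abi/sve_default_vector_length
 *	# echo -1 > /proc/sys/abi/sve_default_vector_length
 */
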
604e575fb9eSWill Deacon #else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
6054ffa09a9SDave Martin static int __init sve_sysctl_init(void) { return 0; }
606e575fb9eSWill Deacon #endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
6074ffa09a9SDave Martin 
60812f1bacfSMark Brown #if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
60912f1bacfSMark Brown static struct ctl_table sme_default_vl_table[] = {
61012f1bacfSMark Brown 	{
61112f1bacfSMark Brown 		.procname	= "sme_default_vector_length",
61212f1bacfSMark Brown 		.mode		= 0644,
61312f1bacfSMark Brown 		.proc_handler	= vec_proc_do_default_vl,
61412f1bacfSMark Brown 		.extra1		= &vl_info[ARM64_VEC_SME],
61512f1bacfSMark Brown 	},
61612f1bacfSMark Brown 	{ }
61712f1bacfSMark Brown };
61812f1bacfSMark Brown 
61912f1bacfSMark Brown static int __init sme_sysctl_init(void)
62012f1bacfSMark Brown {
62112f1bacfSMark Brown 	if (system_supports_sme())
62212f1bacfSMark Brown 		if (!register_sysctl("abi", sme_default_vl_table))
62312f1bacfSMark Brown 			return -EINVAL;
62412f1bacfSMark Brown 
62512f1bacfSMark Brown 	return 0;
62612f1bacfSMark Brown }
62712f1bacfSMark Brown 
62812f1bacfSMark Brown #else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
62912f1bacfSMark Brown static int __init sme_sysctl_init(void) { return 0; }
63012f1bacfSMark Brown #endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
63112f1bacfSMark Brown 
632bc0ee476SDave Martin #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
633bc0ee476SDave Martin 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
634bc0ee476SDave Martin 
63541040cf7SDave Martin #ifdef CONFIG_CPU_BIG_ENDIAN
63641040cf7SDave Martin static __uint128_t arm64_cpu_to_le128(__uint128_t x)
63741040cf7SDave Martin {
63841040cf7SDave Martin 	u64 a = swab64(x);
63941040cf7SDave Martin 	u64 b = swab64(x >> 64);
64041040cf7SDave Martin 
64141040cf7SDave Martin 	return ((__uint128_t)a << 64) | b;
64241040cf7SDave Martin }
64341040cf7SDave Martin #else
64441040cf7SDave Martin static __uint128_t arm64_cpu_to_le128(__uint128_t x)
64541040cf7SDave Martin {
64641040cf7SDave Martin 	return x;
64741040cf7SDave Martin }
64841040cf7SDave Martin #endif
64941040cf7SDave Martin 
65041040cf7SDave Martin #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
65141040cf7SDave Martin 
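/*
 * The #define above works because arm64_cpu_to_le128() is its own
 * inverse: for x = HI:LO it returns swab64(LO):swab64(HI), and applying
 * that twice restores HI:LO, so one function serves both directions.
 */
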
652d16af870SDave Martin static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
653d16af870SDave Martin 			    unsigned int vq)
654d16af870SDave Martin {
655d16af870SDave Martin 	unsigned int i;
656d16af870SDave Martin 	__uint128_t *p;
657d16af870SDave Martin 
658ed2f3e9fSDave Martin 	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
659d16af870SDave Martin 		p = (__uint128_t *)ZREG(sst, vq, i);
660d16af870SDave Martin 		*p = arm64_cpu_to_le128(fst->vregs[i]);
661d16af870SDave Martin 	}
662d16af870SDave Martin }
663d16af870SDave Martin 
664bc0ee476SDave Martin /*
66565896545SDave Martin  * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
666bc0ee476SDave Martin  * task->thread.sve_state.
667bc0ee476SDave Martin  *
668bc0ee476SDave Martin  * Task can be a non-runnable task, or current.  In the latter case,
6696dcdefcdSJulien Grall  * the caller must have ownership of the cpu FPSIMD context before calling
6706dcdefcdSJulien Grall  * this function.
671bc0ee476SDave Martin  * task->thread.sve_state must point to at least sve_state_size(task)
672bc0ee476SDave Martin  * bytes of allocated kernel memory.
67365896545SDave Martin  * task->thread.uw.fpsimd_state must be up to date before calling this
67465896545SDave Martin  * function.
675bc0ee476SDave Martin  */
676bc0ee476SDave Martin static void fpsimd_to_sve(struct task_struct *task)
677bc0ee476SDave Martin {
678bc0ee476SDave Martin 	unsigned int vq;
679bc0ee476SDave Martin 	void *sst = task->thread.sve_state;
68065896545SDave Martin 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
681bc0ee476SDave Martin 
682507ea5ddSMark Brown 	if (!system_supports_sve() && !system_supports_sme())
683bc0ee476SDave Martin 		return;
684bc0ee476SDave Martin 
685e12310a0SMark Brown 	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
686d16af870SDave Martin 	__fpsimd_to_sve(sst, fst, vq);
687bc0ee476SDave Martin }
688bc0ee476SDave Martin 
6898cd969d2SDave Martin /*
6908cd969d2SDave Martin  * Transfer the SVE state in task->thread.sve_state to
69165896545SDave Martin  * task->thread.uw.fpsimd_state.
6928cd969d2SDave Martin  *
6938cd969d2SDave Martin  * Task can be a non-runnable task, or current.  In the latter case,
6946dcdefcdSJulien Grall  * the caller must have ownership of the cpu FPSIMD context before calling
6956dcdefcdSJulien Grall  * this function.
6968cd969d2SDave Martin  * task->thread.sve_state must point to at least sve_state_size(task)
6978cd969d2SDave Martin  * bytes of allocated kernel memory.
6988cd969d2SDave Martin  * task->thread.sve_state must be up to date before calling this function.
6998cd969d2SDave Martin  */
7008cd969d2SDave Martin static void sve_to_fpsimd(struct task_struct *task)
7018cd969d2SDave Martin {
702e12310a0SMark Brown 	unsigned int vq, vl;
7038cd969d2SDave Martin 	void const *sst = task->thread.sve_state;
70465896545SDave Martin 	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
7058cd969d2SDave Martin 	unsigned int i;
70641040cf7SDave Martin 	__uint128_t const *p;
7078cd969d2SDave Martin 
708507ea5ddSMark Brown 	if (!system_supports_sve() && !system_supports_sme())
7098cd969d2SDave Martin 		return;
7108cd969d2SDave Martin 
711e12310a0SMark Brown 	vl = thread_get_cur_vl(&task->thread);
712e12310a0SMark Brown 	vq = sve_vq_from_vl(vl);
713ed2f3e9fSDave Martin 	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
71441040cf7SDave Martin 		p = (__uint128_t const *)ZREG(sst, vq, i);
71541040cf7SDave Martin 		fst->vregs[i] = arm64_le128_to_cpu(*p);
71641040cf7SDave Martin 	}
7178cd969d2SDave Martin }
7188cd969d2SDave Martin 
719bc0ee476SDave Martin #ifdef CONFIG_ARM64_SVE
7208e1f78a9SGeert Uytterhoeven /*
7218e1f78a9SGeert Uytterhoeven  * Call __sve_free() directly only if you know task can't be scheduled
7228e1f78a9SGeert Uytterhoeven  * or preempted.
7238e1f78a9SGeert Uytterhoeven  */
7248e1f78a9SGeert Uytterhoeven static void __sve_free(struct task_struct *task)
7258e1f78a9SGeert Uytterhoeven {
7268e1f78a9SGeert Uytterhoeven 	kfree(task->thread.sve_state);
7278e1f78a9SGeert Uytterhoeven 	task->thread.sve_state = NULL;
7288e1f78a9SGeert Uytterhoeven }
7298e1f78a9SGeert Uytterhoeven 
7308e1f78a9SGeert Uytterhoeven static void sve_free(struct task_struct *task)
7318e1f78a9SGeert Uytterhoeven {
7328e1f78a9SGeert Uytterhoeven 	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
7338e1f78a9SGeert Uytterhoeven 
7348e1f78a9SGeert Uytterhoeven 	__sve_free(task);
7358e1f78a9SGeert Uytterhoeven }
736bc0ee476SDave Martin 
737bc0ee476SDave Martin /*
738bc0ee476SDave Martin  * Return how many bytes of memory are required to store the full SVE
739bc0ee476SDave Martin  * state for task, given task's currently configured vector length.
740bc0ee476SDave Martin  */
7418bd7f91cSMark Brown size_t sve_state_size(struct task_struct const *task)
742bc0ee476SDave Martin {
743af7167d6SMark Brown 	unsigned int vl = 0;
744af7167d6SMark Brown 
745af7167d6SMark Brown 	if (system_supports_sve())
746af7167d6SMark Brown 		vl = task_get_sve_vl(task);
747af7167d6SMark Brown 	if (system_supports_sme())
748af7167d6SMark Brown 		vl = max(vl, task_get_sme_vl(task));
749af7167d6SMark Brown 
750af7167d6SMark Brown 	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
751bc0ee476SDave Martin }
752bc0ee476SDave Martin 
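/*
 * Rough size illustration (an estimate, not an ABI statement): with
 * VQ = vl / 16, the backing store covers 32 Z-registers of 16 * VQ
 * bytes each plus 17 predicate-sized registers (P0-P15 and FFR) of
 * 2 * VQ bytes each.  At the architectural maximum VL of 256 bytes
 * (VQ = 16) that is roughly 8.5K, matching the "maximum ~8KB" note
 * in sve_alloc() below.
 */
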
753bc0ee476SDave Martin /*
754bc0ee476SDave Martin  * Ensure that task->thread.sve_state is allocated and sufficiently large.
755bc0ee476SDave Martin  *
756bc0ee476SDave Martin  * This function should be used only in preparation for replacing
757bc0ee476SDave Martin  * task->thread.sve_state with new data.  The memory is always zeroed
758bc0ee476SDave Martin  * here to prevent stale data from showing through: this is done in
759bc0ee476SDave Martin  * the interest of testability and predictability, since except in the
760bc0ee476SDave Martin  * do_sve_acc() case there is no ABI requirement to hide stale data
761bc0ee476SDave Martin  * written previously by the task.
762bc0ee476SDave Martin  */
763826a4fddSMark Brown void sve_alloc(struct task_struct *task, bool flush)
764bc0ee476SDave Martin {
765bc0ee476SDave Martin 	if (task->thread.sve_state) {
766826a4fddSMark Brown 		if (flush)
767826a4fddSMark Brown 			memset(task->thread.sve_state, 0,
768826a4fddSMark Brown 			       sve_state_size(task));
769bc0ee476SDave Martin 		return;
770bc0ee476SDave Martin 	}
771bc0ee476SDave Martin 
772bc0ee476SDave Martin 	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
773bc0ee476SDave Martin 	task->thread.sve_state =
774bc0ee476SDave Martin 		kzalloc(sve_state_size(task), GFP_KERNEL);
775bc0ee476SDave Martin }
776bc0ee476SDave Martin 
77743d4da2cSDave Martin 
77843d4da2cSDave Martin /*
779e12310a0SMark Brown  * Force the FPSIMD state shared with SVE to be updated in the SVE state
780e12310a0SMark Brown  * even if the SVE state is the current active state.
781e12310a0SMark Brown  *
782e12310a0SMark Brown  * This should only be called by ptrace.  task must be non-runnable.
783e12310a0SMark Brown  * task->thread.sve_state must point to at least sve_state_size(task)
784e12310a0SMark Brown  * bytes of allocated kernel memory.
785e12310a0SMark Brown  */
786e12310a0SMark Brown void fpsimd_force_sync_to_sve(struct task_struct *task)
787e12310a0SMark Brown {
788e12310a0SMark Brown 	fpsimd_to_sve(task);
789e12310a0SMark Brown }
790e12310a0SMark Brown 
791e12310a0SMark Brown /*
79243d4da2cSDave Martin  * Ensure that task->thread.sve_state is up to date with respect to
79343d4da2cSDave Martin  * the user task, irrespective of when SVE is in use or not.
79443d4da2cSDave Martin  *
79543d4da2cSDave Martin  * This should only be called by ptrace.  task must be non-runnable.
79643d4da2cSDave Martin  * task->thread.sve_state must point to at least sve_state_size(task)
79743d4da2cSDave Martin  * bytes of allocated kernel memory.
79843d4da2cSDave Martin  */
79943d4da2cSDave Martin void fpsimd_sync_to_sve(struct task_struct *task)
80043d4da2cSDave Martin {
801e12310a0SMark Brown 	if (!test_tsk_thread_flag(task, TIF_SVE) &&
802e12310a0SMark Brown 	    !thread_sm_enabled(&task->thread))
80343d4da2cSDave Martin 		fpsimd_to_sve(task);
80443d4da2cSDave Martin }
80543d4da2cSDave Martin 
80643d4da2cSDave Martin /*
80765896545SDave Martin  * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
80843d4da2cSDave Martin  * the user task, irrespective of whether SVE is in use or not.
80943d4da2cSDave Martin  *
81043d4da2cSDave Martin  * This should only be called by ptrace.  task must be non-runnable.
81143d4da2cSDave Martin  * task->thread.sve_state must point to at least sve_state_size(task)
81243d4da2cSDave Martin  * bytes of allocated kernel memory.
81343d4da2cSDave Martin  */
81443d4da2cSDave Martin void sve_sync_to_fpsimd(struct task_struct *task)
81543d4da2cSDave Martin {
816bbc6172eSMark Brown 	if (task->thread.fp_type == FP_STATE_SVE)
81743d4da2cSDave Martin 		sve_to_fpsimd(task);
81843d4da2cSDave Martin }
81943d4da2cSDave Martin 
82043d4da2cSDave Martin /*
82143d4da2cSDave Martin  * Ensure that task->thread.sve_state is up to date with respect to
82265896545SDave Martin  * the task->thread.uw.fpsimd_state.
82343d4da2cSDave Martin  *
82443d4da2cSDave Martin  * This should only be called by ptrace to merge new FPSIMD register
82543d4da2cSDave Martin  * values into a task for which SVE is currently active.
82643d4da2cSDave Martin  * task must be non-runnable.
82743d4da2cSDave Martin  * task->thread.sve_state must point to at least sve_state_size(task)
82843d4da2cSDave Martin  * bytes of allocated kernel memory.
82965896545SDave Martin  * task->thread.uw.fpsimd_state must already have been initialised with
83043d4da2cSDave Martin  * the new FPSIMD register values to be merged in.
83143d4da2cSDave Martin  */
83243d4da2cSDave Martin void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
83343d4da2cSDave Martin {
83443d4da2cSDave Martin 	unsigned int vq;
83543d4da2cSDave Martin 	void *sst = task->thread.sve_state;
83665896545SDave Martin 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
83743d4da2cSDave Martin 
83869af56aeSMark Brown 	if (!test_tsk_thread_flag(task, TIF_SVE) &&
83969af56aeSMark Brown 	    !thread_sm_enabled(&task->thread))
84043d4da2cSDave Martin 		return;
84143d4da2cSDave Martin 
842e12310a0SMark Brown 	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
84343d4da2cSDave Martin 
84443d4da2cSDave Martin 	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
845d16af870SDave Martin 	__fpsimd_to_sve(sst, fst, vq);
84643d4da2cSDave Martin }
84743d4da2cSDave Martin 
84830c43e73SMark Brown int vec_set_vector_length(struct task_struct *task, enum vec_type type,
8497582e220SDave Martin 			  unsigned long vl, unsigned long flags)
8507582e220SDave Martin {
851d4d5be94SMark Brown 	bool free_sme = false;
852d4d5be94SMark Brown 
8537582e220SDave Martin 	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
8547582e220SDave Martin 				     PR_SVE_SET_VL_ONEXEC))
8557582e220SDave Martin 		return -EINVAL;
8567582e220SDave Martin 
8577582e220SDave Martin 	if (!sve_vl_valid(vl))
8587582e220SDave Martin 		return -EINVAL;
8597582e220SDave Martin 
8607582e220SDave Martin 	/*
86130c43e73SMark Brown 	 * Clamp to the maximum vector length that VL-agnostic code
86230c43e73SMark Brown 	 * can work with.  A flag may be assigned in the future to
86330c43e73SMark Brown 	 * allow setting of larger vector lengths without confusing
86430c43e73SMark Brown 	 * older software.
8657582e220SDave Martin 	 */
86630c43e73SMark Brown 	if (vl > VL_ARCH_MAX)
86730c43e73SMark Brown 		vl = VL_ARCH_MAX;
8687582e220SDave Martin 
86930c43e73SMark Brown 	vl = find_supported_vector_length(type, vl);
8707582e220SDave Martin 
8717582e220SDave Martin 	if (flags & (PR_SVE_VL_INHERIT |
8727582e220SDave Martin 		     PR_SVE_SET_VL_ONEXEC))
87330c43e73SMark Brown 		task_set_vl_onexec(task, type, vl);
8747582e220SDave Martin 	else
8757582e220SDave Martin 		/* Reset VL to system default on next exec: */
87630c43e73SMark Brown 		task_set_vl_onexec(task, type, 0);
8777582e220SDave Martin 
8787582e220SDave Martin 	/* Only actually set the VL if not deferred: */
8797582e220SDave Martin 	if (flags & PR_SVE_SET_VL_ONEXEC)
8807582e220SDave Martin 		goto out;
8817582e220SDave Martin 
88230c43e73SMark Brown 	if (vl == task_get_vl(task, type))
8837582e220SDave Martin 		goto out;
8847582e220SDave Martin 
8857582e220SDave Martin 	/*
8867582e220SDave Martin 	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
8877582e220SDave Martin 	 * write any live register state back to task_struct, and convert to a
888e12310a0SMark Brown 	 * regular FPSIMD thread.
8897582e220SDave Martin 	 */
8907582e220SDave Martin 	if (task == current) {
8916dcdefcdSJulien Grall 		get_cpu_fpsimd_context();
8927582e220SDave Martin 
893d1797615SDave Martin 		fpsimd_save();
8947582e220SDave Martin 	}
8957582e220SDave Martin 
8967582e220SDave Martin 	fpsimd_flush_task_state(task);
897af7167d6SMark Brown 	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
898baa85152SMark Brown 	    thread_sm_enabled(&task->thread)) {
8997582e220SDave Martin 		sve_to_fpsimd(task);
900baa85152SMark Brown 		task->thread.fp_type = FP_STATE_FPSIMD;
901baa85152SMark Brown 	}
9027582e220SDave Martin 
903d4d5be94SMark Brown 	if (system_supports_sme()) {
904d4d5be94SMark Brown 		if (type == ARM64_VEC_SME ||
905d4d5be94SMark Brown 		    !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
906d4d5be94SMark Brown 			/*
907d4d5be94SMark Brown 			 * We are changing the SME VL or weren't using
908d4d5be94SMark Brown 			 * SME anyway; discard the state and force a
909d4d5be94SMark Brown 			 * reallocation.
910d4d5be94SMark Brown 			 */
911ec0067a6SMark Brown 			task->thread.svcr &= ~(SVCR_SM_MASK |
912ec0067a6SMark Brown 					       SVCR_ZA_MASK);
913c9bb40b7SMark Brown 			clear_tsk_thread_flag(task, TIF_SME);
914d4d5be94SMark Brown 			free_sme = true;
915d4d5be94SMark Brown 		}
9168bd7f91cSMark Brown 	}
917b40c559bSMark Brown 
9187582e220SDave Martin 	if (task == current)
9196dcdefcdSJulien Grall 		put_cpu_fpsimd_context();
9207582e220SDave Martin 
92105d881b8SMark Brown 	task_set_vl(task, type, vl);
92205d881b8SMark Brown 
9237582e220SDave Martin 	/*
924d4d5be94SMark Brown 	 * Free the changed states if they are not in use; SME will be
925d4d5be94SMark Brown 	 * reallocated to the correct size on next use, and we just
926d4d5be94SMark Brown 	 * allocate SVE now in case it is needed for use in streaming
927d4d5be94SMark Brown 	 * mode.
9287582e220SDave Martin 	 */
929d4d5be94SMark Brown 	if (system_supports_sve()) {
9307582e220SDave Martin 		sve_free(task);
931d4d5be94SMark Brown 		sve_alloc(task, true);
932d4d5be94SMark Brown 	}
933d4d5be94SMark Brown 
934d4d5be94SMark Brown 	if (free_sme)
9358bd7f91cSMark Brown 		sme_free(task);
9367582e220SDave Martin 
9377582e220SDave Martin out:
93830c43e73SMark Brown 	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
93909d1223aSDave Martin 			       flags & PR_SVE_VL_INHERIT);
9407582e220SDave Martin 
9417582e220SDave Martin 	return 0;
9427582e220SDave Martin }
9437582e220SDave Martin 
944bc0ee476SDave Martin /*
9452d2123bcSDave Martin  * Encode the current vector length and flags for return.
94630c43e73SMark Brown  * This is only required for prctl(): ptrace has separate fields.
94730c43e73SMark Brown  * SVE and SME use the same bits for _ONEXEC and _INHERIT.
9482d2123bcSDave Martin  *
94930c43e73SMark Brown  * flags are as for vec_set_vector_length().
9502d2123bcSDave Martin  */
95130c43e73SMark Brown static int vec_prctl_status(enum vec_type type, unsigned long flags)
9522d2123bcSDave Martin {
9532d2123bcSDave Martin 	int ret;
9542d2123bcSDave Martin 
9552d2123bcSDave Martin 	if (flags & PR_SVE_SET_VL_ONEXEC)
95630c43e73SMark Brown 		ret = task_get_vl_onexec(current, type);
9572d2123bcSDave Martin 	else
95830c43e73SMark Brown 		ret = task_get_vl(current, type);
9592d2123bcSDave Martin 
96030c43e73SMark Brown 	if (test_thread_flag(vec_vl_inherit_flag(type)))
9612d2123bcSDave Martin 		ret |= PR_SVE_VL_INHERIT;
9622d2123bcSDave Martin 
9632d2123bcSDave Martin 	return ret;
9642d2123bcSDave Martin }
9652d2123bcSDave Martin 
9662d2123bcSDave Martin /* PR_SVE_SET_VL */
9672d2123bcSDave Martin int sve_set_current_vl(unsigned long arg)
9682d2123bcSDave Martin {
9692d2123bcSDave Martin 	unsigned long vl, flags;
9702d2123bcSDave Martin 	int ret;
9712d2123bcSDave Martin 
9722d2123bcSDave Martin 	vl = arg & PR_SVE_VL_LEN_MASK;
9732d2123bcSDave Martin 	flags = arg & ~vl;
9742d2123bcSDave Martin 
9754b7a6ce7SPeter Collingbourne 	if (!system_supports_sve() || is_compat_task())
9762d2123bcSDave Martin 		return -EINVAL;
9772d2123bcSDave Martin 
97830c43e73SMark Brown 	ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
9792d2123bcSDave Martin 	if (ret)
9802d2123bcSDave Martin 		return ret;
9812d2123bcSDave Martin 
98230c43e73SMark Brown 	return vec_prctl_status(ARM64_VEC_SVE, flags);
9832d2123bcSDave Martin }
9842d2123bcSDave Martin 
9852d2123bcSDave Martin /* PR_SVE_GET_VL */
9862d2123bcSDave Martin int sve_get_current_vl(void)
9872d2123bcSDave Martin {
9884b7a6ce7SPeter Collingbourne 	if (!system_supports_sve() || is_compat_task())
9892d2123bcSDave Martin 		return -EINVAL;
9902d2123bcSDave Martin 
99130c43e73SMark Brown 	return vec_prctl_status(ARM64_VEC_SVE, 0);
9922d2123bcSDave Martin }
9932d2123bcSDave Martin 
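/*
 * Illustrative userspace usage of the two prctls above (a sketch; the
 * documented ABI is authoritative):
 *
 *	// Request a 32-byte VL that is also kept across exec:
 *	prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);
 *
 *	// Read back what was actually granted:
 *	int ret = prctl(PR_SVE_GET_VL);
 *	unsigned int vl = ret & PR_SVE_VL_LEN_MASK;
 */
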
9949e4ab6c8SMark Brown #ifdef CONFIG_ARM64_SME
9959e4ab6c8SMark Brown /* PR_SME_SET_VL */
9969e4ab6c8SMark Brown int sme_set_current_vl(unsigned long arg)
9979e4ab6c8SMark Brown {
9989e4ab6c8SMark Brown 	unsigned long vl, flags;
9999e4ab6c8SMark Brown 	int ret;
10009e4ab6c8SMark Brown 
10019e4ab6c8SMark Brown 	vl = arg & PR_SME_VL_LEN_MASK;
10029e4ab6c8SMark Brown 	flags = arg & ~vl;
10039e4ab6c8SMark Brown 
10049e4ab6c8SMark Brown 	if (!system_supports_sme() || is_compat_task())
10059e4ab6c8SMark Brown 		return -EINVAL;
10069e4ab6c8SMark Brown 
10079e4ab6c8SMark Brown 	ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
10089e4ab6c8SMark Brown 	if (ret)
10099e4ab6c8SMark Brown 		return ret;
10109e4ab6c8SMark Brown 
10119e4ab6c8SMark Brown 	return vec_prctl_status(ARM64_VEC_SME, flags);
10129e4ab6c8SMark Brown }
10139e4ab6c8SMark Brown 
10149e4ab6c8SMark Brown /* PR_SME_GET_VL */
10159e4ab6c8SMark Brown int sme_get_current_vl(void)
10169e4ab6c8SMark Brown {
10179e4ab6c8SMark Brown 	if (!system_supports_sme() || is_compat_task())
10189e4ab6c8SMark Brown 		return -EINVAL;
10199e4ab6c8SMark Brown 
10209e4ab6c8SMark Brown 	return vec_prctl_status(ARM64_VEC_SME, 0);
10219e4ab6c8SMark Brown }
10229e4ab6c8SMark Brown #endif /* CONFIG_ARM64_SME */
10239e4ab6c8SMark Brown 
1024b5bc00ffSMark Brown static void vec_probe_vqs(struct vl_info *info,
1025b5bc00ffSMark Brown 			  DECLARE_BITMAP(map, SVE_VQ_MAX))
10262e0f2478SDave Martin {
10272e0f2478SDave Martin 	unsigned int vq, vl;
10282e0f2478SDave Martin 
10292e0f2478SDave Martin 	bitmap_zero(map, SVE_VQ_MAX);
10302e0f2478SDave Martin 
10312e0f2478SDave Martin 	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
1032b5bc00ffSMark Brown 		write_vl(info->type, vq - 1); /* self-syncing */
1033b42990d3SMark Brown 
1034b42990d3SMark Brown 		switch (info->type) {
1035b42990d3SMark Brown 		case ARM64_VEC_SVE:
10362e0f2478SDave Martin 			vl = sve_get_vl();
1037b42990d3SMark Brown 			break;
1038b42990d3SMark Brown 		case ARM64_VEC_SME:
1039b42990d3SMark Brown 			vl = sme_get_vl();
1040b42990d3SMark Brown 			break;
1041b42990d3SMark Brown 		default:
1042b42990d3SMark Brown 			vl = 0;
1043b42990d3SMark Brown 			break;
1044b42990d3SMark Brown 		}
1045b42990d3SMark Brown 
1046b42990d3SMark Brown 		/* Minimum VL identified? */
1047b42990d3SMark Brown 		if (sve_vq_from_vl(vl) > vq)
1048b42990d3SMark Brown 			break;
1049b42990d3SMark Brown 
10502e0f2478SDave Martin 		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
1051ead9e430SDave Martin 		set_bit(__vq_to_bit(vq), map);
10522e0f2478SDave Martin 	}
10532e0f2478SDave Martin }
10542e0f2478SDave Martin 
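/*
 * Worked example for vec_probe_vqs() (hypothetical hardware supporting
 * only the 16-, 32- and 64-byte VLs): the first iteration requests the
 * architectural maximum; the hardware clamps the read-back VL to 64
 * bytes (vq = 4), that bit is set and the loop resumes from vq = 4,
 * skipping the unimplemented lengths in between.  On SME a request
 * below the minimum supported VL may be rounded up rather than down,
 * which is what the "Minimum VL identified?" check catches.
 */
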
10558b08e840SDave Martin /*
10568b08e840SDave Martin  * Initialise the set of known supported VQs for the boot CPU.
10578b08e840SDave Martin  * This is called during kernel boot, before secondary CPUs are brought up.
10588b08e840SDave Martin  */
1059b5bc00ffSMark Brown void __init vec_init_vq_map(enum vec_type type)
10602e0f2478SDave Martin {
1061b5bc00ffSMark Brown 	struct vl_info *info = &vl_info[type];
1062b5bc00ffSMark Brown 	vec_probe_vqs(info, info->vq_map);
1063b5bc00ffSMark Brown 	bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
10642e0f2478SDave Martin }
10652e0f2478SDave Martin 
10662e0f2478SDave Martin /*
10672e0f2478SDave Martin  * If we haven't committed to the set of supported VQs yet, filter out
10682e0f2478SDave Martin  * those not supported by the current CPU.
10698b08e840SDave Martin  * This function is called during the bring-up of early secondary CPUs only.
10702e0f2478SDave Martin  */
1071b5bc00ffSMark Brown void vec_update_vq_map(enum vec_type type)
10722e0f2478SDave Martin {
1073b5bc00ffSMark Brown 	struct vl_info *info = &vl_info[type];
1074d06b76beSDave Martin 	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
1075d06b76beSDave Martin 
1076b5bc00ffSMark Brown 	vec_probe_vqs(info, tmp_map);
1077b5bc00ffSMark Brown 	bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
1078b5bc00ffSMark Brown 	bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
1079b5bc00ffSMark Brown 		  SVE_VQ_MAX);
10802e0f2478SDave Martin }
10812e0f2478SDave Martin 
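/*
 * Example of the two maps (hypothetical CPUs): if the boot CPU
 * supports the 16-, 32- and 64-byte VLs but a secondary supports only
 * 16 and 32, vq_map ends up as the intersection {16, 32} (usable on
 * every CPU) while vq_partial_map holds the union {16, 32, 64} (seen
 * on at least one CPU), which sve_setup() later uses to compute the
 * maximum virtualisable VL.
 */
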
10828b08e840SDave Martin /*
10838b08e840SDave Martin  * Check whether the current CPU supports all VQs in the committed set.
10848b08e840SDave Martin  * This function is called during the bring-up of late secondary CPUs only.
10858b08e840SDave Martin  */
1086b5bc00ffSMark Brown int vec_verify_vq_map(enum vec_type type)
10872e0f2478SDave Martin {
1088b5bc00ffSMark Brown 	struct vl_info *info = &vl_info[type];
1089d06b76beSDave Martin 	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
1090d06b76beSDave Martin 	unsigned long b;
10912e0f2478SDave Martin 
1092b5bc00ffSMark Brown 	vec_probe_vqs(info, tmp_map);
1093d06b76beSDave Martin 
1094d06b76beSDave Martin 	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
1095b5bc00ffSMark Brown 	if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
1096b5bc00ffSMark Brown 		pr_warn("%s: cpu%d: Required vector length(s) missing\n",
1097b5bc00ffSMark Brown 			info->name, smp_processor_id());
1098d06b76beSDave Martin 		return -EINVAL;
10992e0f2478SDave Martin 	}
11002e0f2478SDave Martin 
1101d06b76beSDave Martin 	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
1102d06b76beSDave Martin 		return 0;
1103d06b76beSDave Martin 
1104d06b76beSDave Martin 	/*
1105d06b76beSDave Martin 	 * For KVM, it is necessary to ensure that this CPU doesn't
1106d06b76beSDave Martin 	 * support any vector length that guests may have probed as
1107d06b76beSDave Martin 	 * unsupported.
1108d06b76beSDave Martin 	 */
1109d06b76beSDave Martin 
1110d06b76beSDave Martin 	/* Recover the set of supported VQs: */
1111d06b76beSDave Martin 	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
1112d06b76beSDave Martin 	/* Find VQs supported that are not globally supported: */
1113b5bc00ffSMark Brown 	bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);
1114d06b76beSDave Martin 
1115d06b76beSDave Martin 	/* Find the lowest such VQ, if any: */
1116d06b76beSDave Martin 	b = find_last_bit(tmp_map, SVE_VQ_MAX);
1117d06b76beSDave Martin 	if (b >= SVE_VQ_MAX)
1118d06b76beSDave Martin 		return 0; /* no mismatches */
1119d06b76beSDave Martin 
1120d06b76beSDave Martin 	/*
1121d06b76beSDave Martin 	 * Mismatches above sve_max_virtualisable_vl are fine, since
1122d06b76beSDave Martin 	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
1123d06b76beSDave Martin 	 */
1124b5bc00ffSMark Brown 	if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
1125b5bc00ffSMark Brown 		pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
1126b5bc00ffSMark Brown 			info->name, smp_processor_id());
1127d06b76beSDave Martin 		return -EINVAL;
1128d06b76beSDave Martin 	}
1129d06b76beSDave Martin 
1130d06b76beSDave Martin 	return 0;
11312e0f2478SDave Martin }
11322e0f2478SDave Martin 
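/*
 * Example for the KVM check above (hypothetical late CPU): if the
 * committed map is {16, 32} bytes and a late-onlined CPU additionally
 * implements 64 bytes, the extra VL is tolerated only if it lies above
 * max_virtualisable_vl, since no guest can have been offered it;
 * otherwise the CPU is rejected.
 */
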
1133fdfa976cSDave Martin static void __init sve_efi_setup(void)
1134fdfa976cSDave Martin {
1135e0838f63SMark Brown 	int max_vl = 0;
1136e0838f63SMark Brown 	int i;
1137b5bc00ffSMark Brown 
1138fdfa976cSDave Martin 	if (!IS_ENABLED(CONFIG_EFI))
1139fdfa976cSDave Martin 		return;
1140fdfa976cSDave Martin 
1141e0838f63SMark Brown 	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
1142e0838f63SMark Brown 		max_vl = max(vl_info[i].max_vl, max_vl);
1143e0838f63SMark Brown 
1144fdfa976cSDave Martin 	/*
1145fdfa976cSDave Martin 	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
1146fdfa976cSDave Martin 	 * This is evidence of a crippled system and we are returning void,
1147fdfa976cSDave Martin 	 * so no attempt is made to handle this situation here.
1148fdfa976cSDave Martin 	 */
1149e0838f63SMark Brown 	if (!sve_vl_valid(max_vl))
1150fdfa976cSDave Martin 		goto fail;
1151fdfa976cSDave Martin 
1152fdfa976cSDave Martin 	efi_sve_state = __alloc_percpu(
1153e0838f63SMark Brown 		SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
1154fdfa976cSDave Martin 	if (!efi_sve_state)
1155fdfa976cSDave Martin 		goto fail;
1156fdfa976cSDave Martin 
1157fdfa976cSDave Martin 	return;
1158fdfa976cSDave Martin 
1159fdfa976cSDave Martin fail:
1160fdfa976cSDave Martin 	panic("Cannot allocate percpu memory for EFI SVE save/restore");
1161fdfa976cSDave Martin }
1162fdfa976cSDave Martin 
11632e0f2478SDave Martin /*
11642e0f2478SDave Martin  * Enable SVE for EL1.
11652e0f2478SDave Martin  * Intended for use by the cpufeatures code during CPU boot.
11662e0f2478SDave Martin  */
1167c0cda3b8SDave Martin void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
11682e0f2478SDave Martin {
11692e0f2478SDave Martin 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
11702e0f2478SDave Martin 	isb();
11712e0f2478SDave Martin }
11722e0f2478SDave Martin 
117331dc52b3SDave Martin /*
117431dc52b3SDave Martin  * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
117531dc52b3SDave Martin  * vector length.
117631dc52b3SDave Martin  *
117731dc52b3SDave Martin  * Use only if SVE is present.
117831dc52b3SDave Martin  * This function clobbers the SVE vector length.
117931dc52b3SDave Martin  */
118031dc52b3SDave Martin u64 read_zcr_features(void)
118131dc52b3SDave Martin {
118231dc52b3SDave Martin 	/*
118331dc52b3SDave Martin 	 * Set the maximum possible VL, and write zeroes to all other
118431dc52b3SDave Martin 	 * bits to see if they stick.
118531dc52b3SDave Martin 	 */
118631dc52b3SDave Martin 	sve_kernel_enable(NULL);
118731dc52b3SDave Martin 	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
118831dc52b3SDave Martin 
118901948b09SMark Brown 	/* Return LEN value that would be written to get the maximum VL */
119001948b09SMark Brown 	return sve_vq_from_vl(sve_get_vl()) - 1;
119131dc52b3SDave Martin }
119231dc52b3SDave Martin 
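/*
 * Worked example: ZCR_ELx.LEN encodes VL as 16 * (LEN + 1) bytes, so
 * on a CPU whose maximum VL is 64 bytes (512 bits) the read-back above
 * yields vl = 64 and read_zcr_features() returns
 * sve_vq_from_vl(64) - 1 = 3.
 */
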
11932e0f2478SDave Martin void __init sve_setup(void)
11942e0f2478SDave Martin {
1195b5bc00ffSMark Brown 	struct vl_info *info = &vl_info[ARM64_VEC_SVE];
11962e0f2478SDave Martin 	u64 zcr;
1197d06b76beSDave Martin 	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
1198d06b76beSDave Martin 	unsigned long b;
11992e0f2478SDave Martin 
12002e0f2478SDave Martin 	if (!system_supports_sve())
12012e0f2478SDave Martin 		return;
12022e0f2478SDave Martin 
12032e0f2478SDave Martin 	/*
12042e0f2478SDave Martin 	 * The SVE architecture mandates support for 128-bit vectors,
12052e0f2478SDave Martin 	 * so sve_vq_map must have at least SVE_VQ_MIN set.
12062e0f2478SDave Martin 	 * If something went wrong, at least try to patch it up:
12072e0f2478SDave Martin 	 */
1208b5bc00ffSMark Brown 	if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
1209b5bc00ffSMark Brown 		set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
12102e0f2478SDave Martin 
12112e0f2478SDave Martin 	zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
1212b5bc00ffSMark Brown 	info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
12132e0f2478SDave Martin 
12142e0f2478SDave Martin 	/*
12152e0f2478SDave Martin 	 * Sanity-check that the max VL we determined through CPU features
12162e0f2478SDave Martin 	 * corresponds properly to sve_vq_map.  If not, do our best:
12172e0f2478SDave Martin 	 */
1218b5bc00ffSMark Brown 	if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
1219b5bc00ffSMark Brown 								 info->max_vl)))
1220b5bc00ffSMark Brown 		info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
1221b5bc00ffSMark Brown 							    info->max_vl);
12222e0f2478SDave Martin 
12232e0f2478SDave Martin 	/*
12242e0f2478SDave Martin 	 * For the default VL, pick the maximum supported value <= 64.
12252e0f2478SDave Martin 	 * VL == 64 is guaranteed not to grow the signal frame.
12262e0f2478SDave Martin 	 */
1227b5bc00ffSMark Brown 	set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));
12282e0f2478SDave Martin 
1229b5bc00ffSMark Brown 	bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
1230d06b76beSDave Martin 		      SVE_VQ_MAX);
1231d06b76beSDave Martin 
1232d06b76beSDave Martin 	b = find_last_bit(tmp_map, SVE_VQ_MAX);
1233d06b76beSDave Martin 	if (b >= SVE_VQ_MAX)
1234d06b76beSDave Martin 		/* No non-virtualisable VLs found */
1235b5bc00ffSMark Brown 		info->max_virtualisable_vl = SVE_VQ_MAX;
1236d06b76beSDave Martin 	else if (WARN_ON(b == SVE_VQ_MAX - 1))
1237d06b76beSDave Martin 		/* No virtualisable VLs?  This is architecturally forbidden. */
1238b5bc00ffSMark Brown 		info->max_virtualisable_vl = SVE_VQ_MIN;
1239d06b76beSDave Martin 	else /* b + 1 < SVE_VQ_MAX */
1240b5bc00ffSMark Brown 		info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
1241d06b76beSDave Martin 
1242b5bc00ffSMark Brown 	if (info->max_virtualisable_vl > info->max_vl)
1243b5bc00ffSMark Brown 		info->max_virtualisable_vl = info->max_vl;
1244d06b76beSDave Martin 
1245b5bc00ffSMark Brown 	pr_info("%s: maximum available vector length %u bytes per vector\n",
1246b5bc00ffSMark Brown 		info->name, info->max_vl);
1247b5bc00ffSMark Brown 	pr_info("%s: default vector length %u bytes per vector\n",
1248b5bc00ffSMark Brown 		info->name, get_sve_default_vl());
1249fdfa976cSDave Martin 
1250d06b76beSDave Martin 	/* KVM decides whether to support mismatched systems. Just warn here: */
1251b5bc00ffSMark Brown 	if (sve_max_virtualisable_vl() < sve_max_vl())
1252b5bc00ffSMark Brown 		pr_warn("%s: unvirtualisable vector lengths present\n",
1253b5bc00ffSMark Brown 			info->name);
1254d06b76beSDave Martin 
1255fdfa976cSDave Martin 	sve_efi_setup();
12562e0f2478SDave Martin }
12572e0f2478SDave Martin 
12582e0f2478SDave Martin /*
1259bc0ee476SDave Martin  * Called from the put_task_struct() path, which cannot get here
1260bc0ee476SDave Martin  * unless dead_task is really dead and not schedulable.
1261bc0ee476SDave Martin  */
1262bc0ee476SDave Martin void fpsimd_release_task(struct task_struct *dead_task)
1263bc0ee476SDave Martin {
1264bc0ee476SDave Martin 	__sve_free(dead_task);
12658bd7f91cSMark Brown 	sme_free(dead_task);
1266bc0ee476SDave Martin }
1267bc0ee476SDave Martin 
1268bc0ee476SDave Martin #endif /* CONFIG_ARM64_SVE */
1269bc0ee476SDave Martin 
12705e64b862SMark Brown #ifdef CONFIG_ARM64_SME
12715e64b862SMark Brown 
12728bd7f91cSMark Brown /*
1273ce514000SMark Brown  * Ensure that task->thread.sme_state is allocated and sufficiently large.
12748bd7f91cSMark Brown  *
12758bd7f91cSMark Brown  * This function should be used only in preparation for replacing
1276ce514000SMark Brown  * task->thread.sme_state with new data.  The memory is always zeroed
12778bd7f91cSMark Brown  * here to prevent stale data from showing through: this is done in
12788bd7f91cSMark Brown  * the interest of testability and predictability, since the
12798bd7f91cSMark Brown  * architecture guarantees that ZA will be zeroed when it is enabled.
12808bd7f91cSMark Brown  */
12815d0a8d2fSMark Brown void sme_alloc(struct task_struct *task, bool flush)
12828bd7f91cSMark Brown {
1283569156e4SMark Brown 	if (task->thread.sme_state) {
1284569156e4SMark Brown 		if (flush)
1285569156e4SMark Brown 			memset(task->thread.sme_state, 0,
1286569156e4SMark Brown 			       sme_state_size(task));
12878bd7f91cSMark Brown 		return;
12888bd7f91cSMark Brown 	}
12898bd7f91cSMark Brown 
12908bd7f91cSMark Brown 	/* This could potentially be up to 64K. */
1291ce514000SMark Brown 	task->thread.sme_state =
1292ce514000SMark Brown 		kzalloc(sme_state_size(task), GFP_KERNEL);
12938bd7f91cSMark Brown }
12948bd7f91cSMark Brown 
12958bd7f91cSMark Brown static void sme_free(struct task_struct *task)
12968bd7f91cSMark Brown {
1297ce514000SMark Brown 	kfree(task->thread.sme_state);
1298ce514000SMark Brown 	task->thread.sme_state = NULL;
12998bd7f91cSMark Brown }
13008bd7f91cSMark Brown 
13015e64b862SMark Brown void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
13025e64b862SMark Brown {
13035e64b862SMark Brown 	/* Set priority for all PEs to architecturally defined minimum */
13045e64b862SMark Brown 	write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
13055e64b862SMark Brown 		       SYS_SMPRI_EL1);
13065e64b862SMark Brown 
13075e64b862SMark Brown 	/* Allow SME in kernel */
13085e64b862SMark Brown 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
13095e64b862SMark Brown 	isb();
1310a9d69158SMark Brown 
1311a9d69158SMark Brown 	/* Allow EL0 to access TPIDR2 */
1312a9d69158SMark Brown 	write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
1313a9d69158SMark Brown 	isb();
13145e64b862SMark Brown }
13155e64b862SMark Brown 
13165e64b862SMark Brown /*
13175e64b862SMark Brown  * This must be called after sme_kernel_enable(); we rely on the
13185e64b862SMark Brown  * feature table being sorted to ensure this.
13195e64b862SMark Brown  */
1320d4913eeeSMark Brown void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
1321d4913eeeSMark Brown {
1322d4913eeeSMark Brown 	/* Allow use of ZT0 */
1323d4913eeeSMark Brown 	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
1324d4913eeeSMark Brown 		       SYS_SMCR_EL1);
1325d4913eeeSMark Brown }
1326d4913eeeSMark Brown 
1327d4913eeeSMark Brown /*
1328d4913eeeSMark Brown  * This must be called after sme_kernel_enable(); we rely on the
1329d4913eeeSMark Brown  * feature table being sorted to ensure this.
1330d4913eeeSMark Brown  */
13315e64b862SMark Brown void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
13325e64b862SMark Brown {
13335e64b862SMark Brown 	/* Allow use of FA64 */
13345e64b862SMark Brown 	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
13355e64b862SMark Brown 		       SYS_SMCR_EL1);
13365e64b862SMark Brown }
13375e64b862SMark Brown 
1338b42990d3SMark Brown /*
1339b42990d3SMark Brown  * Read the pseudo-SMCR used by cpufeatures to identify the supported
1340b42990d3SMark Brown  * vector length.
1341b42990d3SMark Brown  *
1342b42990d3SMark Brown  * Use only if SME is present.
1343b42990d3SMark Brown  * This function clobbers the SME vector length.
1344b42990d3SMark Brown  */
1345b42990d3SMark Brown u64 read_smcr_features(void)
1346b42990d3SMark Brown {
1347b42990d3SMark Brown 	sme_kernel_enable(NULL);
1348b42990d3SMark Brown 
1349b42990d3SMark Brown 	/*
1350b42990d3SMark Brown 	 * Set the maximum possible VL.
1351b42990d3SMark Brown 	 */
1352b42990d3SMark Brown 	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
1353b42990d3SMark Brown 		       SYS_SMCR_EL1);
1354b42990d3SMark Brown 
135501948b09SMark Brown 	/* Return LEN value that would be written to get the maximum VL */
135601948b09SMark Brown 	return sve_vq_from_vl(sme_get_vl()) - 1;
1357b42990d3SMark Brown }
1358b42990d3SMark Brown 
1359b42990d3SMark Brown void __init sme_setup(void)
1360b42990d3SMark Brown {
1361b42990d3SMark Brown 	struct vl_info *info = &vl_info[ARM64_VEC_SME];
1362b42990d3SMark Brown 	u64 smcr;
1363b42990d3SMark Brown 	int min_bit;
1364b42990d3SMark Brown 
1365b42990d3SMark Brown 	if (!system_supports_sme())
1366b42990d3SMark Brown 		return;
1367b42990d3SMark Brown 
1368b42990d3SMark Brown 	/*
1369b42990d3SMark Brown 	 * SME doesn't require any particular vector length be
1370b42990d3SMark Brown 	 * supported but it does require at least one.  We should have
1371b42990d3SMark Brown 	 * disabled the feature entirely while bringing up CPUs, but
1372b42990d3SMark Brown 	 * let's double-check here.
1373b42990d3SMark Brown 	 */
1374b42990d3SMark Brown 	WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
1375b42990d3SMark Brown 
1376b42990d3SMark Brown 	min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
1377b42990d3SMark Brown 	info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
1378b42990d3SMark Brown 
1379b42990d3SMark Brown 	smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
1380b42990d3SMark Brown 	info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
1381b42990d3SMark Brown 
1382b42990d3SMark Brown 	/*
1383b42990d3SMark Brown 	 * Sanity-check that the max VL we determined through CPU features
1384b42990d3SMark Brown 	 * corresponds properly to sme_vq_map.  If not, do our best:
1385b42990d3SMark Brown 	 */
1386b42990d3SMark Brown 	if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
1387b42990d3SMark Brown 								 info->max_vl)))
1388b42990d3SMark Brown 		info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
1389b42990d3SMark Brown 							    info->max_vl);
1390b42990d3SMark Brown 
1391b42990d3SMark Brown 	WARN_ON(info->min_vl > info->max_vl);
1392b42990d3SMark Brown 
1393b42990d3SMark Brown 	/*
1394b42990d3SMark Brown 	 * For the default VL, pick the maximum supported value <= 32
1395b42990d3SMark Brown 	 * (256 bits) if there is one since this is guaranteed not to
1396b42990d3SMark Brown 	 * grow the signal frame when in streaming mode, otherwise the
1397b42990d3SMark Brown 	 * minimum available VL will be used.
1398b42990d3SMark Brown 	 */
1399b42990d3SMark Brown 	set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
1400b42990d3SMark Brown 
1401b42990d3SMark Brown 	pr_info("SME: minimum available vector length %u bytes per vector\n",
1402b42990d3SMark Brown 		info->min_vl);
1403b42990d3SMark Brown 	pr_info("SME: maximum available vector length %u bytes per vector\n",
1404b42990d3SMark Brown 		info->max_vl);
1405b42990d3SMark Brown 	pr_info("SME: default vector length %u bytes per vector\n",
1406b42990d3SMark Brown 		get_sme_default_vl());
1407b42990d3SMark Brown }
1408b42990d3SMark Brown 
14097c892383SMark Brown void sme_suspend_exit(void)
14107c892383SMark Brown {
14117c892383SMark Brown 	u64 smcr = 0;
14127c892383SMark Brown 
14137c892383SMark Brown 	if (!system_supports_sme())
14147c892383SMark Brown 		return;
14157c892383SMark Brown 
14167c892383SMark Brown 	if (system_supports_fa64())
14177c892383SMark Brown 		smcr |= SMCR_ELx_FA64;
141879491ddfSMark Brown 	if (system_supports_sme2())
141979491ddfSMark Brown 		smcr |= SMCR_ELx_EZT0;
14207c892383SMark Brown 
14217c892383SMark Brown 	write_sysreg_s(smcr, SYS_SMCR_EL1);
14227c892383SMark Brown 	write_sysreg_s(0, SYS_SMPRI_EL1);
14237c892383SMark Brown }
14247c892383SMark Brown 
1425b42990d3SMark Brown #endif /* CONFIG_ARM64_SME */
14265e64b862SMark Brown 
14278bd7f91cSMark Brown static void sve_init_regs(void)
14288bd7f91cSMark Brown {
14298bd7f91cSMark Brown 	/*
14308bd7f91cSMark Brown 	 * Convert the FPSIMD state to SVE, zeroing all the state that
14318bd7f91cSMark Brown 	 * is not shared with FPSIMD. If (as is likely) the current
14328bd7f91cSMark Brown 	 * state is live in the registers then do this there and
14338bd7f91cSMark Brown 	 * update our metadata for the current task including
14348bd7f91cSMark Brown 	 * disabling the trap, otherwise update our in-memory copy.
14358bd7f91cSMark Brown 	 * We are guaranteed not to be in streaming mode: we can only
14368bd7f91cSMark Brown 	 * take an SVE trap when not in streaming mode, and we can't be
14378bd7f91cSMark Brown 	 * in streaming mode when taking an SME trap.
14388bd7f91cSMark Brown 	 */
14398bd7f91cSMark Brown 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
14408bd7f91cSMark Brown 		unsigned long vq_minus_one =
14418bd7f91cSMark Brown 			sve_vq_from_vl(task_get_sve_vl(current)) - 1;
14428bd7f91cSMark Brown 		sve_set_vq(vq_minus_one);
14438bd7f91cSMark Brown 		sve_flush_live(true, vq_minus_one);
14448bd7f91cSMark Brown 		fpsimd_bind_task_to_cpu();
14458bd7f91cSMark Brown 	} else {
14468bd7f91cSMark Brown 		fpsimd_to_sve(current);
1447baa85152SMark Brown 		current->thread.fp_type = FP_STATE_SVE;
1448*51d11ea0SMark Brown 		fpsimd_flush_task_state(current);
14498bd7f91cSMark Brown 	}
14508bd7f91cSMark Brown }
14518bd7f91cSMark Brown 
1452bc0ee476SDave Martin /*
1453bc0ee476SDave Martin  * Trapped SVE access
1454bc0ee476SDave Martin  *
1455bc0ee476SDave Martin  * Storage is allocated for the full SVE state, the current FPSIMD
1456cccb78ceSMark Brown  * register contents are migrated across, and the access trap is
1457cccb78ceSMark Brown  * disabled.
1458bc0ee476SDave Martin  *
1459f186a84dSJulien Grall  * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
1460bc0ee476SDave Martin  * would have disabled the SVE access trap for userspace during
1461bc0ee476SDave Martin  * ret_to_user, making an SVE access trap impossible in that case.
1462bc0ee476SDave Martin  */
14638d56e5c5SAlexandru Elisei void do_sve_acc(unsigned long esr, struct pt_regs *regs)
1464bc0ee476SDave Martin {
1465bc0ee476SDave Martin 	/* Even if we chose not to use SVE, the hardware could still trap: */
1466bc0ee476SDave Martin 	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
14674ef333b2SAmit Daniel Kachhap 		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1468bc0ee476SDave Martin 		return;
1469bc0ee476SDave Martin 	}
1470bc0ee476SDave Martin 
1471826a4fddSMark Brown 	sve_alloc(current, true);
14727559b7d7SMark Brown 	if (!current->thread.sve_state) {
14737559b7d7SMark Brown 		force_sig(SIGKILL);
14747559b7d7SMark Brown 		return;
14757559b7d7SMark Brown 	}
1476bc0ee476SDave Martin 
14776dcdefcdSJulien Grall 	get_cpu_fpsimd_context();
1478bc0ee476SDave Martin 
1479bc0ee476SDave Martin 	if (test_and_set_thread_flag(TIF_SVE))
1480bc0ee476SDave Martin 		WARN_ON(1); /* SVE access shouldn't have trapped */
1481bc0ee476SDave Martin 
1482cccb78ceSMark Brown 	/*
14838bd7f91cSMark Brown 	 * Even if the task may have used streaming mode we can only
14848bd7f91cSMark Brown 	 * generate SVE access traps in normal SVE mode, and
14858bd7f91cSMark Brown 	 * transitioning out of streaming mode may discard any
14868bd7f91cSMark Brown 	 * streaming mode state.  Always clear the high bits to avoid
14878bd7f91cSMark Brown 	 * any potential errors in tracking what is properly initialised.
1488cccb78ceSMark Brown 	 */
14898bd7f91cSMark Brown 	sve_init_regs();
14908bd7f91cSMark Brown 
14918bd7f91cSMark Brown 	put_cpu_fpsimd_context();
14928bd7f91cSMark Brown }
14938bd7f91cSMark Brown 
14948bd7f91cSMark Brown /*
14958bd7f91cSMark Brown  * Trapped SME access
14968bd7f91cSMark Brown  *
14978bd7f91cSMark Brown  * Storage is allocated for the full SVE and SME state, the current
14988bd7f91cSMark Brown  * FPSIMD register contents are migrated to SVE if SVE is not already
14998bd7f91cSMark Brown  * active, and the access trap is disabled.
15008bd7f91cSMark Brown  *
15018bd7f91cSMark Brown  * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
15028bd7f91cSMark Brown  * would have disabled the SME access trap for userspace during
150397b5576bSDongxu Sun  * ret_to_user, making an SME access trap impossible in that case.
15048bd7f91cSMark Brown  */
15050616ea3fSCatalin Marinas void do_sme_acc(unsigned long esr, struct pt_regs *regs)
15068bd7f91cSMark Brown {
15078bd7f91cSMark Brown 	/* Even if we chose not to use SME, the hardware could still trap: */
15088bd7f91cSMark Brown 	if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
15098bd7f91cSMark Brown 		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
15108bd7f91cSMark Brown 		return;
15118bd7f91cSMark Brown 	}
15128bd7f91cSMark Brown 
15138bd7f91cSMark Brown 	/*
15148bd7f91cSMark Brown 	 * If this is not a trap due to SME being disabled then something
15158bd7f91cSMark Brown 	 * is being used in the wrong mode; report it as SIGILL.
15168bd7f91cSMark Brown 	 */
15178bd7f91cSMark Brown 	if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
15188bd7f91cSMark Brown 		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
15198bd7f91cSMark Brown 		return;
15208bd7f91cSMark Brown 	}
15218bd7f91cSMark Brown 
1522826a4fddSMark Brown 	sve_alloc(current, false);
15235d0a8d2fSMark Brown 	sme_alloc(current, true);
1524ce514000SMark Brown 	if (!current->thread.sve_state || !current->thread.sme_state) {
15258bd7f91cSMark Brown 		force_sig(SIGKILL);
15268bd7f91cSMark Brown 		return;
15278bd7f91cSMark Brown 	}
15288bd7f91cSMark Brown 
15298bd7f91cSMark Brown 	get_cpu_fpsimd_context();
15308bd7f91cSMark Brown 
15318bd7f91cSMark Brown 	/* With TIF_SME userspace shouldn't generate any traps */
15328bd7f91cSMark Brown 	if (test_and_set_thread_flag(TIF_SME))
15338bd7f91cSMark Brown 		WARN_ON(1);
15348bd7f91cSMark Brown 
1535cccb78ceSMark Brown 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
1536ad4711f9SMark Brown 		unsigned long vq_minus_one =
15378bd7f91cSMark Brown 			sve_vq_from_vl(task_get_sme_vl(current)) - 1;
15388bd7f91cSMark Brown 		sme_set_vq(vq_minus_one);
15398bd7f91cSMark Brown 
1540cccb78ceSMark Brown 		fpsimd_bind_task_to_cpu();
1541cccb78ceSMark Brown 	}
1542cccb78ceSMark Brown 
15436dcdefcdSJulien Grall 	put_cpu_fpsimd_context();
1544bc0ee476SDave Martin }
1545bc0ee476SDave Martin 
1546005f78cdSArd Biesheuvel /*
154753631b54SCatalin Marinas  * Trapped FP/ASIMD access.
154853631b54SCatalin Marinas  */
15498d56e5c5SAlexandru Elisei void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
155053631b54SCatalin Marinas {
155153631b54SCatalin Marinas 	/* TODO: implement lazy context saving/restoring */
155253631b54SCatalin Marinas 	WARN_ON(1);
155353631b54SCatalin Marinas }
155453631b54SCatalin Marinas 
155553631b54SCatalin Marinas /*
155653631b54SCatalin Marinas  * Raise a SIGFPE for the current process.
155753631b54SCatalin Marinas  */
15588d56e5c5SAlexandru Elisei void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
155953631b54SCatalin Marinas {
1560af4a81b9SDave Martin 	unsigned int si_code = FPE_FLTUNK;
156153631b54SCatalin Marinas 
1562af4a81b9SDave Martin 	if (esr & ESR_ELx_FP_EXC_TFV) {
156353631b54SCatalin Marinas 		if (esr & FPEXC_IOF)
156453631b54SCatalin Marinas 			si_code = FPE_FLTINV;
156553631b54SCatalin Marinas 		else if (esr & FPEXC_DZF)
156653631b54SCatalin Marinas 			si_code = FPE_FLTDIV;
156753631b54SCatalin Marinas 		else if (esr & FPEXC_OFF)
156853631b54SCatalin Marinas 			si_code = FPE_FLTOVF;
156953631b54SCatalin Marinas 		else if (esr & FPEXC_UFF)
157053631b54SCatalin Marinas 			si_code = FPE_FLTUND;
157153631b54SCatalin Marinas 		else if (esr & FPEXC_IXF)
157253631b54SCatalin Marinas 			si_code = FPE_FLTRES;
1573af4a81b9SDave Martin 	}
157453631b54SCatalin Marinas 
1575c8526809SEric W. Biederman 	send_sig_fault(SIGFPE, si_code,
1576c8526809SEric W. Biederman 		       (void __user *)instruction_pointer(regs),
1577c8526809SEric W. Biederman 		       current);
157853631b54SCatalin Marinas }
157953631b54SCatalin Marinas 
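/*
 * Example: an exception with ESR_ELx_FP_EXC_TFV set and FPEXC_IOF set
 * (an invalid operation such as 0.0 / 0.0, taken with trapping enabled
 * in FPCR) is reported as FPE_FLTINV above; if TFV is clear the cause
 * cannot be determined and FPE_FLTUNK is used.
 */
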
158053631b54SCatalin Marinas void fpsimd_thread_switch(struct task_struct *next)
158153631b54SCatalin Marinas {
1582df3fb968SDave Martin 	bool wrong_task, wrong_cpu;
1583df3fb968SDave Martin 
158482e0191aSSuzuki K Poulose 	if (!system_supports_fpsimd())
158582e0191aSSuzuki K Poulose 		return;
1586005f78cdSArd Biesheuvel 
15876dcdefcdSJulien Grall 	__get_cpu_fpsimd_context();
15886dcdefcdSJulien Grall 
1589df3fb968SDave Martin 	/* Save unsaved fpsimd state, if any: */
1590d1797615SDave Martin 	fpsimd_save();
1591005f78cdSArd Biesheuvel 
1592005f78cdSArd Biesheuvel 	/*
1593df3fb968SDave Martin 	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
1594df3fb968SDave Martin 	 * state.  For kernel threads, FPSIMD registers are never loaded
1595df3fb968SDave Martin 	 * and wrong_task and wrong_cpu will always be true.
1596005f78cdSArd Biesheuvel 	 */
1597df3fb968SDave Martin 	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
159809d1223aSDave Martin 					&next->thread.uw.fpsimd_state;
1599df3fb968SDave Martin 	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
160009d1223aSDave Martin 
160109d1223aSDave Martin 	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
160209d1223aSDave Martin 			       wrong_task || wrong_cpu);
16036dcdefcdSJulien Grall 
16046dcdefcdSJulien Grall 	__put_cpu_fpsimd_context();
160553631b54SCatalin Marinas }
160653631b54SCatalin Marinas 
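/*
 * Example of the check above: if the incoming task last ran on this
 * CPU and fpsimd_last_state still points at its state, wrong_task and
 * wrong_cpu are both false, TIF_FOREIGN_FPSTATE is cleared and the
 * registers can be used as-is.  If the task migrated here, or this CPU
 * ran another task or kernel-mode NEON in the meantime, the flag is
 * set and the state is reloaded before returning to userspace.
 */
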
16075838a155SMark Brown static void fpsimd_flush_thread_vl(enum vec_type type)
160853631b54SCatalin Marinas {
16097582e220SDave Martin 	int vl, supported_vl;
1610bc0ee476SDave Martin 
16115838a155SMark Brown 	/*
16125838a155SMark Brown 	 * Reset the task vector length as required.  This is where we
16135838a155SMark Brown 	 * ensure that all user tasks have a valid vector length
16145838a155SMark Brown 	 * configured: no kernel task can become a user task without
16155838a155SMark Brown 	 * an exec and hence a call to this function.  By the time the
16165838a155SMark Brown 	 * first call to this function is made, all early hardware
16175838a155SMark Brown 	 * probing is complete, so __sve_default_vl should be valid.
16185838a155SMark Brown 	 * If a bug causes this to go wrong, we make some noise and
16195838a155SMark Brown 	 * try to fudge thread.sve_vl to a safe value here.
16205838a155SMark Brown 	 */
16215838a155SMark Brown 	vl = task_get_vl_onexec(current, type);
16225838a155SMark Brown 	if (!vl)
16235838a155SMark Brown 		vl = get_default_vl(type);
16245838a155SMark Brown 
16255838a155SMark Brown 	if (WARN_ON(!sve_vl_valid(vl)))
162697bcbee4SMark Brown 		vl = vl_info[type].min_vl;
16275838a155SMark Brown 
16285838a155SMark Brown 	supported_vl = find_supported_vector_length(type, vl);
16295838a155SMark Brown 	if (WARN_ON(supported_vl != vl))
16305838a155SMark Brown 		vl = supported_vl;
16315838a155SMark Brown 
16325838a155SMark Brown 	task_set_vl(current, type, vl);
16335838a155SMark Brown 
16345838a155SMark Brown 	/*
16355838a155SMark Brown 	 * If the task is not set to inherit, ensure that the vector
16365838a155SMark Brown 	 * length will be reset by a subsequent exec:
16375838a155SMark Brown 	 */
16385838a155SMark Brown 	if (!test_thread_flag(vec_vl_inherit_flag(type)))
16395838a155SMark Brown 		task_set_vl_onexec(current, type, 0);
16405838a155SMark Brown }
16415838a155SMark Brown 
16425838a155SMark Brown void fpsimd_flush_thread(void)
16435838a155SMark Brown {
1644a1259dd8SSebastian Andrzej Siewior 	void *sve_state = NULL;
1645ce514000SMark Brown 	void *sme_state = NULL;
1646a1259dd8SSebastian Andrzej Siewior 
164782e0191aSSuzuki K Poulose 	if (!system_supports_fpsimd())
164882e0191aSSuzuki K Poulose 		return;
1649cb84d11eSDave Martin 
16506dcdefcdSJulien Grall 	get_cpu_fpsimd_context();
1651cb84d11eSDave Martin 
1652efbc2024SDave Martin 	fpsimd_flush_task_state(current);
165365896545SDave Martin 	memset(&current->thread.uw.fpsimd_state, 0,
165465896545SDave Martin 	       sizeof(current->thread.uw.fpsimd_state));
1655bc0ee476SDave Martin 
1656bc0ee476SDave Martin 	if (system_supports_sve()) {
1657bc0ee476SDave Martin 		clear_thread_flag(TIF_SVE);
1658a1259dd8SSebastian Andrzej Siewior 
1659a1259dd8SSebastian Andrzej Siewior 		/* Defer kfree() while in atomic context */
1660a1259dd8SSebastian Andrzej Siewior 		sve_state = current->thread.sve_state;
1661a1259dd8SSebastian Andrzej Siewior 		current->thread.sve_state = NULL;
1662a1259dd8SSebastian Andrzej Siewior 
16635838a155SMark Brown 		fpsimd_flush_thread_vl(ARM64_VEC_SVE);
1664bc0ee476SDave Martin 	}
1665bc0ee476SDave Martin 
16668bd7f91cSMark Brown 	if (system_supports_sme()) {
16678bd7f91cSMark Brown 		clear_thread_flag(TIF_SME);
1668a1259dd8SSebastian Andrzej Siewior 
1669a1259dd8SSebastian Andrzej Siewior 		/* Defer kfree() while in atomic context */
1670ce514000SMark Brown 		sme_state = current->thread.sme_state;
1671ce514000SMark Brown 		current->thread.sme_state = NULL;
1672a1259dd8SSebastian Andrzej Siewior 
1673af7167d6SMark Brown 		fpsimd_flush_thread_vl(ARM64_VEC_SME);
16748bd7f91cSMark Brown 		current->thread.svcr = 0;
16758bd7f91cSMark Brown 	}
1676af7167d6SMark Brown 
1677baa85152SMark Brown 	current->thread.fp_type = FP_STATE_FPSIMD;
1678baa85152SMark Brown 
16796dcdefcdSJulien Grall 	put_cpu_fpsimd_context();
1680a1259dd8SSebastian Andrzej Siewior 	kfree(sve_state);
1681ce514000SMark Brown 	kfree(sme_state);
168253631b54SCatalin Marinas }
168353631b54SCatalin Marinas 
1684c51f9269SArd Biesheuvel /*
1685005f78cdSArd Biesheuvel  * Save the userland FPSIMD state of 'current' to memory, but only if the state
1686005f78cdSArd Biesheuvel  * currently held in the registers does in fact belong to 'current'
1687c51f9269SArd Biesheuvel  */
1688c51f9269SArd Biesheuvel void fpsimd_preserve_current_state(void)
1689c51f9269SArd Biesheuvel {
169082e0191aSSuzuki K Poulose 	if (!system_supports_fpsimd())
169182e0191aSSuzuki K Poulose 		return;
1692cb84d11eSDave Martin 
16936dcdefcdSJulien Grall 	get_cpu_fpsimd_context();
1694d1797615SDave Martin 	fpsimd_save();
16956dcdefcdSJulien Grall 	put_cpu_fpsimd_context();
1696c51f9269SArd Biesheuvel }
1697c51f9269SArd Biesheuvel 
1698c51f9269SArd Biesheuvel /*
16998cd969d2SDave Martin  * Like fpsimd_preserve_current_state(), but ensure that
170065896545SDave Martin  * current->thread.uw.fpsimd_state is updated so that it can be copied to
17018cd969d2SDave Martin  * the signal frame.
17028cd969d2SDave Martin  */
17038cd969d2SDave Martin void fpsimd_signal_preserve_current_state(void)
17048cd969d2SDave Martin {
17058cd969d2SDave Martin 	fpsimd_preserve_current_state();
170660480c6bSMark Brown 	if (current->thread.fp_type == FP_STATE_SVE)
17078cd969d2SDave Martin 		sve_to_fpsimd(current);
17088cd969d2SDave Martin }
17098cd969d2SDave Martin 
17108cd969d2SDave Martin /*
171193ae6b01SMark Brown  * Called by KVM when entering the guest.
171293ae6b01SMark Brown  */
171393ae6b01SMark Brown void fpsimd_kvm_prepare(void)
171493ae6b01SMark Brown {
171593ae6b01SMark Brown 	if (!system_supports_sve())
171693ae6b01SMark Brown 		return;
171793ae6b01SMark Brown 
171893ae6b01SMark Brown 	/*
171993ae6b01SMark Brown 	 * KVM does not save host SVE state, since we can only enter
172093ae6b01SMark Brown 	 * the guest from a syscall, so the ABI means that only the
172193ae6b01SMark Brown 	 * non-saved SVE state needs to be saved.  If we have left
172293ae6b01SMark Brown 	 * SVE enabled for performance reasons then update the task
172393ae6b01SMark Brown 	 * state to be FPSIMD only.
172493ae6b01SMark Brown 	 */
172593ae6b01SMark Brown 	get_cpu_fpsimd_context();
172693ae6b01SMark Brown 
1727baa85152SMark Brown 	if (test_and_clear_thread_flag(TIF_SVE)) {
172893ae6b01SMark Brown 		sve_to_fpsimd(current);
1729baa85152SMark Brown 		current->thread.fp_type = FP_STATE_FPSIMD;
1730baa85152SMark Brown 	}
173193ae6b01SMark Brown 
173293ae6b01SMark Brown 	put_cpu_fpsimd_context();
173393ae6b01SMark Brown }
173493ae6b01SMark Brown 
173593ae6b01SMark Brown /*
17368884b7bdSDave Martin  * Associate current's FPSIMD context with this cpu
17376dcdefcdSJulien Grall  * The caller must have ownership of the cpu FPSIMD context before calling
17386dcdefcdSJulien Grall  * this function.
17398884b7bdSDave Martin  */
1740b24b5205SMark Brown static void fpsimd_bind_task_to_cpu(void)
17418884b7bdSDave Martin {
17421192b93bSMark Brown 	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
17438884b7bdSDave Martin 
174452f73c38SSuzuki K Poulose 	WARN_ON(!system_supports_fpsimd());
174565896545SDave Martin 	last->st = &current->thread.uw.fpsimd_state;
174604950674SDave Martin 	last->sve_state = current->thread.sve_state;
1747ce514000SMark Brown 	last->sme_state = current->thread.sme_state;
17480423eedcSMark Brown 	last->sve_vl = task_get_sve_vl(current);
1749af7167d6SMark Brown 	last->sme_vl = task_get_sme_vl(current);
1750b40c559bSMark Brown 	last->svcr = &current->thread.svcr;
1751baa85152SMark Brown 	last->fp_type = &current->thread.fp_type;
1752deeb8f9aSMark Brown 	last->to_save = FP_STATE_CURRENT;
175320b85472SDave Martin 	current->thread.fpsimd_cpu = smp_processor_id();
17540cff8e77SDave Martin 
17558bd7f91cSMark Brown 	/*
17568bd7f91cSMark Brown 	 * Toggle SVE and SME trapping for userspace if needed; these
17578bd7f91cSMark Brown 	 * are serialised by ret_to_user().
17588bd7f91cSMark Brown 	 */
17598bd7f91cSMark Brown 	if (system_supports_sme()) {
17608bd7f91cSMark Brown 		if (test_thread_flag(TIF_SME))
17618bd7f91cSMark Brown 			sme_user_enable();
17628bd7f91cSMark Brown 		else
17638bd7f91cSMark Brown 			sme_user_disable();
17648bd7f91cSMark Brown 	}
17658bd7f91cSMark Brown 
17660cff8e77SDave Martin 	if (system_supports_sve()) {
17670cff8e77SDave Martin 		if (test_thread_flag(TIF_SVE))
17680cff8e77SDave Martin 			sve_user_enable();
17690cff8e77SDave Martin 		else
17700cff8e77SDave Martin 			sve_user_disable();
17710cff8e77SDave Martin 	}
17728884b7bdSDave Martin }
17738884b7bdSDave Martin 
17741192b93bSMark Brown void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
1775e6b673b7SDave Martin {
17761192b93bSMark Brown 	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
1777e6b673b7SDave Martin 
177852f73c38SSuzuki K Poulose 	WARN_ON(!system_supports_fpsimd());
1779e6b673b7SDave Martin 	WARN_ON(!in_softirq() && !irqs_disabled());
1780e6b673b7SDave Martin 
17811192b93bSMark Brown 	*last = *state;
17828884b7bdSDave Martin }
17838884b7bdSDave Martin 
17848884b7bdSDave Martin /*
1785005f78cdSArd Biesheuvel  * Load the userland FPSIMD state of 'current' from memory, but only if the
1786005f78cdSArd Biesheuvel  * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
178712b792e5SMark Brown  * state of 'current'.  This is called when we are preparing to return to
178812b792e5SMark Brown  * userspace to ensure that userspace sees a good register state.
1789005f78cdSArd Biesheuvel  */
1790005f78cdSArd Biesheuvel void fpsimd_restore_current_state(void)
1791005f78cdSArd Biesheuvel {
179252f73c38SSuzuki K Poulose 	/*
179352f73c38SSuzuki K Poulose 	 * For the tasks that were created before we detected the absence of
179452f73c38SSuzuki K Poulose 	 * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
179552f73c38SSuzuki K Poulose 	 * e.g., init.  This could then be inherited by the child processes.
179652f73c38SSuzuki K Poulose 	 * If we later detect that the system doesn't support FP/SIMD,
179752f73c38SSuzuki K Poulose 	 * we must clear the flag for all the tasks to indicate that the
179852f73c38SSuzuki K Poulose 	 * FPSTATE is clean (as we can't have one) to avoid looping forever in
179952f73c38SSuzuki K Poulose 	 * do_notify_resume().
180052f73c38SSuzuki K Poulose 	 */
180152f73c38SSuzuki K Poulose 	if (!system_supports_fpsimd()) {
180252f73c38SSuzuki K Poulose 		clear_thread_flag(TIF_FOREIGN_FPSTATE);
180382e0191aSSuzuki K Poulose 		return;
180452f73c38SSuzuki K Poulose 	}
1805cb84d11eSDave Martin 
18066dcdefcdSJulien Grall 	get_cpu_fpsimd_context();
1807cb84d11eSDave Martin 
1808005f78cdSArd Biesheuvel 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
1809bc0ee476SDave Martin 		task_fpsimd_load();
18100cff8e77SDave Martin 		fpsimd_bind_task_to_cpu();
1811005f78cdSArd Biesheuvel 	}
1812cb84d11eSDave Martin 
18136dcdefcdSJulien Grall 	put_cpu_fpsimd_context();
1814005f78cdSArd Biesheuvel }
1815005f78cdSArd Biesheuvel 
1816005f78cdSArd Biesheuvel /*
1817005f78cdSArd Biesheuvel  * Load an updated userland FPSIMD state for 'current' from memory and set the
1818005f78cdSArd Biesheuvel  * flag that indicates that the FPSIMD register contents are the most recent
181912b792e5SMark Brown  * FPSIMD state of 'current'. This is used by the signal code to restore the
182012b792e5SMark Brown  * register state when returning from a signal handler in FPSIMD only cases,
182112b792e5SMark Brown  * any SVE context will be discarded.
1822c51f9269SArd Biesheuvel  */
18230abdeff5SDave Martin void fpsimd_update_current_state(struct user_fpsimd_state const *state)
1824c51f9269SArd Biesheuvel {
182552f73c38SSuzuki K Poulose 	if (WARN_ON(!system_supports_fpsimd()))
182682e0191aSSuzuki K Poulose 		return;
1827cb84d11eSDave Martin 
18286dcdefcdSJulien Grall 	get_cpu_fpsimd_context();
1829cb84d11eSDave Martin 
183065896545SDave Martin 	current->thread.uw.fpsimd_state = *state;
1831ef9c5d09SMark Brown 	if (test_thread_flag(TIF_SVE))
18328cd969d2SDave Martin 		fpsimd_to_sve(current);
18339de52a75SDave Martin 
18348cd969d2SDave Martin 	task_fpsimd_load();
18350cff8e77SDave Martin 	fpsimd_bind_task_to_cpu();
18368cd969d2SDave Martin 
18370cff8e77SDave Martin 	clear_thread_flag(TIF_FOREIGN_FPSTATE);
1838cb84d11eSDave Martin 
18396dcdefcdSJulien Grall 	put_cpu_fpsimd_context();
1840c51f9269SArd Biesheuvel }
1841c51f9269SArd Biesheuvel 
1842005f78cdSArd Biesheuvel /*
1843005f78cdSArd Biesheuvel  * Invalidate live CPU copies of task t's FPSIMD state
1844efbc2024SDave Martin  *
1845efbc2024SDave Martin  * This function may be called with preemption enabled.  The barrier()
1846efbc2024SDave Martin  * ensures that the assignment to fpsimd_cpu is visible to any
1847efbc2024SDave Martin  * preemption/softirq that could race with set_tsk_thread_flag(), so
1848efbc2024SDave Martin  * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
1849efbc2024SDave Martin  *
1850efbc2024SDave Martin  * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
1851efbc2024SDave Martin  * subsequent code.
1852005f78cdSArd Biesheuvel  */
1853005f78cdSArd Biesheuvel void fpsimd_flush_task_state(struct task_struct *t)
1854005f78cdSArd Biesheuvel {
185520b85472SDave Martin 	t->thread.fpsimd_cpu = NR_CPUS;
185652f73c38SSuzuki K Poulose 	/*
185752f73c38SSuzuki K Poulose 	 * If we don't support fpsimd, bail out after we have
185852f73c38SSuzuki K Poulose 	 * reset the fpsimd_cpu for this task and cleared the
185952f73c38SSuzuki K Poulose 	 * FPSTATE.
186052f73c38SSuzuki K Poulose 	 */
186152f73c38SSuzuki K Poulose 	if (!system_supports_fpsimd())
186252f73c38SSuzuki K Poulose 		return;
1863efbc2024SDave Martin 	barrier();
1864efbc2024SDave Martin 	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
1865efbc2024SDave Martin 
1866efbc2024SDave Martin 	barrier();
1867005f78cdSArd Biesheuvel }
1868005f78cdSArd Biesheuvel 
1869efbc2024SDave Martin /*
1870efbc2024SDave Martin  * Invalidate any task's FPSIMD state that is present on this cpu.
18716dcdefcdSJulien Grall  * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
18726dcdefcdSJulien Grall  * before calling this function.
1873efbc2024SDave Martin  */
187454b8c7cbSJulien Grall static void fpsimd_flush_cpu_state(void)
187517eed27bSDave Martin {
187652f73c38SSuzuki K Poulose 	WARN_ON(!system_supports_fpsimd());
1877cb968afcSDave Martin 	__this_cpu_write(fpsimd_last_state.st, NULL);
1878d45d7ff7SMark Brown 
1879d45d7ff7SMark Brown 	/*
1880d45d7ff7SMark Brown 	 * Leaving streaming mode enabled will cause issues for any
1881d45d7ff7SMark Brown 	 * kernel-mode NEON, and leaving streaming mode or ZA enabled
1882d45d7ff7SMark Brown 	 * may increase power consumption.
1883d45d7ff7SMark Brown 	 */
1884d45d7ff7SMark Brown 	if (system_supports_sme())
1885d45d7ff7SMark Brown 		sme_smstop();
1886d45d7ff7SMark Brown 
1887d8ad71faSDave Martin 	set_thread_flag(TIF_FOREIGN_FPSTATE);
188817eed27bSDave Martin }
188917eed27bSDave Martin 
189054b8c7cbSJulien Grall /*
189154b8c7cbSJulien Grall  * Save the FPSIMD state to memory and invalidate cpu view.
18926dcdefcdSJulien Grall  * This function must be called with preemption disabled.
189354b8c7cbSJulien Grall  */
189454b8c7cbSJulien Grall void fpsimd_save_and_flush_cpu_state(void)
189554b8c7cbSJulien Grall {
189652f73c38SSuzuki K Poulose 	if (!system_supports_fpsimd())
189752f73c38SSuzuki K Poulose 		return;
18986dcdefcdSJulien Grall 	WARN_ON(preemptible());
18996dcdefcdSJulien Grall 	__get_cpu_fpsimd_context();
190054b8c7cbSJulien Grall 	fpsimd_save();
190154b8c7cbSJulien Grall 	fpsimd_flush_cpu_state();
19026dcdefcdSJulien Grall 	__put_cpu_fpsimd_context();
190354b8c7cbSJulien Grall }
19044cfb3613SArd Biesheuvel 
19054cfb3613SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON
1906190f1ca8SArd Biesheuvel 
19074cfb3613SArd Biesheuvel /*
19084cfb3613SArd Biesheuvel  * Kernel-side NEON support functions
19094cfb3613SArd Biesheuvel  */
1910cb84d11eSDave Martin 
1911cb84d11eSDave Martin /*
1912cb84d11eSDave Martin  * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
1913cb84d11eSDave Martin  * context
1914cb84d11eSDave Martin  *
1915cb84d11eSDave Martin  * Must not be called unless may_use_simd() returns true.
1916cb84d11eSDave Martin  * Task context in the FPSIMD registers is saved back to memory as necessary.
1917cb84d11eSDave Martin  *
1918cb84d11eSDave Martin  * A matching call to kernel_neon_end() must be made before returning from the
1919cb84d11eSDave Martin  * calling context.
1920cb84d11eSDave Martin  *
1921cb84d11eSDave Martin  * The caller may freely use the FPSIMD registers until kernel_neon_end() is
1922cb84d11eSDave Martin  * called.
1923cb84d11eSDave Martin  */
1924cb84d11eSDave Martin void kernel_neon_begin(void)
19254cfb3613SArd Biesheuvel {
192682e0191aSSuzuki K Poulose 	if (WARN_ON(!system_supports_fpsimd()))
192782e0191aSSuzuki K Poulose 		return;
19284cfb3613SArd Biesheuvel 
1929cb84d11eSDave Martin 	BUG_ON(!may_use_simd());
1930cb84d11eSDave Martin 
19316dcdefcdSJulien Grall 	get_cpu_fpsimd_context();
1932cb84d11eSDave Martin 
1933df3fb968SDave Martin 	/* Save unsaved fpsimd state, if any: */
1934d1797615SDave Martin 	fpsimd_save();
19354cfb3613SArd Biesheuvel 
1936cb84d11eSDave Martin 	/* Invalidate any task state remaining in the fpsimd regs: */
193717eed27bSDave Martin 	fpsimd_flush_cpu_state();
1938cb84d11eSDave Martin }
1939aaeca984SMark Brown EXPORT_SYMBOL_GPL(kernel_neon_begin);
1940cb84d11eSDave Martin 
1941cb84d11eSDave Martin /*
1942cb84d11eSDave Martin  * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
1943cb84d11eSDave Martin  *
1944cb84d11eSDave Martin  * Must be called from a context in which kernel_neon_begin() was previously
1945cb84d11eSDave Martin  * called, with no call to kernel_neon_end() in the meantime.
1946cb84d11eSDave Martin  *
1947cb84d11eSDave Martin  * The caller must not use the FPSIMD registers after this function is called,
1948cb84d11eSDave Martin  * unless kernel_neon_begin() is called again in the meantime.
1949cb84d11eSDave Martin  */
19504cfb3613SArd Biesheuvel void kernel_neon_end(void)
19514cfb3613SArd Biesheuvel {
195282e0191aSSuzuki K Poulose 	if (!system_supports_fpsimd())
195382e0191aSSuzuki K Poulose 		return;
1954cb84d11eSDave Martin 
19556dcdefcdSJulien Grall 	put_cpu_fpsimd_context();
19564cfb3613SArd Biesheuvel }
1957aaeca984SMark Brown EXPORT_SYMBOL_GPL(kernel_neon_end);
19584cfb3613SArd Biesheuvel 
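/*
 * Illustrative call pattern (do_neon_work() and do_scalar_work() are
 * hypothetical helpers, not part of this file): users bracket NEON
 * code with the functions above and fall back to scalar code when
 * SIMD may not be used:
 *
 *	if (may_use_simd()) {
 *		kernel_neon_begin();
 *		do_neon_work(dst, src, len);
 *		kernel_neon_end();
 *	} else {
 *		do_scalar_work(dst, src, len);
 *	}
 */
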
1959e580b8bcSDave Martin #ifdef CONFIG_EFI
1960e580b8bcSDave Martin 
196120b85472SDave Martin static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
19623b66023dSDave Martin static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
1963fdfa976cSDave Martin static DEFINE_PER_CPU(bool, efi_sve_state_used);
1964e0838f63SMark Brown static DEFINE_PER_CPU(bool, efi_sm_state);
19654328825dSDave Martin 
19664328825dSDave Martin /*
19674328825dSDave Martin  * EFI runtime services support functions
19684328825dSDave Martin  *
19694328825dSDave Martin  * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
19704328825dSDave Martin  * This means that for EFI (and only for EFI), we have to assume that FPSIMD
19714328825dSDave Martin  * is always used rather than being an optional accelerator.
19724328825dSDave Martin  *
19734328825dSDave Martin  * These functions provide the necessary support for ensuring FPSIMD
19744328825dSDave Martin  * save/restore in the contexts from which EFI is used.
19754328825dSDave Martin  *
19764328825dSDave Martin  * Do not use them for any other purpose -- if tempted to do so, you are
19774328825dSDave Martin  * either doing something wrong or you need to propose some refactoring.
19784328825dSDave Martin  */
19794328825dSDave Martin 
19804328825dSDave Martin /*
19814328825dSDave Martin  * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
19824328825dSDave Martin  */
19834328825dSDave Martin void __efi_fpsimd_begin(void)
19844328825dSDave Martin {
19854328825dSDave Martin 	if (!system_supports_fpsimd())
19864328825dSDave Martin 		return;
19874328825dSDave Martin 
19884328825dSDave Martin 	WARN_ON(preemptible());
19894328825dSDave Martin 
1990fdfa976cSDave Martin 	if (may_use_simd()) {
19914328825dSDave Martin 		kernel_neon_begin();
1992fdfa976cSDave Martin 	} else {
1993fdfa976cSDave Martin 		/*
1994fdfa976cSDave Martin 		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
1995fdfa976cSDave Martin 		 * preserving:
1996fdfa976cSDave Martin 		 */
1997fdfa976cSDave Martin 		if (system_supports_sve() && likely(efi_sve_state)) {
1998fdfa976cSDave Martin 			char *sve_state = this_cpu_ptr(efi_sve_state);
1999e0838f63SMark Brown 			bool ffr = true;
2000e0838f63SMark Brown 			u64 svcr;
2001fdfa976cSDave Martin 
2002fdfa976cSDave Martin 			__this_cpu_write(efi_sve_state_used, true);
2003fdfa976cSDave Martin 
2004e0838f63SMark Brown 			if (system_supports_sme()) {
2005ec0067a6SMark Brown 				svcr = read_sysreg_s(SYS_SVCR);
2006e0838f63SMark Brown 
20072e990e63SMark Brown 				__this_cpu_write(efi_sm_state,
20082e990e63SMark Brown 						 svcr & SVCR_SM_MASK);
2009e0838f63SMark Brown 
20102e990e63SMark Brown 				/*
20112e990e63SMark Brown 				 * Unless we have FA64 FFR does not
20122e990e63SMark Brown 				 * exist in streaming mode.
20132e990e63SMark Brown 				 */
20142e990e63SMark Brown 				if (!system_supports_fa64())
20152e990e63SMark Brown 					ffr = !(svcr & SVCR_SM_MASK);
2016e0838f63SMark Brown 			}
2017e0838f63SMark Brown 
2018b5bc00ffSMark Brown 			sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
20199f584866SMark Brown 				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
2020e0838f63SMark Brown 				       ffr);
2021e0838f63SMark Brown 
2022e0838f63SMark Brown 			if (system_supports_sme())
2023ec0067a6SMark Brown 				sysreg_clear_set_s(SYS_SVCR,
2024ec0067a6SMark Brown 						   SVCR_SM_MASK, 0);
2025e0838f63SMark Brown 
2026fdfa976cSDave Martin 		} else {
20274328825dSDave Martin 			fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
2028fdfa976cSDave Martin 		}
2029fdfa976cSDave Martin 
20304328825dSDave Martin 		__this_cpu_write(efi_fpsimd_state_used, true);
20314328825dSDave Martin 	}
20324328825dSDave Martin }
20334328825dSDave Martin 
20344328825dSDave Martin /*
20354328825dSDave Martin  * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
20364328825dSDave Martin  */
20374328825dSDave Martin void __efi_fpsimd_end(void)
20384328825dSDave Martin {
20394328825dSDave Martin 	if (!system_supports_fpsimd())
20404328825dSDave Martin 		return;
20414328825dSDave Martin 
2042fdfa976cSDave Martin 	if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
20434328825dSDave Martin 		kernel_neon_end();
2044fdfa976cSDave Martin 	} else {
2045fdfa976cSDave Martin 		if (system_supports_sve() &&
2046fdfa976cSDave Martin 		    likely(__this_cpu_read(efi_sve_state_used))) {
2047fdfa976cSDave Martin 			char const *sve_state = this_cpu_ptr(efi_sve_state);
2048e0838f63SMark Brown 			bool ffr = true;
2049fdfa976cSDave Martin 
2050e0838f63SMark Brown 			/*
2051e0838f63SMark Brown 			 * Restore streaming mode; EFI calls are
2052e0838f63SMark Brown 			 * normal function calls so should not return in
2053e0838f63SMark Brown 			 * streaming mode.
2054e0838f63SMark Brown 			 */
2055e0838f63SMark Brown 			if (system_supports_sme()) {
2056e0838f63SMark Brown 				if (__this_cpu_read(efi_sm_state)) {
2057ec0067a6SMark Brown 					sysreg_clear_set_s(SYS_SVCR,
2058e0838f63SMark Brown 							   0,
2059ec0067a6SMark Brown 							   SVCR_SM_MASK);
20602e990e63SMark Brown 
20612e990e63SMark Brown 					/*
20622e990e63SMark Brown 					 * Unless we have FA64 FFR does not
20632e990e63SMark Brown 					 * exist in streaming mode.
20642e990e63SMark Brown 					 */
2065e0838f63SMark Brown 					if (!system_supports_fa64())
20662e990e63SMark Brown 						ffr = false;
2067e0838f63SMark Brown 				}
2068e0838f63SMark Brown 			}
2069e0838f63SMark Brown 
2070b5bc00ffSMark Brown 			sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
2071fdfa976cSDave Martin 				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
2072e0838f63SMark Brown 				       ffr);
2073fdfa976cSDave Martin 
2074fdfa976cSDave Martin 			__this_cpu_write(efi_sve_state_used, false);
2075fdfa976cSDave Martin 		} else {
2076fdfa976cSDave Martin 			fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
2077fdfa976cSDave Martin 		}
2078fdfa976cSDave Martin 	}
20794328825dSDave Martin }
20804328825dSDave Martin 
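/*
 * Illustrative pairing (the actual wrappers live in the EFI runtime
 * services code, not in this file): each firmware call is bracketed as
 *
 *	__efi_fpsimd_begin();
 *	status = fn(args);	// fn: the firmware entry point
 *	__efi_fpsimd_end();
 *
 * so any live task FPSIMD/SVE state survives the firmware's own
 * register use.
 */
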
2081e580b8bcSDave Martin #endif /* CONFIG_EFI */
2082e580b8bcSDave Martin 
20834cfb3613SArd Biesheuvel #endif /* CONFIG_KERNEL_MODE_NEON */
20844cfb3613SArd Biesheuvel 
2085fb1ab1abSLorenzo Pieralisi #ifdef CONFIG_CPU_PM
2086fb1ab1abSLorenzo Pieralisi static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
2087fb1ab1abSLorenzo Pieralisi 				  unsigned long cmd, void *v)
2088fb1ab1abSLorenzo Pieralisi {
2089fb1ab1abSLorenzo Pieralisi 	switch (cmd) {
2090fb1ab1abSLorenzo Pieralisi 	case CPU_PM_ENTER:
209154b8c7cbSJulien Grall 		fpsimd_save_and_flush_cpu_state();
2092fb1ab1abSLorenzo Pieralisi 		break;
2093fb1ab1abSLorenzo Pieralisi 	case CPU_PM_EXIT:
2094fb1ab1abSLorenzo Pieralisi 		break;
2095fb1ab1abSLorenzo Pieralisi 	case CPU_PM_ENTER_FAILED:
2096fb1ab1abSLorenzo Pieralisi 	default:
2097fb1ab1abSLorenzo Pieralisi 		return NOTIFY_DONE;
2098fb1ab1abSLorenzo Pieralisi 	}
2099fb1ab1abSLorenzo Pieralisi 	return NOTIFY_OK;
2100fb1ab1abSLorenzo Pieralisi }
2101fb1ab1abSLorenzo Pieralisi 
2102fb1ab1abSLorenzo Pieralisi static struct notifier_block fpsimd_cpu_pm_notifier_block = {
2103fb1ab1abSLorenzo Pieralisi 	.notifier_call = fpsimd_cpu_pm_notifier,
2104fb1ab1abSLorenzo Pieralisi };
2105fb1ab1abSLorenzo Pieralisi 
2106a7c61a34SJisheng Zhang static void __init fpsimd_pm_init(void)
2107fb1ab1abSLorenzo Pieralisi {
2108fb1ab1abSLorenzo Pieralisi 	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
2109fb1ab1abSLorenzo Pieralisi }
2110fb1ab1abSLorenzo Pieralisi 
2111fb1ab1abSLorenzo Pieralisi #else
2112fb1ab1abSLorenzo Pieralisi static inline void fpsimd_pm_init(void) { }
2113fb1ab1abSLorenzo Pieralisi #endif /* CONFIG_CPU_PM */
2114fb1ab1abSLorenzo Pieralisi 
211532365e64SJanet Liu #ifdef CONFIG_HOTPLUG_CPU
2116c23a7266SSebastian Andrzej Siewior static int fpsimd_cpu_dead(unsigned int cpu)
211732365e64SJanet Liu {
2118cb968afcSDave Martin 	per_cpu(fpsimd_last_state.st, cpu) = NULL;
2119c23a7266SSebastian Andrzej Siewior 	return 0;
212032365e64SJanet Liu }
212132365e64SJanet Liu 
212232365e64SJanet Liu static inline void fpsimd_hotplug_init(void)
212332365e64SJanet Liu {
2124c23a7266SSebastian Andrzej Siewior 	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
2125c23a7266SSebastian Andrzej Siewior 				  NULL, fpsimd_cpu_dead);
212632365e64SJanet Liu }
212732365e64SJanet Liu 
212832365e64SJanet Liu #else
212932365e64SJanet Liu static inline void fpsimd_hotplug_init(void) { }
213032365e64SJanet Liu #endif
213132365e64SJanet Liu 
213253631b54SCatalin Marinas /*
213353631b54SCatalin Marinas  * FP/SIMD support code initialisation.
213453631b54SCatalin Marinas  */
213553631b54SCatalin Marinas static int __init fpsimd_init(void)
213653631b54SCatalin Marinas {
2137aaba098fSAndrew Murray 	if (cpu_have_named_feature(FP)) {
2138fb1ab1abSLorenzo Pieralisi 		fpsimd_pm_init();
213932365e64SJanet Liu 		fpsimd_hotplug_init();
2140fe80f9f2SSuzuki K. Poulose 	} else {
2141fe80f9f2SSuzuki K. Poulose 		pr_notice("Floating-point is not implemented\n");
2142fe80f9f2SSuzuki K. Poulose 	}
2143fe80f9f2SSuzuki K. Poulose 
2144aaba098fSAndrew Murray 	if (!cpu_have_named_feature(ASIMD))
2145fe80f9f2SSuzuki K. Poulose 		pr_notice("Advanced SIMD is not implemented\n");
2146fb1ab1abSLorenzo Pieralisi 
21475e64b862SMark Brown 
214812f1bacfSMark Brown 	sve_sysctl_init();
214912f1bacfSMark Brown 	sme_sysctl_init();
215012f1bacfSMark Brown 
215112f1bacfSMark Brown 	return 0;
215253631b54SCatalin Marinas }
2153ae2e972dSSuzuki K Poulose core_initcall(fpsimd_init);
2154