// SPDX-License-Identifier: GPL-2.0-only
/*
 * FP/SIMD context switching and fault handling
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/swab.h>

#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/neon.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>

#define FPEXC_IOF	(1 << 0)
#define FPEXC_DZF	(1 << 1)
#define FPEXC_OFF	(1 << 2)
#define FPEXC_UFF	(1 << 3)
#define FPEXC_IXF	(1 << 4)
#define FPEXC_IDF	(1 << 7)

/*
 * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
 *
 * In order to reduce the number of times the FPSIMD state is needlessly saved
 * and restored, we need to keep track of two things:
 * (a) for each task, we need to remember which CPU was the last one to have
 *     the task's FPSIMD state loaded into its FPSIMD registers;
 * (b) for each CPU, we need to remember which task's userland FPSIMD state has
 *     been loaded into its FPSIMD registers most recently, or whether it has
 *     been used to perform kernel mode NEON in the meantime.
 *
 * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
 * the id of the current CPU every time the state is loaded onto a CPU. For (b),
 * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
 * address of the userland FPSIMD state of the task that was most recently
 * loaded onto the CPU, or NULL if kernel mode NEON has been performed after
 * that.
 *
 * With this in place, we no longer have to restore the next FPSIMD state right
 * when switching between tasks. Instead, we can defer this check to userland
 * resume, at which time we verify whether the CPU's fpsimd_last_state and the
 * task's fpsimd_cpu are still mutually in sync. If this is the case, we
 * can omit the FPSIMD restore.
 *
 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
 * indicate whether or not the userland FPSIMD state of the current task is
 * present in the registers. The flag is set unless the FPSIMD registers of this
 * CPU currently contain the most recent userland FPSIMD state of the current
 * task. If the task is behaving as a VMM, then this will be managed by
 * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
 * loaded on the CPU, allowing the state to be saved if an FPSIMD-aware
 * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
 * flag the register state as invalid.
 *
 * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
 * save the task's FPSIMD context back to task_struct from softirq context.
 * To prevent this from racing with the manipulation of the task's FPSIMD state
 * from task context and thereby corrupting the state, it is necessary to
 * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
 * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
 * run but prevent them from using FPSIMD.
 *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if the task's fpsimd_cpu field contains the
 *   id of the current CPU, and the CPU's fpsimd_last_state per-cpu variable
 *   points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
 *   cleared, otherwise it is set;
 *
 * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
 *   userland FPSIMD state is copied from memory to the registers, the task's
 *   fpsimd_cpu field is set to the id of the current CPU, the current
 *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
 *   TIF_FOREIGN_FPSTATE flag is cleared;
 *
 * - the task executes an ordinary syscall; upon return to userland, the
 *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
 *   restored;
 *
 * - the task executes a syscall which executes some NEON instructions; this is
 *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
 *   register contents to memory, clears the fpsimd_last_state per-cpu variable
 *   and sets the TIF_FOREIGN_FPSTATE flag;
 *
 * - the task gets preempted after kernel_neon_end() is called; as we have not
 *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
 *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */
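
/*
 * Illustrative sketch only (simplified, not the exact code, which lives
 * in fpsimd_thread_switch()): the scheduled-in check described above
 * conceptually amounts to:
 *
 *	if (__this_cpu_read(fpsimd_last_state.st) ==
 *		    &next->thread.uw.fpsimd_state &&
 *	    next->thread.fpsimd_cpu == smp_processor_id())
 *		clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 *	else
 *		set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 */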

static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
	[ARM64_VEC_SVE] = {
		.type = ARM64_VEC_SVE,
		.name = "SVE",
		.min_vl = SVE_VL_MIN,
		.max_vl = SVE_VL_MIN,
		.max_virtualisable_vl = SVE_VL_MIN,
	},
#endif
#ifdef CONFIG_ARM64_SME
	[ARM64_VEC_SME] = {
		.type = ARM64_VEC_SME,
		.name = "SME",
	},
#endif
};

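/*
 * Return the TIF_*_VL_INHERIT flag corresponding to the given vector
 * type, so that SVE and SME can share the inheritance handling below.
 */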
static unsigned int vec_vl_inherit_flag(enum vec_type type)
{
	switch (type) {
	case ARM64_VEC_SVE:
		return TIF_SVE_VL_INHERIT;
	case ARM64_VEC_SME:
		return TIF_SME_VL_INHERIT;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}

struct vl_config {
	int __default_vl;	/* Default VL for tasks */
};

static struct vl_config vl_config[ARM64_VEC_MAX];

static inline int get_default_vl(enum vec_type type)
{
	return READ_ONCE(vl_config[type].__default_vl);
}

#ifdef CONFIG_ARM64_SVE

static inline int get_sve_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SVE);
}

static inline void set_default_vl(enum vec_type type, int val)
{
	WRITE_ONCE(vl_config[type].__default_vl, val);
}

static inline void set_sve_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SVE, val);
}

static void __percpu *efi_sve_state;

#else /* ! CONFIG_ARM64_SVE */

/* Dummy declaration for code that will be optimised out: */
extern void __percpu *efi_sve_state;

#endif /* ! CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

static int get_sme_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SME);
}

static void set_sme_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SME, val);
}

static void sme_free(struct task_struct *);

#else

static inline void sme_free(struct task_struct *t) { }

#endif

DEFINE_PER_CPU(bool, fpsimd_context_busy);
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);

static void fpsimd_bind_task_to_cpu(void);

static void __get_cpu_fpsimd_context(void)
{
	bool busy = __this_cpu_xchg(fpsimd_context_busy, true);

	WARN_ON(busy);
}

/*
 * Claim ownership of the CPU FPSIMD context for use by the calling context.
 *
 * The caller may freely manipulate the FPSIMD context metadata until
 * put_cpu_fpsimd_context() is called.
 *
 * The double-underscore version must only be called if you know the task
 * can't be preempted.
 *
 * On RT kernels local_bh_disable() is not sufficient because it only
 * serializes soft interrupt related sections via a local lock, but stays
 * preemptible. Disabling preemption is the right choice here as bottom
 * half processing is always in thread context on RT kernels so it
 * implicitly prevents bottom half processing as well.
 */
static void get_cpu_fpsimd_context(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_bh_disable();
	else
		preempt_disable();
	__get_cpu_fpsimd_context();
}

static void __put_cpu_fpsimd_context(void)
{
	bool busy = __this_cpu_xchg(fpsimd_context_busy, false);

	WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
}

/*
 * Release the CPU FPSIMD context.
 *
 * Must be called from a context in which get_cpu_fpsimd_context() was
 * previously called, with no call to put_cpu_fpsimd_context() in the
 * meantime.
 */
static void put_cpu_fpsimd_context(void)
{
	__put_cpu_fpsimd_context();
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_bh_enable();
	else
		preempt_enable();
}

static bool have_cpu_fpsimd_context(void)
{
	return !preemptible() && __this_cpu_read(fpsimd_context_busy);
}
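
/*
 * Typical usage of the helpers above (a sketch; see
 * vec_set_vector_length() below for a real caller):
 *
 *	get_cpu_fpsimd_context();
 *	fpsimd_save();
 *	... manipulate current's FPSIMD/SVE state in thread_struct ...
 *	put_cpu_fpsimd_context();
 */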
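
/*
 * Accessors for the per-vector-type vector length fields in thread_struct.
 * The _onexec variants refer to the vector length to (re)set at the next
 * exec, rather than the currently configured one.
 */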
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
	return task->thread.vl[type];
}

void task_set_vl(struct task_struct *task, enum vec_type type,
		 unsigned long vl)
{
	task->thread.vl[type] = vl;
}

unsigned int task_get_vl_onexec(const struct task_struct *task,
				enum vec_type type)
{
	return task->thread.vl_onexec[type];
}

void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
			unsigned long vl)
{
	task->thread.vl_onexec[type] = vl;
}

/*
 * TIF_SME controls whether a task can use SME without trapping while
 * in userspace; when TIF_SME is set we must have storage
 * allocated in sve_state and sme_state to store the contents of both ZA
 * and the SVE registers for both streaming and non-streaming modes.
 *
 * If both SVCR.ZA and SVCR.SM are disabled then at any point we
 * may disable TIF_SME and reenable traps.
 */


/*
 * TIF_SVE controls whether a task can use SVE without trapping while
 * in userspace, and also (together with TIF_SME) the way a task's
 * FPSIMD/SVE state is stored in thread_struct.
 *
 * The kernel uses this flag to track whether a user task is actively
 * using SVE, and therefore whether full SVE register state needs to
 * be tracked. If not, the cheaper FPSIMD context handling code can
 * be used instead of the more costly SVE equivalents.
 *
 *  * TIF_SVE or SVCR.SM set:
 *
 *    The task can execute SVE instructions while in userspace without
 *    trapping to the kernel.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
 *  * TIF_SVE clear:
 *
 *    An attempt by the user task to execute an SVE instruction causes
 *    do_sve_acc() to be called, which does some preparation and then
 *    sets TIF_SVE.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
 * The data will be stored in one of two formats:
 *
 *  * FPSIMD only - FP_STATE_FPSIMD:
 *
 *    When the FPSIMD-only state is stored, task->thread.fp_type is set to
 *    FP_STATE_FPSIMD; the FPSIMD registers V0-V31 are encoded in
 *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
 *    logically zero but not stored anywhere; P0-P15 and FFR are not
 *    stored and have unspecified values from userspace's point of
 *    view. For hygiene purposes, the kernel zeroes them on next use,
 *    but userspace is discouraged from relying on this.
 *
 *    task->thread.sve_state does not need to be non-NULL, valid or any
 *    particular size: it must not be dereferenced and any data stored
 *    there should be considered stale and not referenced.
 *
 *  * SVE state - FP_STATE_SVE:
 *
 *    When the full SVE state is stored, task->thread.fp_type is set to
 *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] of the
 *    corresponding Zn), P0-P15 and FFR are encoded in
 *    task->thread.sve_state, formatted appropriately for vector
 *    length task->thread.sve_vl or, if SVCR.SM is set,
 *    task->thread.sme_vl. The storage for the vector registers in
 *    task->thread.uw.fpsimd_state should be ignored.
 *
 *    task->thread.sve_state must point to a valid buffer at least
 *    sve_state_size(task) bytes in size. The data stored in
 *    task->thread.uw.fpsimd_state.vregs should be considered stale
 *    and not referenced.
 *
 *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 *    irrespective of whether TIF_SVE is clear or set, since these are
 *    not vector length dependent.
 */

/*
 * Update current's FPSIMD/SVE registers from thread_struct.
 *
 * This function should be called only when the FPSIMD/SVE state in
 * thread_struct is known to be up to date, when preparing to enter
 * userspace.
 */
static void task_fpsimd_load(void)
{
	bool restore_sve_regs = false;
	bool restore_ffr;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!have_cpu_fpsimd_context());

	if (system_supports_sve() || system_supports_sme()) {
		switch (current->thread.fp_type) {
		case FP_STATE_FPSIMD:
			/* Stop tracking SVE for this task until next use. */
			if (test_and_clear_thread_flag(TIF_SVE))
				sve_user_disable();
			break;
		case FP_STATE_SVE:
			if (!thread_sm_enabled(&current->thread) &&
			    !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
				sve_user_enable();

			if (test_thread_flag(TIF_SVE))
				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);

			restore_sve_regs = true;
			restore_ffr = true;
			break;
		default:
			/*
			 * This indicates either a bug in
			 * fpsimd_save() or memory corruption; we
			 * should always record an explicit format
			 * when we save. We always at least have the
			 * memory allocated for FPSIMD registers so
			 * try that and hope for the best.
			 */
			WARN_ON_ONCE(1);
			clear_thread_flag(TIF_SVE);
			break;
		}
	}

	/* Restore SME, override SVE register configuration if needed */
	if (system_supports_sme()) {
		unsigned long sme_vl = task_get_sme_vl(current);

		/* Ensure VL is set up for restoring data */
		if (test_thread_flag(TIF_SME))
			sme_set_vq(sve_vq_from_vl(sme_vl) - 1);

		write_sysreg_s(current->thread.svcr, SYS_SVCR);

		if (thread_za_enabled(&current->thread))
			sme_load_state(current->thread.sme_state,
				       system_supports_sme2());

		if (thread_sm_enabled(&current->thread))
			restore_ffr = system_supports_fa64();
	}

	if (restore_sve_regs) {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       restore_ffr);
	} else {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
	}
}

/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
 * date with respect to the CPU registers. Note carefully that the
 * current context is the context last bound to the CPU stored in
 * last; if KVM is involved this may be the guest VM context rather
 * than the host thread for the VM pointed to by current. This means
 * that we must always reference the state storage via last rather
 * than via current; if we are saving KVM state then it will have
 * ensured that the type of registers to save is set in last->to_save.
 */
static void fpsimd_save(void)
{
	struct cpu_fp_state const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
	bool save_sve_regs = false;
	bool save_ffr;
	unsigned int vl;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!have_cpu_fpsimd_context());

	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		return;

	/*
	 * If a task is in a syscall the ABI allows us to only
	 * preserve the state shared with FPSIMD so don't bother
	 * saving the full SVE state in that case.
	 */
	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
	     !in_syscall(current_pt_regs())) ||
	    last->to_save == FP_STATE_SVE) {
		save_sve_regs = true;
		save_ffr = true;
		vl = last->sve_vl;
	}

	if (system_supports_sme()) {
		u64 *svcr = last->svcr;

		*svcr = read_sysreg_s(SYS_SVCR);

		if (*svcr & SVCR_ZA_MASK)
			sme_save_state(last->sme_state,
				       system_supports_sme2());

		/* If we are in streaming mode override regular SVE. */
		if (*svcr & SVCR_SM_MASK) {
			save_sve_regs = true;
			save_ffr = system_supports_fa64();
			vl = last->sme_vl;
		}
	}

	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
		/* Get the configured VL from RDVL, will account for SM */
		if (WARN_ON(sve_get_vl() != vl)) {
			/*
			 * Can't save the user regs, so current would
			 * re-enter user with corrupt state.
			 * There's no way to recover, so kill it:
			 */
			force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
			return;
		}

		sve_save_state((char *)last->sve_state +
					sve_ffr_offset(vl),
			       &last->st->fpsr, save_ffr);
		*last->fp_type = FP_STATE_SVE;
	} else {
		fpsimd_save_state(last->st);
		*last->fp_type = FP_STATE_FPSIMD;
	}
}

/*
 * All vector length selection from userspace comes through here.
 * We're on a slow path, so some sanity-checks are included.
 * If things go wrong there's a bug somewhere, but try to fall back to a
 * safe choice.
 */
static unsigned int find_supported_vector_length(enum vec_type type,
						 unsigned int vl)
{
	struct vl_info *info = &vl_info[type];
	int bit;
	int max_vl = info->max_vl;

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = info->min_vl;

	if (WARN_ON(!sve_vl_valid(max_vl)))
		max_vl = info->min_vl;

	if (vl > max_vl)
		vl = max_vl;
	if (vl < info->min_vl)
		vl = info->min_vl;

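	/*
	 * vq_map is indexed via __vq_to_bit(), which reverses the order
	 * (larger VQs sit at lower bit positions), so searching upwards
	 * from the requested VQ's bit yields the largest supported VQ
	 * that does not exceed the request.
	 */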
	bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
			    __vq_to_bit(sve_vq_from_vl(vl)));
	return sve_vl_from_vq(__bit_to_vq(bit));
}

#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)

static int vec_proc_do_default_vl(struct ctl_table *table, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	struct vl_info *info = table->extra1;
	enum vec_type type = info->type;
	int ret;
	int vl = get_default_vl(type);
	struct ctl_table tmp_table = {
		.data = &vl,
		.maxlen = sizeof(vl),
	};

	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Writing -1 has the special meaning "set to max": */
	if (vl == -1)
		vl = info->max_vl;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	set_default_vl(type, find_supported_vector_length(type, vl));
	return 0;
}

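/*
 * The tables below are registered under /proc/sys/abi/. For example
 * (illustrative shell session, not code from this file):
 *
 *	# cat /proc/sys/abi/sve_default_vector_length
 *	# echo 32 > /proc/sys/abi/sve_default_vector_length
 *	# echo -1 > /proc/sys/abi/sve_default_vector_length   (set to max)
 */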
static struct ctl_table sve_default_vl_table[] = {
	{
		.procname	= "sve_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SVE],
	},
	{ }
};

static int __init sve_sysctl_init(void)
{
	if (system_supports_sve())
		if (!register_sysctl("abi", sve_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */

#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
static struct ctl_table sme_default_vl_table[] = {
	{
		.procname	= "sme_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SME],
	},
	{ }
};

static int __init sme_sysctl_init(void)
{
	if (system_supports_sme())
		if (!register_sysctl("abi", sme_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
static int __init sme_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */

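/*
 * Address of the Zn register image within an SVE register dump, laid out
 * as for the signal frame, given the vector length in quadwords (vq).
 */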
#define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))

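/*
 * SVE register data is stored in memory in little-endian byte order.
 * On big-endian hosts each 128-bit chunk must therefore be byte-swapped
 * before it can be handled as a native __uint128_t; on little-endian
 * hosts this is a no-op.
 */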
#ifdef CONFIG_CPU_BIG_ENDIAN
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	u64 a = swab64(x);
	u64 b = swab64(x >> 64);

	return ((__uint128_t)a << 64) | b;
}
#else
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	return x;
}
#endif

#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)

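/*
 * Copy the FPSIMD V-registers into the corresponding Z-registers of an
 * SVE register dump, converting each 128-bit value to the in-memory
 * (little-endian) representation. Bits above 128 in each Zn are left
 * untouched by this helper.
 */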
static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
			    unsigned int vq)
{
	unsigned int i;
	__uint128_t *p;

	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t *)ZREG(sst, vq, i);
		*p = arm64_cpu_to_le128(fst->vregs[i]);
	}
}

/*
 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 * task->thread.sve_state.
 *
 * Task can be a non-runnable task, or current. In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must be up to date before calling this
 * function.
 */
static void fpsimd_to_sve(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
	__fpsimd_to_sve(sst, fst, vq);
}

/*
 * Transfer the SVE state in task->thread.sve_state to
 * task->thread.uw.fpsimd_state.
 *
 * Task can be a non-runnable task, or current. In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */
static void sve_to_fpsimd(struct task_struct *task)
{
	unsigned int vq, vl;
	void const *sst = task->thread.sve_state;
	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t const *p;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vl = thread_get_cur_vl(&task->thread);
	vq = sve_vq_from_vl(vl);
	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t const *)ZREG(sst, vq, i);
		fst->vregs[i] = arm64_le128_to_cpu(*p);
	}
}

#ifdef CONFIG_ARM64_SVE
/*
 * Call __sve_free() directly only if you know task can't be scheduled
 * or preempted.
 */
static void __sve_free(struct task_struct *task)
{
	kfree(task->thread.sve_state);
	task->thread.sve_state = NULL;
}

static void sve_free(struct task_struct *task)
{
	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));

	__sve_free(task);
}

/*
 * Return how many bytes of memory are required to store the full SVE
 * state for task, given task's currently configured vector length.
 */
size_t sve_state_size(struct task_struct const *task)
{
	unsigned int vl = 0;

	if (system_supports_sve())
		vl = task_get_sve_vl(task);
	if (system_supports_sme())
		vl = max(vl, task_get_sme_vl(task));

	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}

/*
 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sve_state with new data. The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability: except in the
 * do_sve_acc() case, there is no ABI requirement to hide stale data
 * written previously by the task.
 */
void sve_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sve_state) {
		if (flush)
			memset(task->thread.sve_state, 0,
			       sve_state_size(task));
		return;
	}

	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);
}


/*
 * Force the FPSIMD state shared with SVE to be updated in the SVE state
 * even if the SVE state is the current active state.
 *
 * This should only be called by ptrace. task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 */
void fpsimd_force_sync_to_sve(struct task_struct *task)
{
	fpsimd_to_sve(task);
}

/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the user task, irrespective of whether SVE is in use or not.
 *
 * This should only be called by ptrace. task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 */
void fpsimd_sync_to_sve(struct task_struct *task)
{
	if (!test_tsk_thread_flag(task, TIF_SVE) &&
	    !thread_sm_enabled(&task->thread))
		fpsimd_to_sve(task);
}

/*
 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
 * the user task, irrespective of whether SVE is in use or not.
 *
 * This should only be called by ptrace. task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 */
void sve_sync_to_fpsimd(struct task_struct *task)
{
	if (task->thread.fp_type == FP_STATE_SVE)
		sve_to_fpsimd(task);
}

/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the task->thread.uw.fpsimd_state.
 *
 * This should only be called by ptrace to merge new FPSIMD register
 * values into a task for which SVE is currently active.
 * task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must already have been initialised with
 * the new FPSIMD register values to be merged in.
 */
void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (!test_tsk_thread_flag(task, TIF_SVE) &&
	    !thread_sm_enabled(&task->thread))
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));

	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
	__fpsimd_to_sve(sst, fst, vq);
}

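/*
 * Change the vector length of task for the given vector type, as
 * requested from userspace (via prctl()) or from ptrace.
 *
 * flags are as for the corresponding PR_SVE_SET_VL/PR_SME_SET_VL prctl:
 * PR_SVE_SET_VL_ONEXEC defers the change until the next exec, and
 * PR_SVE_VL_INHERIT makes the setting survive exec. If the VL actually
 * changes, any live vector state is saved and the task is converted
 * back to plain FPSIMD state.
 */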
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
			  unsigned long vl, unsigned long flags)
{
	bool free_sme = false;

	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic code
	 * can work with. A flag may be assigned in the future to
	 * allow setting of larger vector lengths without confusing
	 * older software.
	 */
	if (vl > VL_ARCH_MAX)
		vl = VL_ARCH_MAX;

	vl = find_supported_vector_length(type, vl);

	if (flags & (PR_SVE_VL_INHERIT |
		     PR_SVE_SET_VL_ONEXEC))
		task_set_vl_onexec(task, type, vl);
	else
		/* Reset VL to system default on next exec: */
		task_set_vl_onexec(task, type, 0);

	/* Only actually set the VL if not deferred: */
	if (flags & PR_SVE_SET_VL_ONEXEC)
		goto out;

	if (vl == task_get_vl(task, type))
		goto out;

	/*
	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
	 * write any live register state back to task_struct, and convert to a
	 * regular FPSIMD thread.
	 */
	if (task == current) {
		get_cpu_fpsimd_context();

		fpsimd_save();
	}

	fpsimd_flush_task_state(task);
	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
	    thread_sm_enabled(&task->thread)) {
		sve_to_fpsimd(task);
		task->thread.fp_type = FP_STATE_FPSIMD;
	}

	if (system_supports_sme()) {
		if (type == ARM64_VEC_SME ||
		    !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
			/*
			 * We are changing the SME VL or weren't using
			 * SME anyway, discard the state and force a
			 * reallocation.
			 */
			task->thread.svcr &= ~(SVCR_SM_MASK |
					       SVCR_ZA_MASK);
			clear_tsk_thread_flag(task, TIF_SME);
			free_sme = true;
		}
	}

	if (task == current)
		put_cpu_fpsimd_context();

	task_set_vl(task, type, vl);

	/*
	 * Free the changed states if they are not in use; SME will be
	 * reallocated to the correct size on next use and we just
	 * allocate SVE now in case it is needed for use in streaming
	 * mode.
	 */
	if (system_supports_sve()) {
		sve_free(task);
		sve_alloc(task, true);
	}

	if (free_sme)
		sme_free(task);

out:
	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}

/*
 * Encode the current vector length and flags for return.
 * This is only required for prctl(): ptrace has separate fields.
 * SVE and SME use the same bits for _ONEXEC and _INHERIT.
 *
 * flags are as for vec_set_vector_length().
 */
static int vec_prctl_status(enum vec_type type, unsigned long flags)
{
	int ret;

	if (flags & PR_SVE_SET_VL_ONEXEC)
		ret = task_get_vl_onexec(current, type);
	else
		ret = task_get_vl(current, type);

	if (test_thread_flag(vec_vl_inherit_flag(type)))
		ret |= PR_SVE_VL_INHERIT;

	return ret;
}

/* PR_SVE_SET_VL */
int sve_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SVE_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SVE, flags);
}

/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SVE, 0);
}

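/*
 * Illustrative userspace usage (a sketch, not code from this file):
 *
 *	#include <sys/prctl.h>
 *
 *	int ret = prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);
 *	if (ret >= 0)
 *		vl = ret & PR_SVE_VL_LEN_MASK;
 *
 * This requests a 32-byte (256-bit) vector length that is inherited
 * across exec. The SME prctls below use the same encoding.
 */
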
#ifdef CONFIG_ARM64_SME
/* PR_SME_SET_VL */
int sme_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SME_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SME, flags);
}

/* PR_SME_GET_VL */
int sme_get_current_vl(void)
{
	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SME, 0);
}
#endif /* CONFIG_ARM64_SME */

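/*
 * Discover the set of vector lengths (expressed as quadword counts, VQs)
 * supported by this CPU for the given vector type, by programming each
 * candidate VQ into the relevant length control register and reading
 * back the vector length actually obtained.
 */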
vec_probe_vqs(struct vl_info * info,DECLARE_BITMAP (map,SVE_VQ_MAX))1024b5bc00ffSMark Brown static void vec_probe_vqs(struct vl_info *info,
1025b5bc00ffSMark Brown DECLARE_BITMAP(map, SVE_VQ_MAX))
10262e0f2478SDave Martin {
10272e0f2478SDave Martin unsigned int vq, vl;
10282e0f2478SDave Martin
10292e0f2478SDave Martin bitmap_zero(map, SVE_VQ_MAX);
10302e0f2478SDave Martin
10312e0f2478SDave Martin for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
1032b5bc00ffSMark Brown write_vl(info->type, vq - 1); /* self-syncing */
1033b42990d3SMark Brown
1034b42990d3SMark Brown switch (info->type) {
1035b42990d3SMark Brown case ARM64_VEC_SVE:
10362e0f2478SDave Martin vl = sve_get_vl();
1037b42990d3SMark Brown break;
1038b42990d3SMark Brown case ARM64_VEC_SME:
1039b42990d3SMark Brown vl = sme_get_vl();
1040b42990d3SMark Brown break;
1041b42990d3SMark Brown default:
1042b42990d3SMark Brown vl = 0;
1043b42990d3SMark Brown break;
1044b42990d3SMark Brown }
1045b42990d3SMark Brown
1046b42990d3SMark Brown /* Minimum VL identified? */
1047b42990d3SMark Brown if (sve_vq_from_vl(vl) > vq)
1048b42990d3SMark Brown break;
1049b42990d3SMark Brown
10502e0f2478SDave Martin vq = sve_vq_from_vl(vl); /* skip intervening lengths */
1051ead9e430SDave Martin set_bit(__vq_to_bit(vq), map);
10522e0f2478SDave Martin }
10532e0f2478SDave Martin }
10542e0f2478SDave Martin
10558b08e840SDave Martin /*
10568b08e840SDave Martin * Initialise the set of known supported VQs for the boot CPU.
10578b08e840SDave Martin * This is called during kernel boot, before secondary CPUs are brought up.
10588b08e840SDave Martin */
vec_init_vq_map(enum vec_type type)1059b5bc00ffSMark Brown void __init vec_init_vq_map(enum vec_type type)
10602e0f2478SDave Martin {
1061b5bc00ffSMark Brown struct vl_info *info = &vl_info[type];
1062b5bc00ffSMark Brown vec_probe_vqs(info, info->vq_map);
1063b5bc00ffSMark Brown bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
10642e0f2478SDave Martin }
10652e0f2478SDave Martin
10662e0f2478SDave Martin /*
10672e0f2478SDave Martin * If we haven't committed to the set of supported VQs yet, filter out
10682e0f2478SDave Martin * those not supported by the current CPU.
10698b08e840SDave Martin * This function is called during the bring-up of early secondary CPUs only.
10702e0f2478SDave Martin */
vec_update_vq_map(enum vec_type type)1071b5bc00ffSMark Brown void vec_update_vq_map(enum vec_type type)
10722e0f2478SDave Martin {
1073b5bc00ffSMark Brown struct vl_info *info = &vl_info[type];
1074d06b76beSDave Martin DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
1075d06b76beSDave Martin
1076b5bc00ffSMark Brown vec_probe_vqs(info, tmp_map);
1077b5bc00ffSMark Brown bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
1078b5bc00ffSMark Brown bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
1079b5bc00ffSMark Brown SVE_VQ_MAX);
10802e0f2478SDave Martin }
10812e0f2478SDave Martin
10828b08e840SDave Martin /*
10838b08e840SDave Martin * Check whether the current CPU supports all VQs in the committed set.
10848b08e840SDave Martin * This function is called during the bring-up of late secondary CPUs only.
10858b08e840SDave Martin */
1086b5bc00ffSMark Brown int vec_verify_vq_map(enum vec_type type)
10872e0f2478SDave Martin {
1088b5bc00ffSMark Brown struct vl_info *info = &vl_info[type];
1089d06b76beSDave Martin DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
1090d06b76beSDave Martin unsigned long b;
10912e0f2478SDave Martin
1092b5bc00ffSMark Brown vec_probe_vqs(info, tmp_map);
1093d06b76beSDave Martin
1094d06b76beSDave Martin bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
1095b5bc00ffSMark Brown if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
1096b5bc00ffSMark Brown pr_warn("%s: cpu%d: Required vector length(s) missing\n",
1097b5bc00ffSMark Brown info->name, smp_processor_id());
1098d06b76beSDave Martin return -EINVAL;
10992e0f2478SDave Martin }
11002e0f2478SDave Martin
1101d06b76beSDave Martin if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
1102d06b76beSDave Martin return 0;
1103d06b76beSDave Martin
1104d06b76beSDave Martin /*
1105d06b76beSDave Martin * For KVM, it is necessary to ensure that this CPU doesn't
1106d06b76beSDave Martin * support any vector length that guests may have probed as
1107d06b76beSDave Martin * unsupported.
1108d06b76beSDave Martin */
1109d06b76beSDave Martin
1110d06b76beSDave Martin /* Recover the set of supported VQs: */
1111d06b76beSDave Martin bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
1112d06b76beSDave Martin /* Find VQs supported that are not globally supported: */
1113b5bc00ffSMark Brown bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);
1114d06b76beSDave Martin
1115d06b76beSDave Martin /* Find the lowest such VQ, if any: */
1116d06b76beSDave Martin b = find_last_bit(tmp_map, SVE_VQ_MAX);
1117d06b76beSDave Martin if (b >= SVE_VQ_MAX)
1118d06b76beSDave Martin return 0; /* no mismatches */
1119d06b76beSDave Martin
1120d06b76beSDave Martin /*
1121d06b76beSDave Martin * Mismatches above sve_max_virtualisable_vl are fine, since
1122d06b76beSDave Martin * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
1123d06b76beSDave Martin */
1124b5bc00ffSMark Brown if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
1125b5bc00ffSMark Brown pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
1126b5bc00ffSMark Brown info->name, smp_processor_id());
1127d06b76beSDave Martin return -EINVAL;
1128d06b76beSDave Martin }
1129d06b76beSDave Martin
1130d06b76beSDave Martin return 0;
11312e0f2478SDave Martin }
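/*
 * Worked example of the set algebra above (illustrative values):
 * with a committed vq_map of {1, 2, 4} and a late CPU probing only
 * {1, 2}, the complement of the probe intersects the committed map
 * at VQ 4 and the CPU is rejected outright. If instead the late CPU
 * probes {1, 2, 4, 8}, tmp_map ends up holding just {8}: the CPU is
 * still rejected if VL 128 bytes is at or below the virtualisable
 * limit, since a guest could then observe the mismatch.
 */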
11322e0f2478SDave Martin
1133fdfa976cSDave Martin static void __init sve_efi_setup(void)
1134fdfa976cSDave Martin {
1135e0838f63SMark Brown int max_vl = 0;
1136e0838f63SMark Brown int i;
1137b5bc00ffSMark Brown
1138fdfa976cSDave Martin if (!IS_ENABLED(CONFIG_EFI))
1139fdfa976cSDave Martin return;
1140fdfa976cSDave Martin
1141e0838f63SMark Brown for (i = 0; i < ARRAY_SIZE(vl_info); i++)
1142e0838f63SMark Brown max_vl = max(vl_info[i].max_vl, max_vl);
1143e0838f63SMark Brown
1144fdfa976cSDave Martin /*
1145fdfa976cSDave Martin * alloc_percpu() warns and prints a backtrace if this goes wrong.
1146fdfa976cSDave Martin * This is evidence of a crippled system and we are returning void,
1147fdfa976cSDave Martin * so no attempt is made to handle this situation here.
1148fdfa976cSDave Martin */
1149e0838f63SMark Brown if (!sve_vl_valid(max_vl))
1150fdfa976cSDave Martin goto fail;
1151fdfa976cSDave Martin
1152fdfa976cSDave Martin efi_sve_state = __alloc_percpu(
1153e0838f63SMark Brown SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
1154fdfa976cSDave Martin if (!efi_sve_state)
1155fdfa976cSDave Martin goto fail;
1156fdfa976cSDave Martin
1157fdfa976cSDave Martin return;
1158fdfa976cSDave Martin
1159fdfa976cSDave Martin fail:
1160fdfa976cSDave Martin panic("Cannot allocate percpu memory for EFI SVE save/restore");
1161fdfa976cSDave Martin }
1162fdfa976cSDave Martin
11632e0f2478SDave Martin /*
11642e0f2478SDave Martin * Enable SVE for EL1.
11652e0f2478SDave Martin * Intended for use by the cpufeatures code during CPU boot.
11662e0f2478SDave Martin */
1167c0cda3b8SDave Martin void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
11682e0f2478SDave Martin {
11692e0f2478SDave Martin write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
11702e0f2478SDave Martin isb();
11712e0f2478SDave Martin }
11722e0f2478SDave Martin
117331dc52b3SDave Martin /*
117431dc52b3SDave Martin * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
117531dc52b3SDave Martin * vector length.
117631dc52b3SDave Martin *
117731dc52b3SDave Martin * Use only if SVE is present.
117831dc52b3SDave Martin * This function clobbers the SVE vector length.
117931dc52b3SDave Martin */
118031dc52b3SDave Martin u64 read_zcr_features(void)
118131dc52b3SDave Martin {
118231dc52b3SDave Martin /*
118331dc52b3SDave Martin * Set the maximum possible VL, and write zeroes to all other
118431dc52b3SDave Martin * bits to see if they stick.
118531dc52b3SDave Martin */
118631dc52b3SDave Martin sve_kernel_enable(NULL);
118731dc52b3SDave Martin write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
118831dc52b3SDave Martin
118901948b09SMark Brown /* Return LEN value that would be written to get the maximum VL */
119001948b09SMark Brown return sve_vq_from_vl(sve_get_vl()) - 1;
119131dc52b3SDave Martin }
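/*
 * Worked example of the LEN encoding returned above: the
 * architecture defines VL = 16 * (LEN + 1) bytes, i.e. LEN = vq - 1.
 * If the largest VL the hardware accepts is 64 bytes (512 bits),
 * sve_get_vl() reads back 64, sve_vq_from_vl(64) is 4, and the
 * value reported here is 3.
 */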
119231dc52b3SDave Martin
11932e0f2478SDave Martin void __init sve_setup(void)
11942e0f2478SDave Martin {
1195b5bc00ffSMark Brown struct vl_info *info = &vl_info[ARM64_VEC_SVE];
11962e0f2478SDave Martin u64 zcr;
1197d06b76beSDave Martin DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
1198d06b76beSDave Martin unsigned long b;
11992e0f2478SDave Martin
12002e0f2478SDave Martin if (!system_supports_sve())
12012e0f2478SDave Martin return;
12022e0f2478SDave Martin
12032e0f2478SDave Martin /*
12042e0f2478SDave Martin * The SVE architecture mandates support for 128-bit vectors,
12052e0f2478SDave Martin * so sve_vq_map must have at least SVE_VQ_MIN set.
12062e0f2478SDave Martin * If something went wrong, at least try to patch it up:
12072e0f2478SDave Martin */
1208b5bc00ffSMark Brown if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
1209b5bc00ffSMark Brown set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
12102e0f2478SDave Martin
12112e0f2478SDave Martin zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
1212b5bc00ffSMark Brown info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
12132e0f2478SDave Martin
12142e0f2478SDave Martin /*
12152e0f2478SDave Martin * Sanity-check that the max VL we determined through CPU features
12162e0f2478SDave Martin * corresponds properly to sve_vq_map. If not, do our best:
12172e0f2478SDave Martin */
1218b5bc00ffSMark Brown if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
1219b5bc00ffSMark Brown info->max_vl)))
1220b5bc00ffSMark Brown info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
1221b5bc00ffSMark Brown info->max_vl);
12222e0f2478SDave Martin
12232e0f2478SDave Martin /*
12242e0f2478SDave Martin * For the default VL, pick the maximum supported value <= 64.
12252e0f2478SDave Martin * VL == 64 is guaranteed not to grow the signal frame.
12262e0f2478SDave Martin */
1227b5bc00ffSMark Brown set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));
12282e0f2478SDave Martin
1229b5bc00ffSMark Brown bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
1230d06b76beSDave Martin SVE_VQ_MAX);
1231d06b76beSDave Martin
1232d06b76beSDave Martin b = find_last_bit(tmp_map, SVE_VQ_MAX);
1233d06b76beSDave Martin if (b >= SVE_VQ_MAX)
1234d06b76beSDave Martin /* No non-virtualisable VLs found */
1235b5bc00ffSMark Brown info->max_virtualisable_vl = SVE_VQ_MAX;
1236d06b76beSDave Martin else if (WARN_ON(b == SVE_VQ_MAX - 1))
1237d06b76beSDave Martin /* No virtualisable VLs? This is architecturally forbidden. */
1238b5bc00ffSMark Brown info->max_virtualisable_vl = SVE_VQ_MIN;
1239d06b76beSDave Martin else /* b + 1 < SVE_VQ_MAX */
1240b5bc00ffSMark Brown info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
1241d06b76beSDave Martin
1242b5bc00ffSMark Brown if (info->max_virtualisable_vl > info->max_vl)
1243b5bc00ffSMark Brown info->max_virtualisable_vl = info->max_vl;
1244d06b76beSDave Martin
1245b5bc00ffSMark Brown pr_info("%s: maximum available vector length %u bytes per vector\n",
1246b5bc00ffSMark Brown info->name, info->max_vl);
1247b5bc00ffSMark Brown pr_info("%s: default vector length %u bytes per vector\n",
1248b5bc00ffSMark Brown info->name, get_sve_default_vl());
1249fdfa976cSDave Martin
1250d06b76beSDave Martin /* KVM decides whether to support mismatched systems. Just warn here: */
1251b5bc00ffSMark Brown if (sve_max_virtualisable_vl() < sve_max_vl())
1252b5bc00ffSMark Brown pr_warn("%s: unvirtualisable vector lengths present\n",
1253b5bc00ffSMark Brown info->name);
1254d06b76beSDave Martin
1255fdfa976cSDave Martin sve_efi_setup();
12562e0f2478SDave Martin }
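/*
 * Worked example of the default-VL choice above (illustrative;
 * assumes find_supported_vector_length() rounds a request down to
 * the nearest supported length): on a system supporting VLs of
 * {16, 32, 128} bytes, the request for 64 is satisfied with 32, so
 * the default keeps signal frames at their base size even though
 * the hardware could go wider.
 */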
12572e0f2478SDave Martin
12582e0f2478SDave Martin /*
1259bc0ee476SDave Martin * Called from the put_task_struct() path, which cannot get here
1260bc0ee476SDave Martin * unless dead_task is really dead and not schedulable.
1261bc0ee476SDave Martin */
1262bc0ee476SDave Martin void fpsimd_release_task(struct task_struct *dead_task)
1263bc0ee476SDave Martin {
1264bc0ee476SDave Martin __sve_free(dead_task);
12658bd7f91cSMark Brown sme_free(dead_task);
1266bc0ee476SDave Martin }
1267bc0ee476SDave Martin
1268bc0ee476SDave Martin #endif /* CONFIG_ARM64_SVE */
1269bc0ee476SDave Martin
12705e64b862SMark Brown #ifdef CONFIG_ARM64_SME
12715e64b862SMark Brown
12728bd7f91cSMark Brown /*
1273ce514000SMark Brown * Ensure that task->thread.sme_state is allocated and sufficiently large.
12748bd7f91cSMark Brown *
12758bd7f91cSMark Brown * This function should be used only in preparation for replacing
1276ce514000SMark Brown * task->thread.sme_state with new data. The memory is always zeroed
12778bd7f91cSMark Brown * here to prevent stale data from showing through: this is done in
12788bd7f91cSMark Brown * the interest of testability and predictability, since the architecture
12798bd7f91cSMark Brown * guarantees that when ZA is enabled it will be zeroed.
12808bd7f91cSMark Brown */
12815d0a8d2fSMark Brown void sme_alloc(struct task_struct *task, bool flush)
12828bd7f91cSMark Brown {
1283569156e4SMark Brown if (task->thread.sme_state) {
1284569156e4SMark Brown if (flush)
1285569156e4SMark Brown memset(task->thread.sme_state, 0,
1286569156e4SMark Brown sme_state_size(task));
12878bd7f91cSMark Brown return;
12888bd7f91cSMark Brown }
12898bd7f91cSMark Brown
12908bd7f91cSMark Brown /* This could potentially be up to 64K. */
1291ce514000SMark Brown task->thread.sme_state =
1292ce514000SMark Brown kzalloc(sme_state_size(task), GFP_KERNEL);
12938bd7f91cSMark Brown }
12948bd7f91cSMark Brown
12958bd7f91cSMark Brown static void sme_free(struct task_struct *task)
12968bd7f91cSMark Brown {
1297ce514000SMark Brown kfree(task->thread.sme_state);
1298ce514000SMark Brown task->thread.sme_state = NULL;
12998bd7f91cSMark Brown }
13008bd7f91cSMark Brown
13015e64b862SMark Brown void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
13025e64b862SMark Brown {
13035e64b862SMark Brown /* Set priority for all PEs to architecturally defined minimum */
13045e64b862SMark Brown write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
13055e64b862SMark Brown SYS_SMPRI_EL1);
13065e64b862SMark Brown
13075e64b862SMark Brown /* Allow SME in kernel */
13085e64b862SMark Brown write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
13095e64b862SMark Brown isb();
1310a9d69158SMark Brown
1311a9d69158SMark Brown /* Allow EL0 to access TPIDR2 */
1312a9d69158SMark Brown write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
1313a9d69158SMark Brown isb();
13145e64b862SMark Brown }
13155e64b862SMark Brown
13165e64b862SMark Brown /*
13175e64b862SMark Brown * This must be called after sme_kernel_enable(); we rely on the
13185e64b862SMark Brown * feature table being sorted to ensure this.
13195e64b862SMark Brown */
1320d4913eeeSMark Brown void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
1321d4913eeeSMark Brown {
1322d4913eeeSMark Brown /* Allow use of ZT0 */
1323d4913eeeSMark Brown write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
1324d4913eeeSMark Brown SYS_SMCR_EL1);
1325d4913eeeSMark Brown }
1326d4913eeeSMark Brown
1327d4913eeeSMark Brown /*
1328d4913eeeSMark Brown * This must be called after sme_kernel_enable(); we rely on the
1329d4913eeeSMark Brown * feature table being sorted to ensure this.
1330d4913eeeSMark Brown */
13315e64b862SMark Brown void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
13325e64b862SMark Brown {
13335e64b862SMark Brown /* Allow use of FA64 */
13345e64b862SMark Brown write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
13355e64b862SMark Brown SYS_SMCR_EL1);
13365e64b862SMark Brown }
13375e64b862SMark Brown
1338b42990d3SMark Brown /*
1339b42990d3SMark Brown * Read the pseudo-SMCR used by cpufeatures to identify the supported
1340b42990d3SMark Brown * vector length.
1341b42990d3SMark Brown *
1342b42990d3SMark Brown * Use only if SME is present.
1343b42990d3SMark Brown * This function clobbers the SME vector length.
1344b42990d3SMark Brown */
1345b42990d3SMark Brown u64 read_smcr_features(void)
1346b42990d3SMark Brown {
1347b42990d3SMark Brown sme_kernel_enable(NULL);
1348b42990d3SMark Brown
1349b42990d3SMark Brown /*
1350b42990d3SMark Brown * Set the maximum possible VL.
1351b42990d3SMark Brown */
1352b42990d3SMark Brown write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
1353b42990d3SMark Brown SYS_SMCR_EL1);
1354b42990d3SMark Brown
135501948b09SMark Brown /* Return LEN value that would be written to get the maximum VL */
135601948b09SMark Brown return sve_vq_from_vl(sme_get_vl()) - 1;
1357b42990d3SMark Brown }
1358b42990d3SMark Brown
1359b42990d3SMark Brown void __init sme_setup(void)
1360b42990d3SMark Brown {
1361b42990d3SMark Brown struct vl_info *info = &vl_info[ARM64_VEC_SME];
1362b42990d3SMark Brown u64 smcr;
1363b42990d3SMark Brown int min_bit;
1364b42990d3SMark Brown
1365b42990d3SMark Brown if (!system_supports_sme())
1366b42990d3SMark Brown return;
1367b42990d3SMark Brown
1368b42990d3SMark Brown /*
1369b42990d3SMark Brown * SME doesn't require any particular vector length be
1370b42990d3SMark Brown * supported but it does require at least one. We should have
1371b42990d3SMark Brown * disabled the feature entirely while bringing up CPUs but
1372b42990d3SMark Brown * let's double check here.
1373b42990d3SMark Brown */
1374b42990d3SMark Brown WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
1375b42990d3SMark Brown
1376b42990d3SMark Brown min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
1377b42990d3SMark Brown info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
1378b42990d3SMark Brown
1379b42990d3SMark Brown smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
1380b42990d3SMark Brown info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
1381b42990d3SMark Brown
1382b42990d3SMark Brown /*
1383b42990d3SMark Brown * Sanity-check that the max VL we determined through CPU features
1384b42990d3SMark Brown * corresponds properly to sme_vq_map. If not, do our best:
1385b42990d3SMark Brown */
1386b42990d3SMark Brown if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
1387b42990d3SMark Brown info->max_vl)))
1388b42990d3SMark Brown info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
1389b42990d3SMark Brown info->max_vl);
1390b42990d3SMark Brown
1391b42990d3SMark Brown WARN_ON(info->min_vl > info->max_vl);
1392b42990d3SMark Brown
1393b42990d3SMark Brown /*
1394b42990d3SMark Brown * For the default VL, pick the maximum supported value <= 32
1395b42990d3SMark Brown * (256 bits) if there is one since this is guaranteed not to
1396b42990d3SMark Brown * grow the signal frame when in streaming mode, otherwise the
1397b42990d3SMark Brown * minimum available VL will be used.
1398b42990d3SMark Brown */
1399b42990d3SMark Brown set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
1400b42990d3SMark Brown
1401b42990d3SMark Brown pr_info("SME: minimum available vector length %u bytes per vector\n",
1402b42990d3SMark Brown info->min_vl);
1403b42990d3SMark Brown pr_info("SME: maximum available vector length %u bytes per vector\n",
1404b42990d3SMark Brown info->max_vl);
1405b42990d3SMark Brown pr_info("SME: default vector length %u bytes per vector\n",
1406b42990d3SMark Brown get_sme_default_vl());
1407b42990d3SMark Brown }
1408b42990d3SMark Brown
14097c892383SMark Brown void sme_suspend_exit(void)
14107c892383SMark Brown {
14117c892383SMark Brown u64 smcr = 0;
14127c892383SMark Brown
14137c892383SMark Brown if (!system_supports_sme())
14147c892383SMark Brown return;
14157c892383SMark Brown
14167c892383SMark Brown if (system_supports_fa64())
14177c892383SMark Brown smcr |= SMCR_ELx_FA64;
141879491ddfSMark Brown if (system_supports_sme2())
141979491ddfSMark Brown smcr |= SMCR_ELx_EZT0;
14207c892383SMark Brown
14217c892383SMark Brown write_sysreg_s(smcr, SYS_SMCR_EL1);
14227c892383SMark Brown write_sysreg_s(0, SYS_SMPRI_EL1);
14237c892383SMark Brown }
14247c892383SMark Brown
1425b42990d3SMark Brown #endif /* CONFIG_ARM64_SME */
14265e64b862SMark Brown
14278bd7f91cSMark Brown static void sve_init_regs(void)
14288bd7f91cSMark Brown {
14298bd7f91cSMark Brown /*
14308bd7f91cSMark Brown * Convert the FPSIMD state to SVE, zeroing all the state that
14318bd7f91cSMark Brown * is not shared with FPSIMD. If (as is likely) the current
14328bd7f91cSMark Brown * state is live in the registers then do this there and
14338bd7f91cSMark Brown * update our metadata for the current task including
14348bd7f91cSMark Brown * disabling the trap, otherwise update our in-memory copy.
14358bd7f91cSMark Brown * We are guaranteed not to be in streaming mode: an SVE trap
14368bd7f91cSMark Brown * can only be taken when not in streaming mode, and we can't
14378bd7f91cSMark Brown * be in streaming mode when taking an SME trap.
14388bd7f91cSMark Brown */
14398bd7f91cSMark Brown if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
14408bd7f91cSMark Brown unsigned long vq_minus_one =
14418bd7f91cSMark Brown sve_vq_from_vl(task_get_sve_vl(current)) - 1;
14428bd7f91cSMark Brown sve_set_vq(vq_minus_one);
14438bd7f91cSMark Brown sve_flush_live(true, vq_minus_one);
14448bd7f91cSMark Brown fpsimd_bind_task_to_cpu();
14458bd7f91cSMark Brown } else {
14468bd7f91cSMark Brown fpsimd_to_sve(current);
1447baa85152SMark Brown current->thread.fp_type = FP_STATE_SVE;
1448*51d11ea0SMark Brown fpsimd_flush_task_state(current);
14498bd7f91cSMark Brown }
14508bd7f91cSMark Brown }
14518bd7f91cSMark Brown
1452bc0ee476SDave Martin /*
1453bc0ee476SDave Martin * Trapped SVE access
1454bc0ee476SDave Martin *
1455bc0ee476SDave Martin * Storage is allocated for the full SVE state, the current FPSIMD
1456cccb78ceSMark Brown * register contents are migrated across, and the access trap is
1457cccb78ceSMark Brown * disabled.
1458bc0ee476SDave Martin *
1459f186a84dSJulien Grall * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
1460bc0ee476SDave Martin * would have disabled the SVE access trap for userspace during
1461bc0ee476SDave Martin * ret_to_user, making an SVE access trap impossible in that case.
1462bc0ee476SDave Martin */
14638d56e5c5SAlexandru Elisei void do_sve_acc(unsigned long esr, struct pt_regs *regs)
1464bc0ee476SDave Martin {
1465bc0ee476SDave Martin /* Even if we chose not to use SVE, the hardware could still trap: */
1466bc0ee476SDave Martin if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
14674ef333b2SAmit Daniel Kachhap force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1468bc0ee476SDave Martin return;
1469bc0ee476SDave Martin }
1470bc0ee476SDave Martin
1471826a4fddSMark Brown sve_alloc(current, true);
14727559b7d7SMark Brown if (!current->thread.sve_state) {
14737559b7d7SMark Brown force_sig(SIGKILL);
14747559b7d7SMark Brown return;
14757559b7d7SMark Brown }
1476bc0ee476SDave Martin
14776dcdefcdSJulien Grall get_cpu_fpsimd_context();
1478bc0ee476SDave Martin
1479bc0ee476SDave Martin if (test_and_set_thread_flag(TIF_SVE))
1480bc0ee476SDave Martin WARN_ON(1); /* SVE access shouldn't have trapped */
1481bc0ee476SDave Martin
1482cccb78ceSMark Brown /*
14838bd7f91cSMark Brown * Even if the task may have used streaming mode, SVE access
14848bd7f91cSMark Brown * traps can only be generated in normal SVE mode, and
14858bd7f91cSMark Brown * transitioning out of streaming mode may discard any
14868bd7f91cSMark Brown * streaming mode state. Always clear the high bits to avoid
14878bd7f91cSMark Brown * any potential errors in tracking what is properly initialised.
1488cccb78ceSMark Brown */
14898bd7f91cSMark Brown sve_init_regs();
14908bd7f91cSMark Brown
14918bd7f91cSMark Brown put_cpu_fpsimd_context();
14928bd7f91cSMark Brown }
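/*
 * Illustrative trigger for the trap handled above (userspace, not
 * kernel code; "ptrue" chosen only as an example instruction): the
 * first SVE instruction a task executes after exec faults into
 * do_sve_acc(), because EL0 SVE access remains trapped via
 * CPACR_EL1 until the task demonstrably uses SVE:
 *
 *	// build with an SVE-capable toolchain
 *	asm volatile("ptrue p0.b");	// first SVE insn -> access trap
 */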
14938bd7f91cSMark Brown
14948bd7f91cSMark Brown /*
14958bd7f91cSMark Brown * Trapped SME access
14968bd7f91cSMark Brown *
14978bd7f91cSMark Brown * Storage is allocated for the full SVE and SME state, the current
14988bd7f91cSMark Brown * FPSIMD register contents are migrated to SVE if SVE is not already
14998bd7f91cSMark Brown * active, and the access trap is disabled.
15008bd7f91cSMark Brown *
15018bd7f91cSMark Brown * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
15028bd7f91cSMark Brown * would have disabled the SME access trap for userspace during
150397b5576bSDongxu Sun * ret_to_user, making an SME access trap impossible in that case.
15048bd7f91cSMark Brown */
15050616ea3fSCatalin Marinas void do_sme_acc(unsigned long esr, struct pt_regs *regs)
15068bd7f91cSMark Brown {
15078bd7f91cSMark Brown /* Even if we chose not to use SME, the hardware could still trap: */
15088bd7f91cSMark Brown if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
15098bd7f91cSMark Brown force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
15108bd7f91cSMark Brown return;
15118bd7f91cSMark Brown }
15128bd7f91cSMark Brown
15138bd7f91cSMark Brown /*
15148bd7f91cSMark Brown * If this is not a trap due to SME being disabled then something
15158bd7f91cSMark Brown * is being used in the wrong mode, report as SIGILL.
15168bd7f91cSMark Brown */
15178bd7f91cSMark Brown if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
15188bd7f91cSMark Brown force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
15198bd7f91cSMark Brown return;
15208bd7f91cSMark Brown }
15218bd7f91cSMark Brown
1522826a4fddSMark Brown sve_alloc(current, false);
15235d0a8d2fSMark Brown sme_alloc(current, true);
1524ce514000SMark Brown if (!current->thread.sve_state || !current->thread.sme_state) {
15258bd7f91cSMark Brown force_sig(SIGKILL);
15268bd7f91cSMark Brown return;
15278bd7f91cSMark Brown }
15288bd7f91cSMark Brown
15298bd7f91cSMark Brown get_cpu_fpsimd_context();
15308bd7f91cSMark Brown
15318bd7f91cSMark Brown /* With TIF_SME userspace shouldn't generate any traps */
15328bd7f91cSMark Brown if (test_and_set_thread_flag(TIF_SME))
15338bd7f91cSMark Brown WARN_ON(1);
15348bd7f91cSMark Brown
1535cccb78ceSMark Brown if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
1536ad4711f9SMark Brown unsigned long vq_minus_one =
15378bd7f91cSMark Brown sve_vq_from_vl(task_get_sme_vl(current)) - 1;
15388bd7f91cSMark Brown sme_set_vq(vq_minus_one);
15398bd7f91cSMark Brown
1540cccb78ceSMark Brown fpsimd_bind_task_to_cpu();
1541cccb78ceSMark Brown }
1542cccb78ceSMark Brown
15436dcdefcdSJulien Grall put_cpu_fpsimd_context();
1544bc0ee476SDave Martin }
1545bc0ee476SDave Martin
1546005f78cdSArd Biesheuvel /*
154753631b54SCatalin Marinas * Trapped FP/ASIMD access.
154853631b54SCatalin Marinas */
15498d56e5c5SAlexandru Elisei void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
155053631b54SCatalin Marinas {
155153631b54SCatalin Marinas /* TODO: implement lazy context saving/restoring */
155253631b54SCatalin Marinas WARN_ON(1);
155353631b54SCatalin Marinas }
155453631b54SCatalin Marinas
155553631b54SCatalin Marinas /*
155653631b54SCatalin Marinas * Raise a SIGFPE for the current process.
155753631b54SCatalin Marinas */
15588d56e5c5SAlexandru Elisei void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
155953631b54SCatalin Marinas {
1560af4a81b9SDave Martin unsigned int si_code = FPE_FLTUNK;
156153631b54SCatalin Marinas
1562af4a81b9SDave Martin if (esr & ESR_ELx_FP_EXC_TFV) {
156353631b54SCatalin Marinas if (esr & FPEXC_IOF)
156453631b54SCatalin Marinas si_code = FPE_FLTINV;
156553631b54SCatalin Marinas else if (esr & FPEXC_DZF)
156653631b54SCatalin Marinas si_code = FPE_FLTDIV;
156753631b54SCatalin Marinas else if (esr & FPEXC_OFF)
156853631b54SCatalin Marinas si_code = FPE_FLTOVF;
156953631b54SCatalin Marinas else if (esr & FPEXC_UFF)
157053631b54SCatalin Marinas si_code = FPE_FLTUND;
157153631b54SCatalin Marinas else if (esr & FPEXC_IXF)
157253631b54SCatalin Marinas si_code = FPE_FLTRES;
1573af4a81b9SDave Martin }
157453631b54SCatalin Marinas
1575c8526809SEric W. Biederman send_sig_fault(SIGFPE, si_code,
1576c8526809SEric W. Biederman (void __user *)instruction_pointer(regs),
1577c8526809SEric W. Biederman current);
157853631b54SCatalin Marinas }
157953631b54SCatalin Marinas
158053631b54SCatalin Marinas void fpsimd_thread_switch(struct task_struct *next)
158153631b54SCatalin Marinas {
1582df3fb968SDave Martin bool wrong_task, wrong_cpu;
1583df3fb968SDave Martin
158482e0191aSSuzuki K Poulose if (!system_supports_fpsimd())
158582e0191aSSuzuki K Poulose return;
1586005f78cdSArd Biesheuvel
15876dcdefcdSJulien Grall __get_cpu_fpsimd_context();
15886dcdefcdSJulien Grall
1589df3fb968SDave Martin /* Save unsaved fpsimd state, if any: */
1590d1797615SDave Martin fpsimd_save();
1591005f78cdSArd Biesheuvel
1592005f78cdSArd Biesheuvel /*
1593df3fb968SDave Martin * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
1594df3fb968SDave Martin * state. For kernel threads, FPSIMD registers are never loaded
1595df3fb968SDave Martin * and wrong_task and wrong_cpu will always be true.
1596005f78cdSArd Biesheuvel */
1597df3fb968SDave Martin wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
159809d1223aSDave Martin &next->thread.uw.fpsimd_state;
1599df3fb968SDave Martin wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
160009d1223aSDave Martin
160109d1223aSDave Martin update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
160209d1223aSDave Martin wrong_task || wrong_cpu);
16036dcdefcdSJulien Grall
16046dcdefcdSJulien Grall __put_cpu_fpsimd_context();
160553631b54SCatalin Marinas }
160653631b54SCatalin Marinas
16075838a155SMark Brown static void fpsimd_flush_thread_vl(enum vec_type type)
160853631b54SCatalin Marinas {
16097582e220SDave Martin int vl, supported_vl;
1610bc0ee476SDave Martin
16115838a155SMark Brown /*
16125838a155SMark Brown * Reset the task vector length as required. This is where we
16135838a155SMark Brown * ensure that all user tasks have a valid vector length
16145838a155SMark Brown * configured: no kernel task can become a user task without
16155838a155SMark Brown * an exec and hence a call to this function. By the time the
16165838a155SMark Brown * first call to this function is made, all early hardware
16175838a155SMark Brown * probing is complete, so __sve_default_vl should be valid.
16185838a155SMark Brown * If a bug causes this to go wrong, we make some noise and
16195838a155SMark Brown * try to fudge thread.sve_vl to a safe value here.
16205838a155SMark Brown */
16215838a155SMark Brown vl = task_get_vl_onexec(current, type);
16225838a155SMark Brown if (!vl)
16235838a155SMark Brown vl = get_default_vl(type);
16245838a155SMark Brown
16255838a155SMark Brown if (WARN_ON(!sve_vl_valid(vl)))
162697bcbee4SMark Brown vl = vl_info[type].min_vl;
16275838a155SMark Brown
16285838a155SMark Brown supported_vl = find_supported_vector_length(type, vl);
16295838a155SMark Brown if (WARN_ON(supported_vl != vl))
16305838a155SMark Brown vl = supported_vl;
16315838a155SMark Brown
16325838a155SMark Brown task_set_vl(current, type, vl);
16335838a155SMark Brown
16345838a155SMark Brown /*
16355838a155SMark Brown * If the task is not set to inherit, ensure that the vector
16365838a155SMark Brown * length will be reset by a subsequent exec:
16375838a155SMark Brown */
16385838a155SMark Brown if (!test_thread_flag(vec_vl_inherit_flag(type)))
16395838a155SMark Brown task_set_vl_onexec(current, type, 0);
16405838a155SMark Brown }
16415838a155SMark Brown
16425838a155SMark Brown void fpsimd_flush_thread(void)
16435838a155SMark Brown {
1644a1259dd8SSebastian Andrzej Siewior void *sve_state = NULL;
1645ce514000SMark Brown void *sme_state = NULL;
1646a1259dd8SSebastian Andrzej Siewior
164782e0191aSSuzuki K Poulose if (!system_supports_fpsimd())
164882e0191aSSuzuki K Poulose return;
1649cb84d11eSDave Martin
16506dcdefcdSJulien Grall get_cpu_fpsimd_context();
1651cb84d11eSDave Martin
1652efbc2024SDave Martin fpsimd_flush_task_state(current);
165365896545SDave Martin memset(&current->thread.uw.fpsimd_state, 0,
165465896545SDave Martin sizeof(current->thread.uw.fpsimd_state));
1655bc0ee476SDave Martin
1656bc0ee476SDave Martin if (system_supports_sve()) {
1657bc0ee476SDave Martin clear_thread_flag(TIF_SVE);
1658a1259dd8SSebastian Andrzej Siewior
1659a1259dd8SSebastian Andrzej Siewior /* Defer kfree() while in atomic context */
1660a1259dd8SSebastian Andrzej Siewior sve_state = current->thread.sve_state;
1661a1259dd8SSebastian Andrzej Siewior current->thread.sve_state = NULL;
1662a1259dd8SSebastian Andrzej Siewior
16635838a155SMark Brown fpsimd_flush_thread_vl(ARM64_VEC_SVE);
1664bc0ee476SDave Martin }
1665bc0ee476SDave Martin
16668bd7f91cSMark Brown if (system_supports_sme()) {
16678bd7f91cSMark Brown clear_thread_flag(TIF_SME);
1668a1259dd8SSebastian Andrzej Siewior
1669a1259dd8SSebastian Andrzej Siewior /* Defer kfree() while in atomic context */
1670ce514000SMark Brown sme_state = current->thread.sme_state;
1671ce514000SMark Brown current->thread.sme_state = NULL;
1672a1259dd8SSebastian Andrzej Siewior
1673af7167d6SMark Brown fpsimd_flush_thread_vl(ARM64_VEC_SME);
16748bd7f91cSMark Brown current->thread.svcr = 0;
16758bd7f91cSMark Brown }
1676af7167d6SMark Brown
1677baa85152SMark Brown current->thread.fp_type = FP_STATE_FPSIMD;
1678baa85152SMark Brown
16796dcdefcdSJulien Grall put_cpu_fpsimd_context();
1680a1259dd8SSebastian Andrzej Siewior kfree(sve_state);
1681ce514000SMark Brown kfree(sme_state);
168253631b54SCatalin Marinas }
168353631b54SCatalin Marinas
1684c51f9269SArd Biesheuvel /*
1685005f78cdSArd Biesheuvel * Save the userland FPSIMD state of 'current' to memory, but only if the state
1686005f78cdSArd Biesheuvel * currently held in the registers does in fact belong to 'current'
1687c51f9269SArd Biesheuvel */
1688c51f9269SArd Biesheuvel void fpsimd_preserve_current_state(void)
1689c51f9269SArd Biesheuvel {
169082e0191aSSuzuki K Poulose if (!system_supports_fpsimd())
169182e0191aSSuzuki K Poulose return;
1692cb84d11eSDave Martin
16936dcdefcdSJulien Grall get_cpu_fpsimd_context();
1694d1797615SDave Martin fpsimd_save();
16956dcdefcdSJulien Grall put_cpu_fpsimd_context();
1696c51f9269SArd Biesheuvel }
1697c51f9269SArd Biesheuvel
1698c51f9269SArd Biesheuvel /*
16998cd969d2SDave Martin * Like fpsimd_preserve_current_state(), but ensure that
170065896545SDave Martin * current->thread.uw.fpsimd_state is updated so that it can be copied to
17018cd969d2SDave Martin * the signal frame.
17028cd969d2SDave Martin */
17038cd969d2SDave Martin void fpsimd_signal_preserve_current_state(void)
17048cd969d2SDave Martin {
17058cd969d2SDave Martin fpsimd_preserve_current_state();
170660480c6bSMark Brown if (current->thread.fp_type == FP_STATE_SVE)
17078cd969d2SDave Martin sve_to_fpsimd(current);
17088cd969d2SDave Martin }
17098cd969d2SDave Martin
17108cd969d2SDave Martin /*
171193ae6b01SMark Brown * Called by KVM when entering the guest.
171293ae6b01SMark Brown */
171393ae6b01SMark Brown void fpsimd_kvm_prepare(void)
171493ae6b01SMark Brown {
171593ae6b01SMark Brown if (!system_supports_sve())
171693ae6b01SMark Brown return;
171793ae6b01SMark Brown
171893ae6b01SMark Brown /*
171993ae6b01SMark Brown * KVM does not save host SVE state since we can only enter
172093ae6b01SMark Brown * the guest from a syscall, and the syscall ABI means that any
172193ae6b01SMark Brown * SVE state beyond the shared FPSIMD registers need not be
172293ae6b01SMark Brown * preserved. If we have left SVE enabled for performance
172393ae6b01SMark Brown * reasons then update the task state to be FPSIMD only.
172493ae6b01SMark Brown */
172593ae6b01SMark Brown get_cpu_fpsimd_context();
172693ae6b01SMark Brown
1727baa85152SMark Brown if (test_and_clear_thread_flag(TIF_SVE)) {
172893ae6b01SMark Brown sve_to_fpsimd(current);
1729baa85152SMark Brown current->thread.fp_type = FP_STATE_FPSIMD;
1730baa85152SMark Brown }
173193ae6b01SMark Brown
173293ae6b01SMark Brown put_cpu_fpsimd_context();
173393ae6b01SMark Brown }
173493ae6b01SMark Brown
173593ae6b01SMark Brown /*
17368884b7bdSDave Martin * Associate current's FPSIMD context with this cpu
17376dcdefcdSJulien Grall * The caller must have ownership of the cpu FPSIMD context before calling
17386dcdefcdSJulien Grall * this function.
17398884b7bdSDave Martin */
1740b24b5205SMark Brown static void fpsimd_bind_task_to_cpu(void)
17418884b7bdSDave Martin {
17421192b93bSMark Brown struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
17438884b7bdSDave Martin
174452f73c38SSuzuki K Poulose WARN_ON(!system_supports_fpsimd());
174565896545SDave Martin last->st = &current->thread.uw.fpsimd_state;
174604950674SDave Martin last->sve_state = current->thread.sve_state;
1747ce514000SMark Brown last->sme_state = current->thread.sme_state;
17480423eedcSMark Brown last->sve_vl = task_get_sve_vl(current);
1749af7167d6SMark Brown last->sme_vl = task_get_sme_vl(current);
1750b40c559bSMark Brown last->svcr = &current->thread.svcr;
1751baa85152SMark Brown last->fp_type = &current->thread.fp_type;
1752deeb8f9aSMark Brown last->to_save = FP_STATE_CURRENT;
175320b85472SDave Martin current->thread.fpsimd_cpu = smp_processor_id();
17540cff8e77SDave Martin
17558bd7f91cSMark Brown /*
17568bd7f91cSMark Brown * Toggle SVE and SME trapping for userspace if needed; these
17578bd7f91cSMark Brown * are serialised by ret_to_user().
17588bd7f91cSMark Brown */
17598bd7f91cSMark Brown if (system_supports_sme()) {
17608bd7f91cSMark Brown if (test_thread_flag(TIF_SME))
17618bd7f91cSMark Brown sme_user_enable();
17628bd7f91cSMark Brown else
17638bd7f91cSMark Brown sme_user_disable();
17648bd7f91cSMark Brown }
17658bd7f91cSMark Brown
17660cff8e77SDave Martin if (system_supports_sve()) {
17670cff8e77SDave Martin if (test_thread_flag(TIF_SVE))
17680cff8e77SDave Martin sve_user_enable();
17690cff8e77SDave Martin else
17700cff8e77SDave Martin sve_user_disable();
17710cff8e77SDave Martin }
17728884b7bdSDave Martin }
17738884b7bdSDave Martin
17741192b93bSMark Brown void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
1775e6b673b7SDave Martin {
17761192b93bSMark Brown struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
1777e6b673b7SDave Martin
177852f73c38SSuzuki K Poulose WARN_ON(!system_supports_fpsimd());
1779e6b673b7SDave Martin WARN_ON(!in_softirq() && !irqs_disabled());
1780e6b673b7SDave Martin
17811192b93bSMark Brown *last = *state;
17828884b7bdSDave Martin }
17838884b7bdSDave Martin
17848884b7bdSDave Martin /*
1785005f78cdSArd Biesheuvel * Load the userland FPSIMD state of 'current' from memory, but only if the
1786005f78cdSArd Biesheuvel * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
178712b792e5SMark Brown * state of 'current'. This is called when we are preparing to return to
178812b792e5SMark Brown * userspace to ensure that userspace sees a good register state.
1789005f78cdSArd Biesheuvel */
1790005f78cdSArd Biesheuvel void fpsimd_restore_current_state(void)
1791005f78cdSArd Biesheuvel {
179252f73c38SSuzuki K Poulose /*
179352f73c38SSuzuki K Poulose * For the tasks that were created before we detected the absence of
179452f73c38SSuzuki K Poulose * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
179552f73c38SSuzuki K Poulose * e.g, init. This could be then inherited by the children processes.
179652f73c38SSuzuki K Poulose * If we later detect that the system doesn't support FP/SIMD,
179752f73c38SSuzuki K Poulose * we must clear the flag for all the tasks to indicate that the
179852f73c38SSuzuki K Poulose * FPSTATE is clean (as we can't have one) to avoid looping for ever in
179952f73c38SSuzuki K Poulose * do_notify_resume().
180052f73c38SSuzuki K Poulose */
180152f73c38SSuzuki K Poulose if (!system_supports_fpsimd()) {
180252f73c38SSuzuki K Poulose clear_thread_flag(TIF_FOREIGN_FPSTATE);
180382e0191aSSuzuki K Poulose return;
180452f73c38SSuzuki K Poulose }
1805cb84d11eSDave Martin
18066dcdefcdSJulien Grall get_cpu_fpsimd_context();
1807cb84d11eSDave Martin
1808005f78cdSArd Biesheuvel if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
1809bc0ee476SDave Martin task_fpsimd_load();
18100cff8e77SDave Martin fpsimd_bind_task_to_cpu();
1811005f78cdSArd Biesheuvel }
1812cb84d11eSDave Martin
18136dcdefcdSJulien Grall put_cpu_fpsimd_context();
1814005f78cdSArd Biesheuvel }
1815005f78cdSArd Biesheuvel
1816005f78cdSArd Biesheuvel /*
1817005f78cdSArd Biesheuvel * Load an updated userland FPSIMD state for 'current' from memory and set the
1818005f78cdSArd Biesheuvel * flag that indicates that the FPSIMD register contents are the most recent
181912b792e5SMark Brown * FPSIMD state of 'current'. This is used by the signal code to restore the
182012b792e5SMark Brown * register state when returning from a signal handler in FPSIMD-only cases;
182112b792e5SMark Brown * any SVE context will be discarded.
1822c51f9269SArd Biesheuvel */
18230abdeff5SDave Martin void fpsimd_update_current_state(struct user_fpsimd_state const *state)
1824c51f9269SArd Biesheuvel {
182552f73c38SSuzuki K Poulose if (WARN_ON(!system_supports_fpsimd()))
182682e0191aSSuzuki K Poulose return;
1827cb84d11eSDave Martin
18286dcdefcdSJulien Grall get_cpu_fpsimd_context();
1829cb84d11eSDave Martin
183065896545SDave Martin current->thread.uw.fpsimd_state = *state;
1831ef9c5d09SMark Brown if (test_thread_flag(TIF_SVE))
18328cd969d2SDave Martin fpsimd_to_sve(current);
18339de52a75SDave Martin
18348cd969d2SDave Martin task_fpsimd_load();
18350cff8e77SDave Martin fpsimd_bind_task_to_cpu();
18368cd969d2SDave Martin
18370cff8e77SDave Martin clear_thread_flag(TIF_FOREIGN_FPSTATE);
1838cb84d11eSDave Martin
18396dcdefcdSJulien Grall put_cpu_fpsimd_context();
1840c51f9269SArd Biesheuvel }
1841c51f9269SArd Biesheuvel
1842005f78cdSArd Biesheuvel /*
1843005f78cdSArd Biesheuvel * Invalidate live CPU copies of task t's FPSIMD state
1844efbc2024SDave Martin *
1845efbc2024SDave Martin * This function may be called with preemption enabled. The barrier()
1846efbc2024SDave Martin * ensures that the assignment to fpsimd_cpu is visible to any
1847efbc2024SDave Martin * preemption/softirq that could race with set_tsk_thread_flag(), so
1848efbc2024SDave Martin * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
1849efbc2024SDave Martin *
1850efbc2024SDave Martin * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
1851efbc2024SDave Martin * subsequent code.
1852005f78cdSArd Biesheuvel */
1853005f78cdSArd Biesheuvel void fpsimd_flush_task_state(struct task_struct *t)
1854005f78cdSArd Biesheuvel {
185520b85472SDave Martin t->thread.fpsimd_cpu = NR_CPUS;
185652f73c38SSuzuki K Poulose /*
185752f73c38SSuzuki K Poulose * If we don't support fpsimd, bail out after we have
185852f73c38SSuzuki K Poulose * reset the fpsimd_cpu for this task and clear the
185952f73c38SSuzuki K Poulose * FPSTATE.
186052f73c38SSuzuki K Poulose */
186152f73c38SSuzuki K Poulose if (!system_supports_fpsimd())
186252f73c38SSuzuki K Poulose return;
1863efbc2024SDave Martin barrier();
1864efbc2024SDave Martin set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
1865efbc2024SDave Martin
1866efbc2024SDave Martin barrier();
1867005f78cdSArd Biesheuvel }
1868005f78cdSArd Biesheuvel
1869efbc2024SDave Martin /*
1870efbc2024SDave Martin * Invalidate any task's FPSIMD state that is present on this cpu.
18716dcdefcdSJulien Grall * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
18726dcdefcdSJulien Grall * before calling this function.
1873efbc2024SDave Martin */
187454b8c7cbSJulien Grall static void fpsimd_flush_cpu_state(void)
187517eed27bSDave Martin {
187652f73c38SSuzuki K Poulose WARN_ON(!system_supports_fpsimd());
1877cb968afcSDave Martin __this_cpu_write(fpsimd_last_state.st, NULL);
1878d45d7ff7SMark Brown
1879d45d7ff7SMark Brown /*
1880d45d7ff7SMark Brown * Leaving streaming mode enabled will cause issues for any kernel
1881d45d7ff7SMark Brown * NEON and leaving streaming mode or ZA enabled may increase power
1882d45d7ff7SMark Brown * consumption.
1883d45d7ff7SMark Brown */
1884d45d7ff7SMark Brown if (system_supports_sme())
1885d45d7ff7SMark Brown sme_smstop();
1886d45d7ff7SMark Brown
1887d8ad71faSDave Martin set_thread_flag(TIF_FOREIGN_FPSTATE);
188817eed27bSDave Martin }
188917eed27bSDave Martin
189054b8c7cbSJulien Grall /*
189154b8c7cbSJulien Grall * Save the FPSIMD state to memory and invalidate cpu view.
18926dcdefcdSJulien Grall * This function must be called with preemption disabled.
189354b8c7cbSJulien Grall */
189454b8c7cbSJulien Grall void fpsimd_save_and_flush_cpu_state(void)
189554b8c7cbSJulien Grall {
189652f73c38SSuzuki K Poulose if (!system_supports_fpsimd())
189752f73c38SSuzuki K Poulose return;
18986dcdefcdSJulien Grall WARN_ON(preemptible());
18996dcdefcdSJulien Grall __get_cpu_fpsimd_context();
190054b8c7cbSJulien Grall fpsimd_save();
190154b8c7cbSJulien Grall fpsimd_flush_cpu_state();
19026dcdefcdSJulien Grall __put_cpu_fpsimd_context();
190354b8c7cbSJulien Grall }
19044cfb3613SArd Biesheuvel
19054cfb3613SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON
1906190f1ca8SArd Biesheuvel
19074cfb3613SArd Biesheuvel /*
19084cfb3613SArd Biesheuvel * Kernel-side NEON support functions
19094cfb3613SArd Biesheuvel */
1910cb84d11eSDave Martin
1911cb84d11eSDave Martin /*
1912cb84d11eSDave Martin * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
1913cb84d11eSDave Martin * context
1914cb84d11eSDave Martin *
1915cb84d11eSDave Martin * Must not be called unless may_use_simd() returns true.
1916cb84d11eSDave Martin * Task context in the FPSIMD registers is saved back to memory as necessary.
1917cb84d11eSDave Martin *
1918cb84d11eSDave Martin * A matching call to kernel_neon_end() must be made before returning from the
1919cb84d11eSDave Martin * calling context.
1920cb84d11eSDave Martin *
1921cb84d11eSDave Martin * The caller may freely use the FPSIMD registers until kernel_neon_end() is
1922cb84d11eSDave Martin * called.
1923cb84d11eSDave Martin */
1924cb84d11eSDave Martin void kernel_neon_begin(void)
19254cfb3613SArd Biesheuvel {
192682e0191aSSuzuki K Poulose if (WARN_ON(!system_supports_fpsimd()))
192782e0191aSSuzuki K Poulose return;
19284cfb3613SArd Biesheuvel
1929cb84d11eSDave Martin BUG_ON(!may_use_simd());
1930cb84d11eSDave Martin
19316dcdefcdSJulien Grall get_cpu_fpsimd_context();
1932cb84d11eSDave Martin
1933df3fb968SDave Martin /* Save unsaved fpsimd state, if any: */
1934d1797615SDave Martin fpsimd_save();
19354cfb3613SArd Biesheuvel
1936cb84d11eSDave Martin /* Invalidate any task state remaining in the fpsimd regs: */
193717eed27bSDave Martin fpsimd_flush_cpu_state();
1938cb84d11eSDave Martin }
1939aaeca984SMark Brown EXPORT_SYMBOL_GPL(kernel_neon_begin);
1940cb84d11eSDave Martin
1941cb84d11eSDave Martin /*
1942cb84d11eSDave Martin * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
1943cb84d11eSDave Martin *
1944cb84d11eSDave Martin * Must be called from a context in which kernel_neon_begin() was previously
1945cb84d11eSDave Martin * called, with no call to kernel_neon_end() in the meantime.
1946cb84d11eSDave Martin *
1947cb84d11eSDave Martin * The caller must not use the FPSIMD registers after this function is called,
1948cb84d11eSDave Martin * unless kernel_neon_begin() is called again in the meantime.
1949cb84d11eSDave Martin */
19504cfb3613SArd Biesheuvel void kernel_neon_end(void)
19514cfb3613SArd Biesheuvel {
195282e0191aSSuzuki K Poulose if (!system_supports_fpsimd())
195382e0191aSSuzuki K Poulose return;
1954cb84d11eSDave Martin
19556dcdefcdSJulien Grall put_cpu_fpsimd_context();
19564cfb3613SArd Biesheuvel }
1957aaeca984SMark Brown EXPORT_SYMBOL_GPL(kernel_neon_end);
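/*
 * Minimal usage sketch for the pair above (the caller and fallback
 * names are hypothetical):
 *
 *	#include <asm/neon.h>
 *	#include <asm/simd.h>
 *
 *	static void my_accelerated_op(void)
 *	{
 *		if (!may_use_simd()) {
 *			my_scalar_fallback();	// hypothetical fallback
 *			return;
 *		}
 *		kernel_neon_begin();
 *		// ... FPSIMD/NEON registers may be used here ...
 *		kernel_neon_end();
 *	}
 */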
19584cfb3613SArd Biesheuvel
1959e580b8bcSDave Martin #ifdef CONFIG_EFI
1960e580b8bcSDave Martin
196120b85472SDave Martin static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
19623b66023dSDave Martin static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
1963fdfa976cSDave Martin static DEFINE_PER_CPU(bool, efi_sve_state_used);
1964e0838f63SMark Brown static DEFINE_PER_CPU(bool, efi_sm_state);
19654328825dSDave Martin
19664328825dSDave Martin /*
19674328825dSDave Martin * EFI runtime services support functions
19684328825dSDave Martin *
19694328825dSDave Martin * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
19704328825dSDave Martin * This means that for EFI (and only for EFI), we have to assume that FPSIMD
19714328825dSDave Martin * is always used rather than being an optional accelerator.
19724328825dSDave Martin *
19734328825dSDave Martin * These functions provide the necessary support for ensuring FPSIMD
19744328825dSDave Martin * save/restore in the contexts from which EFI is used.
19754328825dSDave Martin *
19764328825dSDave Martin * Do not use them for any other purpose -- if tempted to do so, you are
19774328825dSDave Martin * either doing something wrong or you need to propose some refactoring.
19784328825dSDave Martin */
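/*
 * Sketch of the intended call pattern (illustrative; the wrapper
 * name is hypothetical and the real callers live in the EFI runtime
 * service glue): every runtime service invocation is bracketed so
 * that firmware-clobbered vector state is saved and restored:
 *
 *	__efi_fpsimd_begin();
 *	status = efi_call_runtime_service(...);	// hypothetical
 *	__efi_fpsimd_end();
 */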
19794328825dSDave Martin
19804328825dSDave Martin /*
19814328825dSDave Martin * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
19824328825dSDave Martin */
19834328825dSDave Martin void __efi_fpsimd_begin(void)
19844328825dSDave Martin {
19854328825dSDave Martin if (!system_supports_fpsimd())
19864328825dSDave Martin return;
19874328825dSDave Martin
19884328825dSDave Martin WARN_ON(preemptible());
19894328825dSDave Martin
1990fdfa976cSDave Martin if (may_use_simd()) {
19914328825dSDave Martin kernel_neon_begin();
1992fdfa976cSDave Martin } else {
1993fdfa976cSDave Martin /*
1994fdfa976cSDave Martin * If !efi_sve_state, SVE can't be in use yet and doesn't need
1995fdfa976cSDave Martin * preserving:
1996fdfa976cSDave Martin */
1997fdfa976cSDave Martin if (system_supports_sve() && likely(efi_sve_state)) {
1998fdfa976cSDave Martin char *sve_state = this_cpu_ptr(efi_sve_state);
1999e0838f63SMark Brown bool ffr = true;
2000e0838f63SMark Brown u64 svcr;
2001fdfa976cSDave Martin
2002fdfa976cSDave Martin __this_cpu_write(efi_sve_state_used, true);
2003fdfa976cSDave Martin
2004e0838f63SMark Brown if (system_supports_sme()) {
2005ec0067a6SMark Brown svcr = read_sysreg_s(SYS_SVCR);
2006e0838f63SMark Brown
20072e990e63SMark Brown __this_cpu_write(efi_sm_state,
20082e990e63SMark Brown svcr & SVCR_SM_MASK);
2009e0838f63SMark Brown
20102e990e63SMark Brown /*
20112e990e63SMark Brown * Unless we have FA64 FFR does not
20122e990e63SMark Brown * exist in streaming mode.
20132e990e63SMark Brown */
20142e990e63SMark Brown if (!system_supports_fa64())
20152e990e63SMark Brown ffr = !(svcr & SVCR_SM_MASK);
2016e0838f63SMark Brown }
2017e0838f63SMark Brown
2018b5bc00ffSMark Brown sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
20199f584866SMark Brown &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
2020e0838f63SMark Brown ffr);
2021e0838f63SMark Brown
2022e0838f63SMark Brown if (system_supports_sme())
2023ec0067a6SMark Brown sysreg_clear_set_s(SYS_SVCR,
2024ec0067a6SMark Brown SVCR_SM_MASK, 0);
2025e0838f63SMark Brown
2026fdfa976cSDave Martin } else {
20274328825dSDave Martin fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
2028fdfa976cSDave Martin }
2029fdfa976cSDave Martin
20304328825dSDave Martin __this_cpu_write(efi_fpsimd_state_used, true);
20314328825dSDave Martin }
20324328825dSDave Martin }
20334328825dSDave Martin
20344328825dSDave Martin /*
20354328825dSDave Martin * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
20364328825dSDave Martin */
20374328825dSDave Martin void __efi_fpsimd_end(void)
20384328825dSDave Martin {
20394328825dSDave Martin if (!system_supports_fpsimd())
20404328825dSDave Martin return;
20414328825dSDave Martin
2042fdfa976cSDave Martin if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
20434328825dSDave Martin kernel_neon_end();
2044fdfa976cSDave Martin } else {
2045fdfa976cSDave Martin if (system_supports_sve() &&
2046fdfa976cSDave Martin likely(__this_cpu_read(efi_sve_state_used))) {
2047fdfa976cSDave Martin char const *sve_state = this_cpu_ptr(efi_sve_state);
2048e0838f63SMark Brown bool ffr = true;
2049fdfa976cSDave Martin
2050e0838f63SMark Brown /*
2051e0838f63SMark Brown * Restore streaming mode; EFI calls are
2052e0838f63SMark Brown * normal function calls so should not return in
2053e0838f63SMark Brown * streaming mode.
2054e0838f63SMark Brown */
2055e0838f63SMark Brown if (system_supports_sme()) {
2056e0838f63SMark Brown if (__this_cpu_read(efi_sm_state)) {
2057ec0067a6SMark Brown sysreg_clear_set_s(SYS_SVCR,
2058e0838f63SMark Brown 0,
2059ec0067a6SMark Brown SVCR_SM_MASK);
20602e990e63SMark Brown
20612e990e63SMark Brown /*
20622e990e63SMark Brown * Unless we have FA64 FFR does not
20632e990e63SMark Brown * exist in streaming mode.
20642e990e63SMark Brown */
2065e0838f63SMark Brown if (!system_supports_fa64())
20662e990e63SMark Brown ffr = false;
2067e0838f63SMark Brown }
2068e0838f63SMark Brown }
2069e0838f63SMark Brown
2070b5bc00ffSMark Brown sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
2071fdfa976cSDave Martin &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
2072e0838f63SMark Brown ffr);
2073fdfa976cSDave Martin
2074fdfa976cSDave Martin __this_cpu_write(efi_sve_state_used, false);
2075fdfa976cSDave Martin } else {
2076fdfa976cSDave Martin fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
2077fdfa976cSDave Martin }
2078fdfa976cSDave Martin }
20794328825dSDave Martin }
20804328825dSDave Martin
2081e580b8bcSDave Martin #endif /* CONFIG_EFI */
2082e580b8bcSDave Martin
20834cfb3613SArd Biesheuvel #endif /* CONFIG_KERNEL_MODE_NEON */
20844cfb3613SArd Biesheuvel
2085fb1ab1abSLorenzo Pieralisi #ifdef CONFIG_CPU_PM
2086fb1ab1abSLorenzo Pieralisi static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
2087fb1ab1abSLorenzo Pieralisi unsigned long cmd, void *v)
2088fb1ab1abSLorenzo Pieralisi {
2089fb1ab1abSLorenzo Pieralisi switch (cmd) {
2090fb1ab1abSLorenzo Pieralisi case CPU_PM_ENTER:
209154b8c7cbSJulien Grall fpsimd_save_and_flush_cpu_state();
2092fb1ab1abSLorenzo Pieralisi break;
2093fb1ab1abSLorenzo Pieralisi case CPU_PM_EXIT:
2094fb1ab1abSLorenzo Pieralisi break;
2095fb1ab1abSLorenzo Pieralisi case CPU_PM_ENTER_FAILED:
2096fb1ab1abSLorenzo Pieralisi default:
2097fb1ab1abSLorenzo Pieralisi return NOTIFY_DONE;
2098fb1ab1abSLorenzo Pieralisi }
2099fb1ab1abSLorenzo Pieralisi return NOTIFY_OK;
2100fb1ab1abSLorenzo Pieralisi }
2101fb1ab1abSLorenzo Pieralisi
2102fb1ab1abSLorenzo Pieralisi static struct notifier_block fpsimd_cpu_pm_notifier_block = {
2103fb1ab1abSLorenzo Pieralisi .notifier_call = fpsimd_cpu_pm_notifier,
2104fb1ab1abSLorenzo Pieralisi };
2105fb1ab1abSLorenzo Pieralisi
2106a7c61a34SJisheng Zhang static void __init fpsimd_pm_init(void)
2107fb1ab1abSLorenzo Pieralisi {
2108fb1ab1abSLorenzo Pieralisi cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
2109fb1ab1abSLorenzo Pieralisi }
2110fb1ab1abSLorenzo Pieralisi
2111fb1ab1abSLorenzo Pieralisi #else
2112fb1ab1abSLorenzo Pieralisi static inline void fpsimd_pm_init(void) { }
2113fb1ab1abSLorenzo Pieralisi #endif /* CONFIG_CPU_PM */
2114fb1ab1abSLorenzo Pieralisi
211532365e64SJanet Liu #ifdef CONFIG_HOTPLUG_CPU
2116c23a7266SSebastian Andrzej Siewior static int fpsimd_cpu_dead(unsigned int cpu)
211732365e64SJanet Liu {
2118cb968afcSDave Martin per_cpu(fpsimd_last_state.st, cpu) = NULL;
2119c23a7266SSebastian Andrzej Siewior return 0;
212032365e64SJanet Liu }
212132365e64SJanet Liu
212232365e64SJanet Liu static inline void fpsimd_hotplug_init(void)
212332365e64SJanet Liu {
2124c23a7266SSebastian Andrzej Siewior cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
2125c23a7266SSebastian Andrzej Siewior NULL, fpsimd_cpu_dead);
212632365e64SJanet Liu }
212732365e64SJanet Liu
212832365e64SJanet Liu #else
212932365e64SJanet Liu static inline void fpsimd_hotplug_init(void) { }
213032365e64SJanet Liu #endif
213132365e64SJanet Liu
213253631b54SCatalin Marinas /*
213353631b54SCatalin Marinas * FP/SIMD support code initialisation.
213453631b54SCatalin Marinas */
213553631b54SCatalin Marinas static int __init fpsimd_init(void)
213653631b54SCatalin Marinas {
2137aaba098fSAndrew Murray if (cpu_have_named_feature(FP)) {
2138fb1ab1abSLorenzo Pieralisi fpsimd_pm_init();
213932365e64SJanet Liu fpsimd_hotplug_init();
2140fe80f9f2SSuzuki K. Poulose } else {
2141fe80f9f2SSuzuki K. Poulose pr_notice("Floating-point is not implemented\n");
2142fe80f9f2SSuzuki K. Poulose }
2143fe80f9f2SSuzuki K. Poulose
2144aaba098fSAndrew Murray if (!cpu_have_named_feature(ASIMD))
2145fe80f9f2SSuzuki K. Poulose pr_notice("Advanced SIMD is not implemented\n");
2146fb1ab1abSLorenzo Pieralisi
21475e64b862SMark Brown
214812f1bacfSMark Brown sve_sysctl_init();
214912f1bacfSMark Brown sme_sysctl_init();
215012f1bacfSMark Brown
215112f1bacfSMark Brown return 0;
215253631b54SCatalin Marinas }
2153ae2e972dSSuzuki K Poulose core_initcall(fpsimd_init);