fpsimd.c (arch/arm64/kernel): comparison between commits e0838f6373e5cb72516fc4c26bba309097e2a80a and 8d56e5c5a99ce1d17d39ce5a8260e42c2a2d7682
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * FP/SIMD context switching and fault handling
4 *
5 * Copyright (C) 2012 ARM Ltd.
6 * Author: Catalin Marinas <catalin.marinas@arm.com>
7 */
8

--- 107 unchanged lines hidden (view full) ---

116 *
117 * - the task gets preempted after kernel_neon_end() is called; as we have not
118 * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
119 * whatever is in the FPSIMD registers is not saved to memory, but discarded.
120 */
121struct fpsimd_last_state_struct {
122 struct user_fpsimd_state *st;
123 void *sve_state;
124 void *za_state;
125 u64 *svcr;
126 unsigned int sve_vl;
127 unsigned int sme_vl;
128};
129
130static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
131
132__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
133#ifdef CONFIG_ARM64_SVE
134 [ARM64_VEC_SVE] = {
135 .type = ARM64_VEC_SVE,
136 .name = "SVE",
137 .min_vl = SVE_VL_MIN,
138 .max_vl = SVE_VL_MIN,
139 .max_virtualisable_vl = SVE_VL_MIN,
140 },
141#endif
142#ifdef CONFIG_ARM64_SME
143 [ARM64_VEC_SME] = {
144 .type = ARM64_VEC_SME,
145 .name = "SME",
146 },
147#endif
148};
149
150static unsigned int vec_vl_inherit_flag(enum vec_type type)
151{
152 switch (type) {
153 case ARM64_VEC_SVE:
154 return TIF_SVE_VL_INHERIT;
155 case ARM64_VEC_SME:
156 return TIF_SME_VL_INHERIT;
157 default:
158 WARN_ON_ONCE(1);
159 return 0;
160 }
161}
162
163struct vl_config {
164 int __default_vl; /* Default VL for tasks */

--- 27 unchanged lines hidden (view full) ---

192
193#else /* ! CONFIG_ARM64_SVE */
194
195/* Dummy declaration for code that will be optimised out: */
196extern void __percpu *efi_sve_state;
197
198#endif /* ! CONFIG_ARM64_SVE */
199
200#ifdef CONFIG_ARM64_SME
201
202static int get_sme_default_vl(void)
203{
204 return get_default_vl(ARM64_VEC_SME);
205}
206
207static void set_sme_default_vl(int val)
208{
209 set_default_vl(ARM64_VEC_SME, val);
210}
211
212static void sme_free(struct task_struct *);
213
214#else
215
216static inline void sme_free(struct task_struct *t) { }
217
218#endif
219
220DEFINE_PER_CPU(bool, fpsimd_context_busy);
221EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
222
223static void fpsimd_bind_task_to_cpu(void);
224
225static void __get_cpu_fpsimd_context(void)
226{
227 bool busy = __this_cpu_xchg(fpsimd_context_busy, true);

--- 77 unchanged lines hidden (view full) ---

305
306void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
307 unsigned long vl)
308{
309 task->thread.vl_onexec[type] = vl;
310}
311
312/*
313 * TIF_SME controls whether a task can use SME without trapping while
314 * in userspace; when TIF_SME is set we must have storage
315 * allocated in sve_state and za_state to store the contents of both ZA
316 * and the SVE registers for both streaming and non-streaming modes.
317 *
318 * If both SVCR.ZA and SVCR.SM are disabled then at any point we
319 * may disable TIF_SME and reenable traps.
320 */
321
322
323/*
324 * TIF_SVE controls whether a task can use SVE without trapping while
325 * in userspace, and also (together with TIF_SME) the way a task's
326 * FPSIMD/SVE state is stored in thread_struct.
283 * in userspace, and also the way a task's FPSIMD/SVE state is stored
284 * in thread_struct.
327 *
328 * The kernel uses this flag to track whether a user task is actively
329 * using SVE, and therefore whether full SVE register state needs to
330 * be tracked. If not, the cheaper FPSIMD context handling code can
331 * be used instead of the more costly SVE equivalents.
332 *
333 * * TIF_SVE or SVCR.SM set:
291 * * TIF_SVE set:
334 *
335 * The task can execute SVE instructions while in userspace without
336 * trapping to the kernel.
337 *
338 * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
339 * corresponding Zn), P0-P15 and FFR are encoded in
340 * task->thread.sve_state, formatted appropriately for vector
341 * length task->thread.sve_vl or, if SVCR.SM is set,
342 * task->thread.sme_vl.
299 * length task->thread.sve_vl.
343 *
344 * task->thread.sve_state must point to a valid buffer at least
345 * sve_state_size(task) bytes in size.
346 *
347 * During any syscall, the kernel may optionally clear TIF_SVE and
348 * discard the vector state except for the FPSIMD subset.
349 *
350 * * TIF_SVE clear:

--- 21 unchanged lines hidden (view full) ---

372 * Update current's FPSIMD/SVE registers from thread_struct.
373 *
374 * This function should be called only when the FPSIMD/SVE state in
375 * thread_struct is known to be up to date, when preparing to enter
376 * userspace.
377 */
378static void task_fpsimd_load(void)
379{
380 bool restore_sve_regs = false;
381 bool restore_ffr;
382
383 WARN_ON(!system_supports_fpsimd());
384 WARN_ON(!have_cpu_fpsimd_context());
385
386 /* Check if we should restore SVE first */
387 if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
388 sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
389 restore_sve_regs = true;
390 restore_ffr = true;
391 }
392
393 /* Restore SME, override SVE register configuration if needed */
394 if (system_supports_sme()) {
395 unsigned long sme_vl = task_get_sme_vl(current);
396
397 /* Ensure VL is set up for restoring data */
398 if (test_thread_flag(TIF_SME))
399 sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
400
401 write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
402
403 if (thread_za_enabled(&current->thread))
404 za_load_state(current->thread.za_state);
405
406 if (thread_sm_enabled(&current->thread)) {
407 restore_sve_regs = true;
408 restore_ffr = system_supports_fa64();
409 }
410 }
411
412 if (restore_sve_regs)
413 sve_load_state(sve_pffr(&current->thread),
414 &current->thread.uw.fpsimd_state.fpsr,
415 restore_ffr);
416 else
343 &current->thread.uw.fpsimd_state.fpsr, true);
344 } else {
417 fpsimd_load_state(&current->thread.uw.fpsimd_state);
346 }
418}
419
420/*
421 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
422 * date with respect to the CPU registers. Note carefully that the
423 * current context is the context last bound to the CPU stored in
424 * last; if KVM is involved this may be the guest VM context rather
425 * than the host thread for the VM pointed to by current. This means
426 * that we must always reference the state storage via last rather
427 * than via current, other than the TIF_ flags which KVM will
428 * carefully maintain for us.
429 */
430static void fpsimd_save(void)
431{
432 struct fpsimd_last_state_struct const *last =
433 this_cpu_ptr(&fpsimd_last_state);
434 /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
435 bool save_sve_regs = false;
436 bool save_ffr;
437 unsigned int vl;
438
439 WARN_ON(!system_supports_fpsimd());
440 WARN_ON(!have_cpu_fpsimd_context());
441
442 if (test_thread_flag(TIF_FOREIGN_FPSTATE))
443 return;
444
445 if (test_thread_flag(TIF_SVE)) {
446 save_sve_regs = true;
447 save_ffr = true;
448 vl = last->sve_vl;
449 }
450
451 if (system_supports_sme()) {
452 u64 *svcr = last->svcr;
453 *svcr = read_sysreg_s(SYS_SVCR_EL0);
454
455 *svcr = read_sysreg_s(SYS_SVCR_EL0);
456
457 if (*svcr & SYS_SVCR_EL0_ZA_MASK)
458 za_save_state(last->za_state);
459
460 /* If we are in streaming mode override regular SVE. */
461 if (*svcr & SYS_SVCR_EL0_SM_MASK) {
462 save_sve_regs = true;
463 save_ffr = system_supports_fa64();
464 vl = last->sme_vl;
465 }
466 }
467
468 if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
469 /* Get the configured VL from RDVL; this will account for SM */
470 if (WARN_ON(sve_get_vl() != vl)) {
371 if (IS_ENABLED(CONFIG_ARM64_SVE) &&
372 test_thread_flag(TIF_SVE)) {
373 if (WARN_ON(sve_get_vl() != last->sve_vl)) {
471 /*
472 * Can't save the user regs, so current would
473 * re-enter user with corrupt state.
474 * There's no way to recover, so kill it:
475 */
476 force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
477 return;
478 }
479
480 sve_save_state((char *)last->sve_state +
481 sve_ffr_offset(vl),
482 &last->st->fpsr, save_ffr);
384 sve_ffr_offset(last->sve_vl),
385 &last->st->fpsr, true);
483 } else {
484 fpsimd_save_state(last->st);
485 }
486}
487
488/*
489 * All vector length selection from userspace comes through here.
490 * We're on a slow path, so some sanity-checks are included.

--- 10 unchanged lines hidden (view full) ---

501 if (WARN_ON(!sve_vl_valid(vl)))
502 vl = info->min_vl;
503
504 if (WARN_ON(!sve_vl_valid(max_vl)))
505 max_vl = info->min_vl;
506
507 if (vl > max_vl)
508 vl = max_vl;
509 if (vl < info->min_vl)
510 vl = info->min_vl;
511
512 bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
513 __vq_to_bit(sve_vq_from_vl(vl)));
514 return sve_vl_from_vq(__bit_to_vq(bit));
515}
516
517#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
518

--- 42 unchanged lines hidden (view full) ---

561
562 return 0;
563}
564
565#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
566static int __init sve_sysctl_init(void) { return 0; }
567#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
568
569#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
570static struct ctl_table sme_default_vl_table[] = {
571 {
572 .procname = "sme_default_vector_length",
573 .mode = 0644,
574 .proc_handler = vec_proc_do_default_vl,
575 .extra1 = &vl_info[ARM64_VEC_SME],
576 },
577 { }
578};
579
580static int __init sme_sysctl_init(void)
581{
582 if (system_supports_sme())
583 if (!register_sysctl("abi", sme_default_vl_table))
584 return -EINVAL;
585
586 return 0;
587}
588
589#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
590static int __init sme_sysctl_init(void) { return 0; }
591#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
592
593#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
594 (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
595
596#ifdef CONFIG_CPU_BIG_ENDIAN
597static __uint128_t arm64_cpu_to_le128(__uint128_t x)
598{
599 u64 a = swab64(x);
600 u64 b = swab64(x >> 64);

--- 37 unchanged lines hidden (view full) ---

638{
639 unsigned int vq;
640 void *sst = task->thread.sve_state;
641 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
642
643 if (!system_supports_sve())
644 return;
645
646 vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
523 vq = sve_vq_from_vl(task_get_sve_vl(task));
647 __fpsimd_to_sve(sst, fst, vq);
648}
649
650/*
651 * Transfer the SVE state in task->thread.sve_state to
652 * task->thread.uw.fpsimd_state.
653 *
654 * Task can be a non-runnable task, or current. In the latter case,
655 * the caller must have ownership of the cpu FPSIMD context before calling
656 * this function.
657 * task->thread.sve_state must point to at least sve_state_size(task)
658 * bytes of allocated kernel memory.
659 * task->thread.sve_state must be up to date before calling this function.
660 */
661static void sve_to_fpsimd(struct task_struct *task)
662{
663 unsigned int vq, vl;
540 unsigned int vq;
664 void const *sst = task->thread.sve_state;
665 struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
666 unsigned int i;
667 __uint128_t const *p;
668
669 if (!system_supports_sve())
670 return;
671
672 vl = thread_get_cur_vl(&task->thread);
673 vq = sve_vq_from_vl(vl);
549 vq = sve_vq_from_vl(task_get_sve_vl(task));
674 for (i = 0; i < SVE_NUM_ZREGS; ++i) {
675 p = (__uint128_t const *)ZREG(sst, vq, i);
676 fst->vregs[i] = arm64_le128_to_cpu(*p);
677 }
678}
679
680#ifdef CONFIG_ARM64_SVE
681
682/*
683 * Return how many bytes of memory are required to store the full SVE
684 * state for task, given task's currently configured vector length.
685 */
686size_t sve_state_size(struct task_struct const *task)
562static size_t sve_state_size(struct task_struct const *task)
687{
688 unsigned int vl = 0;
689
690 if (system_supports_sve())
691 vl = task_get_sve_vl(task);
692 if (system_supports_sme())
693 vl = max(vl, task_get_sme_vl(task));
694
695 return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
564 return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task)));
696}
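/*
 * Editor's note (illustrative, not part of the kernel source): the SVE
 * register block scales linearly with the vector length VL in bytes:
 * 32 Z registers of VL bytes each, 16 P registers of VL/8 bytes each and
 * FFR of VL/8 bytes, i.e. roughly 34.125 * VL bytes, ignoring any
 * rounding inside the SVE_SIG_* macros. At the architectural maximum VL
 * of 256 bytes (2048 bits) that is about 8.7 KiB, consistent with the
 * "maximum ~8KB" remark in sve_alloc() below.
 */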
697
698/*
699 * Ensure that task->thread.sve_state is allocated and sufficiently large.
700 *
701 * This function should be used only in preparation for replacing
702 * task->thread.sve_state with new data. The memory is always zeroed
703 * here to prevent stale data from showing through: this is done in

--- 10 unchanged lines hidden (view full) ---

714
715 /* This is a small allocation (maximum ~8KB) and Should Not Fail. */
716 task->thread.sve_state =
717 kzalloc(sve_state_size(task), GFP_KERNEL);
718}
719
720
721/*
722 * Force the FPSIMD state shared with SVE to be updated in the SVE state
723 * even if the SVE state is the current active state.
724 *
725 * This should only be called by ptrace. task must be non-runnable.
726 * task->thread.sve_state must point to at least sve_state_size(task)
727 * bytes of allocated kernel memory.
728 */
729void fpsimd_force_sync_to_sve(struct task_struct *task)
730{
731 fpsimd_to_sve(task);
732}
733
734/*
735 * Ensure that task->thread.sve_state is up to date with respect to
736 * the user task, irrespective of whether SVE is in use or not.
737 *
738 * This should only be called by ptrace. task must be non-runnable.
739 * task->thread.sve_state must point to at least sve_state_size(task)
740 * bytes of allocated kernel memory.
741 */
742void fpsimd_sync_to_sve(struct task_struct *task)
743{
744 if (!test_tsk_thread_flag(task, TIF_SVE) &&
745 !thread_sm_enabled(&task->thread))
600 if (!test_tsk_thread_flag(task, TIF_SVE))
746 fpsimd_to_sve(task);
747}
748
749/*
750 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
751 * the user task, irrespective of whether SVE is in use or not.
752 *
753 * This should only be called by ptrace. task must be non-runnable.
754 * task->thread.sve_state must point to at least sve_state_size(task)
755 * bytes of allocated kernel memory.
756 */
757void sve_sync_to_fpsimd(struct task_struct *task)
758{
759 if (test_tsk_thread_flag(task, TIF_SVE) ||
760 thread_sm_enabled(&task->thread))
614 if (test_tsk_thread_flag(task, TIF_SVE))
761 sve_to_fpsimd(task);
762}
763
764/*
765 * Ensure that task->thread.sve_state is up to date with respect to
766 * the task->thread.uw.fpsimd_state.
767 *
768 * This should only be called by ptrace to merge new FPSIMD register

--- 8 unchanged lines hidden (view full) ---

777{
778 unsigned int vq;
779 void *sst = task->thread.sve_state;
780 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
781
782 if (!test_tsk_thread_flag(task, TIF_SVE))
783 return;
784
785 vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
639 vq = sve_vq_from_vl(task_get_sve_vl(task));
786
787 memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
788 __fpsimd_to_sve(sst, fst, vq);
789}
790
791int vec_set_vector_length(struct task_struct *task, enum vec_type type,
792 unsigned long vl, unsigned long flags)
793{

--- 27 unchanged lines hidden (view full) ---

821 goto out;
822
823 if (vl == task_get_vl(task, type))
824 goto out;
825
826 /*
827 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
828 * write any live register state back to task_struct, and convert to a
829 * regular FPSIMD thread.
683 * regular FPSIMD thread. Since the vector length can only be changed
684 * with a syscall we can't be in streaming mode while reconfiguring.
830 */
831 if (task == current) {
832 get_cpu_fpsimd_context();
833
834 fpsimd_save();
835 }
836
837 fpsimd_flush_task_state(task);
838 if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
839 thread_sm_enabled(&task->thread))
693 if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
840 sve_to_fpsimd(task);
841
842 if (system_supports_sme() && type == ARM64_VEC_SME) {
843 task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK |
844 SYS_SVCR_EL0_ZA_MASK);
845 clear_thread_flag(TIF_SME);
846 }
847
848 if (task == current)
849 put_cpu_fpsimd_context();
850
851 /*
852 * Force reallocation of task SVE and SME state to the correct
853 * size on next use:
700 * Force reallocation of task SVE state to the correct size
701 * on next use:
854 */
855 sve_free(task);
856 if (system_supports_sme() && type == ARM64_VEC_SME)
857 sme_free(task);
858
859 task_set_vl(task, type, vl);
860
861out:
862 update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
863 flags & PR_SVE_VL_INHERIT);
864
865 return 0;

--- 44 unchanged lines hidden (view full) ---

910int sve_get_current_vl(void)
911{
912 if (!system_supports_sve() || is_compat_task())
913 return -EINVAL;
914
915 return vec_prctl_status(ARM64_VEC_SVE, 0);
916}
917
918#ifdef CONFIG_ARM64_SME
919/* PR_SME_SET_VL */
920int sme_set_current_vl(unsigned long arg)
921{
922 unsigned long vl, flags;
923 int ret;
924
925 vl = arg & PR_SME_VL_LEN_MASK;
926 flags = arg & ~vl;
927
928 if (!system_supports_sme() || is_compat_task())
929 return -EINVAL;
930
931 ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
932 if (ret)
933 return ret;
934
935 return vec_prctl_status(ARM64_VEC_SME, flags);
936}
937
938/* PR_SME_GET_VL */
939int sme_get_current_vl(void)
940{
941 if (!system_supports_sme() || is_compat_task())
942 return -EINVAL;
943
944 return vec_prctl_status(ARM64_VEC_SME, 0);
945}
946#endif /* CONFIG_ARM64_SME */
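A minimal userspace sketch of the prctl interface implemented above (editor's illustration, not part of fpsimd.c). It assumes the PR_SME_* constants from <linux/prctl.h> and that, as with the SVE prctls, the return value packs the granted vector length together with the flag bits:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* Ask for a 32-byte (256-bit) SME vector length, inherited on exec. */
	int ret = prctl(PR_SME_SET_VL, 32 | PR_SME_VL_INHERIT);

	if (ret < 0) {
		perror("PR_SME_SET_VL");	/* e.g. SME not supported */
		return 1;
	}

	/* The kernel may grant a different supported VL; mask off the flags. */
	printf("granted SME VL: %d bytes\n", ret & PR_SME_VL_LEN_MASK);
	return 0;
}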
947
948static void vec_probe_vqs(struct vl_info *info,
949 DECLARE_BITMAP(map, SVE_VQ_MAX))
950{
951 unsigned int vq, vl;
952
953 bitmap_zero(map, SVE_VQ_MAX);
954
955 for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
956 write_vl(info->type, vq - 1); /* self-syncing */
957
958 switch (info->type) {
959 case ARM64_VEC_SVE:
960 vl = sve_get_vl();
961 break;
962 case ARM64_VEC_SME:
963 vl = sme_get_vl();
964 break;
965 default:
966 vl = 0;
967 break;
968 }
969
970 /* Minimum VL identified? */
971 if (sve_vq_from_vl(vl) > vq)
972 break;
973
773 vl = sve_get_vl();
974 vq = sve_vq_from_vl(vl); /* skip intervening lengths */
975 set_bit(__vq_to_bit(vq), map);
976 }
977}
978
979/*
980 * Initialise the set of known supported VQs for the boot CPU.
981 * This is called during kernel boot, before secondary CPUs are brought up.

--- 69 unchanged lines hidden (view full) ---

1051 return -EINVAL;
1052 }
1053
1054 return 0;
1055}
1056
1057static void __init sve_efi_setup(void)
1058{
1059 int max_vl = 0;
1060 int i;
859 struct vl_info *info = &vl_info[ARM64_VEC_SVE];
1061
1062 if (!IS_ENABLED(CONFIG_EFI))
1063 return;
1064
1065 for (i = 0; i < ARRAY_SIZE(vl_info); i++)
1066 max_vl = max(vl_info[i].max_vl, max_vl);
1067
1068 /*
1069 * alloc_percpu() warns and prints a backtrace if this goes wrong.
1070 * This is evidence of a crippled system and we are returning void,
1071 * so no attempt is made to handle this situation here.
1072 */
1073 if (!sve_vl_valid(max_vl))
869 if (!sve_vl_valid(info->max_vl))
1074 goto fail;
1075
1076 efi_sve_state = __alloc_percpu(
1077 SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
873 SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES);
1078 if (!efi_sve_state)
1079 goto fail;
1080
1081 return;
1082
1083fail:
1084 panic("Cannot allocate percpu memory for EFI SVE save/restore");
1085}

--- 102 unchanged lines hidden (view full) ---

1188
1189/*
1190 * Called from the put_task_struct() path, which cannot get here
1191 * unless dead_task is really dead and not schedulable.
1192 */
1193void fpsimd_release_task(struct task_struct *dead_task)
1194{
1195 __sve_free(dead_task);
1196 sme_free(dead_task);
1197}
1198
1199#endif /* CONFIG_ARM64_SVE */
1200
1201#ifdef CONFIG_ARM64_SME
1202
1203/*
1204 * Ensure that task->thread.za_state is allocated and sufficiently large.
1205 *
1206 * This function should be used only in preparation for replacing
1207 * task->thread.za_state with new data. The memory is always zeroed
1208 * here to prevent stale data from showing through: this is done in
1209 * the interest of testability and predictability, the architecture
1210 * guarantees that when ZA is enabled it will be zeroed.
1211 */
1212void sme_alloc(struct task_struct *task)
1213{
1214 if (task->thread.za_state) {
1215 memset(task->thread.za_state, 0, za_state_size(task));
1216 return;
1217 }
1218
1219 /* This could potentially be up to 64K. */
1220 task->thread.za_state =
1221 kzalloc(za_state_size(task), GFP_KERNEL);
1222}
1223
1224static void sme_free(struct task_struct *task)
1225{
1226 kfree(task->thread.za_state);
1227 task->thread.za_state = NULL;
1228}
1229
1230void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
1231{
1232 /* Set priority for all PEs to architecturally defined minimum */
1233 write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
1234 SYS_SMPRI_EL1);
1235
1236 /* Allow SME in kernel */
1237 write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
1238 isb();
1239
1240 /* Allow EL0 to access TPIDR2 */
1241 write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
1242 isb();
1243}
1244
1245/*
1246 * This must be called after sme_kernel_enable(), we rely on the
1247 * feature table being sorted to ensure this.
1248 */
1249void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
1250{
1251 /* Allow use of FA64 */
1252 write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
1253 SYS_SMCR_EL1);
1254}
1255
1256/*
1257 * Read the pseudo-SMCR used by cpufeatures to identify the supported
1258 * vector length.
1259 *
1260 * Use only if SME is present.
1261 * This function clobbers the SME vector length.
1262 */
1263u64 read_smcr_features(void)
1264{
1265 u64 smcr;
1266 unsigned int vq_max;
1267
1268 sme_kernel_enable(NULL);
1269 sme_smstart_sm();
1270
1271 /*
1272 * Set the maximum possible VL.
1273 */
1274 write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
1275 SYS_SMCR_EL1);
1276
1277 smcr = read_sysreg_s(SYS_SMCR_EL1);
1278 smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
1279 vq_max = sve_vq_from_vl(sve_get_vl());
1280 smcr |= vq_max - 1; /* set LEN field to maximum effective value */
1281
1282 sme_smstop_sm();
1283
1284 return smcr;
1285}
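/*
 * Editor's note (illustrative): the LEN field written above holds vq - 1,
 * where vq is the vector length in 128-bit quadwords. For example, if
 * streaming-mode RDVL reports 64 bytes (512 bits), sve_vq_from_vl() yields
 * vq_max = 4 and the synthesised LEN value is 3.
 */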
1286
1287void __init sme_setup(void)
1288{
1289 struct vl_info *info = &vl_info[ARM64_VEC_SME];
1290 u64 smcr;
1291 int min_bit;
1292
1293 if (!system_supports_sme())
1294 return;
1295
1296 /*
1297 * SME doesn't require any particular vector length be
1298 * supported but it does require at least one. We should have
1299 * disabled the feature entirely while bringing up CPUs but
1300 * let's double check here.
1301 */
1302 WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
1303
1304 min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
1305 info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
1306
1307 smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
1308 info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
1309
1310 /*
1311 * Sanity-check that the max VL we determined through CPU features
1312 * corresponds properly to sme_vq_map. If not, do our best:
1313 */
1314 if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
1315 info->max_vl)))
1316 info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
1317 info->max_vl);
1318
1319 WARN_ON(info->min_vl > info->max_vl);
1320
1321 /*
1322 * For the default VL, pick the maximum supported value <= 32
1323 * (256 bits) if there is one, since this is guaranteed not to
1324 * grow the signal frame when in streaming mode; otherwise the
1325 * minimum available VL will be used.
1326 */
1327 set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
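	/*
	 * Editor's note (illustrative): with supported SME VLs of {16, 32, 64}
	 * bytes the call above selects 32; if only 64 bytes were supported,
	 * the request is clamped up to the minimum available VL and 64 is used.
	 */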
1328
1329 pr_info("SME: minimum available vector length %u bytes per vector\n",
1330 info->min_vl);
1331 pr_info("SME: maximum available vector length %u bytes per vector\n",
1332 info->max_vl);
1333 pr_info("SME: default vector length %u bytes per vector\n",
1334 get_sme_default_vl());
1335}
1336
1337#endif /* CONFIG_ARM64_SME */
1338
1339static void sve_init_regs(void)
1340{
1341 /*
1342 * Convert the FPSIMD state to SVE, zeroing all the state that
1343 * is not shared with FPSIMD. If (as is likely) the current
1344 * state is live in the registers then do this there and
1345 * update our metadata for the current task including
1346 * disabling the trap; otherwise update our in-memory copy.
1347 * We are guaranteed not to be in streaming mode: we can only
1348 * take an SVE trap when not in streaming mode, and we can't be
1349 * in streaming mode when taking an SME trap.
1350 */
1351 if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
1352 unsigned long vq_minus_one =
1353 sve_vq_from_vl(task_get_sve_vl(current)) - 1;
1354 sve_set_vq(vq_minus_one);
1355 sve_flush_live(true, vq_minus_one);
1356 fpsimd_bind_task_to_cpu();
1357 } else {
1358 fpsimd_to_sve(current);
1359 }
1360}
1361
1362/*
1363 * Trapped SVE access
1364 *
1365 * Storage is allocated for the full SVE state, the current FPSIMD
1366 * register contents are migrated across, and the access trap is
1367 * disabled.
1368 *
1369 * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
1370 * would have disabled the SVE access trap for userspace during
1371 * ret_to_user, making an SVE access trap impossible in that case.
1372 */
1373void do_sve_acc(unsigned int esr, struct pt_regs *regs)
1007void do_sve_acc(unsigned long esr, struct pt_regs *regs)
1374{
1375 /* Even if we chose not to use SVE, the hardware could still trap: */
1376 if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
1377 force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1378 return;
1379 }
1380
1381 sve_alloc(current);
1382 if (!current->thread.sve_state) {
1383 force_sig(SIGKILL);
1384 return;
1385 }
1386
1387 get_cpu_fpsimd_context();
1388
1389 if (test_and_set_thread_flag(TIF_SVE))
1390 WARN_ON(1); /* SVE access shouldn't have trapped */
1391
1392 /*
1393 * Even if the task may have used streaming mode, we can only
1394 * generate SVE access traps in normal SVE mode, and
1395 * transitioning out of streaming mode may discard any
1396 * streaming mode state. Always clear the high bits to avoid
1397 * any potential errors tracking what is properly initialised.
1027 * Convert the FPSIMD state to SVE, zeroing all the state that
1028 * is not shared with FPSIMD. If (as is likely) the current
1029 * state is live in the registers then do this there and
1030 * update our metadata for the current task including
1031 * disabling the trap, otherwise update our in-memory copy.
1398 */
1399 sve_init_regs();
1400
1401 put_cpu_fpsimd_context();
1402}
1403
1404/*
1405 * Trapped SME access
1406 *
1407 * Storage is allocated for the full SVE and SME state, the current
1408 * FPSIMD register contents are migrated to SVE if SVE is not already
1409 * active, and the access trap is disabled.
1410 *
1411 * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
1412 * would have disabled the SME access trap for userspace during
1413 * ret_to_user, making an SVE access trap impossible in that case.
1414 */
1415void do_sme_acc(unsigned int esr, struct pt_regs *regs)
1416{
1417 /* Even if we chose not to use SME, the hardware could still trap: */
1418 if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
1419 force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1420 return;
1421 }
1422
1423 /*
1424 * If this is not a trap due to SME being disabled then something
1425 * is being used in the wrong mode, report as SIGILL.
1426 */
1427 if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
1428 force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1429 return;
1430 }
1431
1432 sve_alloc(current);
1433 sme_alloc(current);
1434 if (!current->thread.sve_state || !current->thread.za_state) {
1435 force_sig(SIGKILL);
1436 return;
1437 }
1438
1439 get_cpu_fpsimd_context();
1440
1441 /* With TIF_SME userspace shouldn't generate any traps */
1442 if (test_and_set_thread_flag(TIF_SME))
1443 WARN_ON(1);
1444
1445 if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
1446 unsigned long vq_minus_one =
1447 sve_vq_from_vl(task_get_sme_vl(current)) - 1;
1448 sme_set_vq(vq_minus_one);
1449
1035 sve_vq_from_vl(task_get_sve_vl(current)) - 1;
1036 sve_set_vq(vq_minus_one);
1037 sve_flush_live(true, vq_minus_one);
1450 fpsimd_bind_task_to_cpu();
1039 } else {
1040 fpsimd_to_sve(current);
1451 }
1452
1453 /*
1454 * If SVE was not already active initialise the SVE registers,
1455 * any non-shared state between the streaming and regular SVE
1456 * registers is architecturally guaranteed to be zeroed when
1457 * we enter streaming mode. We do not need to initialize ZA
1458 * since ZA must be disabled at this point and enabling ZA is
1459 * architecturally defined to zero ZA.
1460 */
1461 if (system_supports_sve() && !test_thread_flag(TIF_SVE))
1462 sve_init_regs();
1463
1464 put_cpu_fpsimd_context();
1465}
1466
1467/*
1468 * Trapped FP/ASIMD access.
1469 */
1470void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
1049void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
1471{
1472 /* TODO: implement lazy context saving/restoring */
1473 WARN_ON(1);
1474}
1475
1476/*
1477 * Raise a SIGFPE for the current process.
1478 */
1479void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
1058void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
1480{
1481 unsigned int si_code = FPE_FLTUNK;
1482
1483 if (esr & ESR_ELx_FP_EXC_TFV) {
1484 if (esr & FPEXC_IOF)
1485 si_code = FPE_FLTINV;
1486 else if (esr & FPEXC_DZF)
1487 si_code = FPE_FLTDIV;

--- 84 unchanged lines hidden (view full) ---

1572 sizeof(current->thread.uw.fpsimd_state));
1573
1574 if (system_supports_sve()) {
1575 clear_thread_flag(TIF_SVE);
1576 sve_free(current);
1577 fpsimd_flush_thread_vl(ARM64_VEC_SVE);
1578 }
1579
1580 if (system_supports_sme()) {
1581 clear_thread_flag(TIF_SME);
1582 sme_free(current);
1583 fpsimd_flush_thread_vl(ARM64_VEC_SME);
1584 current->thread.svcr = 0;
1585 }
1586
1587 put_cpu_fpsimd_context();
1588}
1589
1590/*
1591 * Save the userland FPSIMD state of 'current' to memory, but only if the state
1592 * currently held in the registers does in fact belong to 'current'
1593 */
1594void fpsimd_preserve_current_state(void)

--- 26 unchanged lines hidden (view full) ---

1621static void fpsimd_bind_task_to_cpu(void)
1622{
1623 struct fpsimd_last_state_struct *last =
1624 this_cpu_ptr(&fpsimd_last_state);
1625
1626 WARN_ON(!system_supports_fpsimd());
1627 last->st = &current->thread.uw.fpsimd_state;
1628 last->sve_state = current->thread.sve_state;
1629 last->za_state = current->thread.za_state;
1630 last->sve_vl = task_get_sve_vl(current);
1631 last->sme_vl = task_get_sme_vl(current);
1632 last->svcr = &current->thread.svcr;
1633 current->thread.fpsimd_cpu = smp_processor_id();
1634
1635 /*
1636 * Toggle SVE and SME trapping for userspace if needed, these
1637 * are serialsied by ret_to_user().
1638 */
1639 if (system_supports_sme()) {
1640 if (test_thread_flag(TIF_SME))
1641 sme_user_enable();
1642 else
1643 sme_user_disable();
1644 }
1645
1646 if (system_supports_sve()) {
1205 /* Toggle SVE trapping for userspace if needed */
1647 if (test_thread_flag(TIF_SVE))
1648 sve_user_enable();
1649 else
1650 sve_user_disable();
1210
1211 /* Serialised by exception return to user */
1651 }
1652}
1653
1654void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
1655 unsigned int sve_vl, void *za_state,
1656 unsigned int sme_vl, u64 *svcr)
1216 unsigned int sve_vl)
1657{
1658 struct fpsimd_last_state_struct *last =
1659 this_cpu_ptr(&fpsimd_last_state);
1660
1661 WARN_ON(!system_supports_fpsimd());
1662 WARN_ON(!in_softirq() && !irqs_disabled());
1663
1664 last->st = st;
1665 last->svcr = svcr;
1666 last->sve_state = sve_state;
1667 last->za_state = za_state;
1668 last->sve_vl = sve_vl;
1669 last->sme_vl = sme_vl;
1670}
1671
1672/*
1673 * Load the userland FPSIMD state of 'current' from memory, but only if the
1674 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
1675 * state of 'current'. This is called when we are preparing to return to
1676 * userspace to ensure that userspace sees a good register state.
1677 */

--- 80 unchanged lines hidden (view full) ---

1758 * Invalidate any task's FPSIMD state that is present on this cpu.
1759 * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
1760 * before calling this function.
1761 */
1762static void fpsimd_flush_cpu_state(void)
1763{
1764 WARN_ON(!system_supports_fpsimd());
1765 __this_cpu_write(fpsimd_last_state.st, NULL);
1766
1767 /*
1768 * Leaving streaming mode enabled will cause issues for any kernel
1769 * NEON and leaving streaming mode or ZA enabled may increase power
1770 * consumption.
1771 */
1772 if (system_supports_sme())
1773 sme_smstop();
1774
1775 set_thread_flag(TIF_FOREIGN_FPSTATE);
1776}
1777
1778/*
1779 * Save the FPSIMD state to memory and invalidate cpu view.
1780 * This function must be called with preemption disabled.
1781 */
1782void fpsimd_save_and_flush_cpu_state(void)

--- 61 unchanged lines hidden (view full) ---

1844}
1845EXPORT_SYMBOL(kernel_neon_end);
1846
1847#ifdef CONFIG_EFI
1848
1849static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
1850static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
1851static DEFINE_PER_CPU(bool, efi_sve_state_used);
1852static DEFINE_PER_CPU(bool, efi_sm_state);
1853
1854/*
1855 * EFI runtime services support functions
1856 *
1857 * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
1858 * This means that for EFI (and only for EFI), we have to assume that FPSIMD
1859 * is always used rather than being an optional accelerator.
1860 *

--- 18 unchanged lines hidden (view full) ---

1879 kernel_neon_begin();
1880 } else {
1881 /*
1882 * If !efi_sve_state, SVE can't be in use yet and doesn't need
1883 * preserving:
1884 */
1885 if (system_supports_sve() && likely(efi_sve_state)) {
1886 char *sve_state = this_cpu_ptr(efi_sve_state);
1887 bool ffr = true;
1888 u64 svcr;
1889
1890 __this_cpu_write(efi_sve_state_used, true);
1891
1892 if (system_supports_sme()) {
1893 svcr = read_sysreg_s(SYS_SVCR_EL0);
1894
1895 if (!system_supports_fa64())
1896 ffr = svcr & SYS_SVCR_EL0_SM_MASK;
1897
1898 __this_cpu_write(efi_sm_state, ffr);
1899 }
1900
1901 sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
1902 &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
1903 ffr);
1904
1905 if (system_supports_sme())
1906 sysreg_clear_set_s(SYS_SVCR_EL0,
1907 SYS_SVCR_EL0_SM_MASK, 0);
1908
1439 true);
1909 } else {
1910 fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
1911 }
1912
1913 __this_cpu_write(efi_fpsimd_state_used, true);
1914 }
1915}
1916

--- 6 unchanged lines hidden (view full) ---

1923 return;
1924
1925 if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
1926 kernel_neon_end();
1927 } else {
1928 if (system_supports_sve() &&
1929 likely(__this_cpu_read(efi_sve_state_used))) {
1930 char const *sve_state = this_cpu_ptr(efi_sve_state);
1931 bool ffr = true;
1932
1462
1933 /*
1934 * Restore streaming mode; EFI calls are
1935 * normal function calls so should not return in
1936 * streaming mode.
1937 */
1938 if (system_supports_sme()) {
1939 if (__this_cpu_read(efi_sm_state)) {
1940 sysreg_clear_set_s(SYS_SVCR_EL0,
1941 0,
1942 SYS_SVCR_EL0_SM_MASK);
1943 if (!system_supports_fa64())
1944 ffr = efi_sm_state;
1945 }
1946 }
1947
1463 sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1);
1948 sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
1949 &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
1950 ffr);
1466 true);
1951
1952 __this_cpu_write(efi_sve_state_used, false);
1953 } else {
1954 fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
1955 }
1956 }
1957}
1958
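For orientation, a simplified sketch (editor's illustration, not part of fpsimd.c) of how the two wrappers above are meant to bracket an EFI runtime-service call; efi_call_example() is a hypothetical stand-in for the real call plumbing in arch/arm64/include/asm/efi.h:

static efi_status_t example_runtime_call(void)
{
	efi_status_t status;

	__efi_fpsimd_begin();		/* preserve FPSIMD/SVE state (and, with SME, exit streaming mode) */
	status = efi_call_example();	/* hypothetical EFI runtime service */
	__efi_fpsimd_end();		/* restore the saved register state */

	return status;
}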

--- 58 unchanged lines hidden (view full) ---

2017 fpsimd_hotplug_init();
2018 } else {
2019 pr_notice("Floating-point is not implemented\n");
2020 }
2021
2022 if (!cpu_have_named_feature(ASIMD))
2023 pr_notice("Advanced SIMD is not implemented\n");
2024
2025
2026 if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
2027 pr_notice("SME is implemented but not SVE\n");
2028
2029 sve_sysctl_init();
2030 sme_sysctl_init();
2031
2032 return 0;
1541 return sve_sysctl_init();
2033}
2034core_initcall(fpsimd_init);