/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 *
 * r3 = base address of the state area to load from.
 * r4 is clobbered (used as the offset register).
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3		/* stage the saved VSCR image in v0 */
	mtvscr	v0
	/* v0 was only a staging register; presumably reloaded here with v1-v31 */
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 *
 * r3 = base address of the state area to store into.
 * r4 is clobbered (used as the offset register).
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0			/* overwrites the live v0 with VSCR */
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	/*
	 * Reload v0 from offset 0 of the area (presumably the slot
	 * SAVE_32VRS just wrote for v0) so the live register is not
	 * left holding the VSCR image.
	 */
	lvx	v0, 0, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f			/* already non-zero: leave it alone */
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
	/*
	 * NOTE(review): r9 (32-bit) / r12 (64-bit) are presumably the
	 * interrupted context's MSR image supplied by the caller; only the
	 * 64-bit path writes it back to the frame (_MSR(r1)) here.
	 */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)	/* clear the PACA srr_valid byte */
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)	/* load_vec = 1 */
	addi	r6,r5,THREAD_VRSTATE	/* r6 = state area to load from */
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)	/* used_vr = 1 */
	lvx	v0,r10,r6		/* stage the saved VSCR image in v0 */
	mtvscr	v0
	REST_32VRS(0,r4,r6)		/* r4 is reused as the offset register */
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)		/* r5 is not read again in this routine */
	PPC_LCMPI	0,r7,0
	bne	2f			/* non-NULL save-area pointer: use it */
	addi	r7,r3,THREAD_VRSTATE	/* otherwise save into the thread's own state */
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0			/* overwrites the live v0 with VSCR */
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	/*
	 * Reload v0 from offset 0 of the area (presumably the slot
	 * SAVE_32VRS just wrote for v0) so the live register is not
	 * left holding the VSCR image.
	 */
	lvx	v0,0,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 *
 * r12 is tested for MSR_FP / MSR_VEC, gets MSR_VSX ORed in and is
 * written to _MSR(r1). Exits by branching to fast_interrupt_return_srr
 * rather than with blr.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	/* andi./andis. set CR0[EQ] when the tested bit is clear, so beql only calls then */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)	/* clear the PACA srr_valid byte */
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
	.data
#ifdef CONFIG_PPC32
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

/* Load the single-precision constant at 'name' into 'fr'; clobbers r11. */
#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

fpzero:
	.quad	0
fpone:
	.quad	0x3ff0000000000000	/* 1.0 */
fphalf:
	.quad	0x3fe0000000000000	/* 0.5 */

/* Load the double-precision constant at 'name' into 'fr'; clobbers r11. */
#ifdef CONFIG_PPC_KERNEL_PCREL
#define LDCONST(fr, name)		\
	pla	r11,name@pcrel;		\
	lfd	fr,0(r11)
#else
#define LDCONST(fr, name)		\
	addis	r11,r2,name@toc@ha;	\
	lfd	fr,name@toc@l(r11)
#endif
#endif
	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 *
 * Pushes a 64-byte frame and leaves the following for fpdisable:
 *   r10      = MSR value on entry
 *   fr31     = FPSCR value on entry (read with mffs)
 *   8(r1)    = saved fr31
 *   16(r1)   = saved fr1
 *   24(r1)   = saved fr0
 *   32..56(r1) are used by the callers below to save fr2-fr5.
 * Clobbers r11. The caller is expected to have put its own LR in r12
 * (fpdisable restores LR from r12).
 */
SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr
SYM_FUNC_END(fpenable)

/*
 * Common exit path for the routines below (reached with 'b fpdisable').
 * Undoes fpenable: restores LR from r12, FPSCR from fr31, then fr31/fr1/fr0
 * from the frame, the MSR from r10, pops the 64-byte frame and returns to
 * the original caller.
 */
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 * r3 -> destination, r4, r5 -> sources.
 * Four single-precision elements: dst[i] = a[i] + b[i].
 */
_GLOBAL(vaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r0,4
	mtctr	r0			/* 4 elements */
	li	r6,0			/* byte offset of current element */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 * r3 -> destination, r4, r5 -> sources.
 * Four single-precision elements: dst[i] = a[i] - b[i].
 */
_GLOBAL(vsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r0,4
	mtctr	r0			/* 4 elements */
	li	r6,0			/* byte offset of current element */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 * r3 -> destination, r4, r5, r6 -> sources.
 * Four single-precision elements: dst[i] = a[i] * c[i] + b[i]
 * (a = r4, b = r5, c = r6).
 */
_GLOBAL(vmaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)		/* fr2 is used below; save it in the frame */
	li	r0,4
	mtctr	r0			/* 4 elements */
	li	r7,0			/* byte offset of current element */
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 * r3 -> destination, r4, r5, r6 -> sources.
 * Four single-precision elements: dst[i] = -(a[i] * c[i] - b[i])
 * (a = r4, b = r5, c = r6).
 */
_GLOBAL(vnmsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)		/* fr2 is used below; save it in the frame */
	li	r0,4
	mtctr	r0			/* 4 elements */
	li	r7,0			/* byte offset of current element */
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0			/* 4 elements */
	li	r6,0			/* byte offset of current element */
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	/* fr2-fr5 are used below; save them in the frame */
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0			/* 4 elements */
	li	r6,0			/* byte offset of current element */
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable