/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
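
/*
 * For reference (informational sketch only; the authoritative definition
 * lives in <asm/processor.h>, with the byte offsets generated by
 * asm-offsets.c): the layout assumed by load_vr_state/store_vr_state and
 * save_altivec below is approximately
 *
 *	struct thread_vr_state {
 *		vector128	vr[32];		// v0..v31, 16 bytes each
 *		vector128	vscr;		// at byte offset VRSTATE_VSCR
 *	};
 *
 * SAVE_32VRS/REST_32VRS move vr[0..31]; the VSCR has no direct load/store
 * form, so it is staged through v0 with mfvscr/mtvscr.
 */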

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
	tovirt(r5, r5)
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
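
/*
 * Note on the emulation helpers below (derived from the code above):
 * each one does mflr r12 and then bl fpenable.  fpenable allocates a
 * 64-byte stack frame, enables MSR_FP (keeping the old MSR in r10),
 * saves fr0 at 24(r1), fr1 at 16(r1) and fr31 at 8(r1), and parks the
 * caller's FPSCR in fr31 before clearing it.  Helpers that need extra
 * scratch FPRs (vmaddfp, vnmsubfp, vrsqrtefp) use the remaining slots
 * at 32(r1) and up.  Each helper finishes with b fpdisable, which
 * restores the FPSCR, the saved FPRs and the original MSR, then
 * returns through r12.
 */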

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
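
/*
 * Informational note on the vrsqrtefp loop above: for s > 0, one
 * Newton-Raphson step for f(r) = 1/r^2 - s is
 *
 *	r' = r + 0.5 * r * (1 - s * r * r)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence in the loop.  Each
 * step roughly doubles the number of correct bits, so two steps starting
 * from the frsqrte estimate reach the "sufficient accuracy" mentioned in
 * the header comment.
 */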