/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
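
/*
 * Illustrative C-side usage of the two helpers above (a sketch only, not
 * part of this file): both assume MSR_VEC is already set, so a caller
 * typically enables the unit first and then passes a pointer to the
 * task's vr_state buffer.  Helper names here follow the usual powerpc
 * kernel routines and are shown purely as an example:
 *
 *	msr_check_and_set(MSR_VEC);			// enable VMX first
 *	load_vr_state(&current->thread.vr_state);	// refill v0-v31 + VSCR
 *	...
 *	store_vr_state(&current->thread.vr_state);	// spill v0-v31 + VSCR
 */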

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#ifdef CONFIG_VMAP_STACK
	tovirt(r5, r5)
#endif
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
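/*
 * Newton-Raphson refinement for r ~= 1/sqrt(s): each iteration computes
 *
 *	r' = r + (r * 0.5) * (1 - s * r * r)
 *
 * i.e. the fnmsubs below produces (1 - s*r*r) and the fmadds folds in the
 * r + (r/2)*(...) update.  Each iteration roughly doubles the number of
 * correct bits in the initial frsqrte estimate.
 */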
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
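
/*
 * Illustrative C-side call pattern for the emulation helpers above (a
 * sketch only, not part of this file): each routine takes a destination
 * pointer in r3 and source pointers in r4 (plus r5/r6 where used), each
 * addressing four packed single-precision values, and as noted above the
 * caller must have preempt disabled, e.g. roughly:
 *
 *	preempt_disable();
 *	vaddfp(&dst, &a, &b);	// dst[i] = a[i] + b[i] for i = 0..3
 *	preempt_enable();
 *
 * The exact argument types used by the in-kernel callers are not shown
 * here.
 */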