/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 *
 * r3 -> state area to load from.
 * r4 is used as a scratch index register.
 *
 * VSCR is loaded first because it is staged through v0, which the
 * REST_32VRS that follows reloads starting from register 0.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 *
 * r3 -> state area to store into.
 * r4 is used as a scratch index register.
 *
 * The vector registers are saved before VSCR because VSCR is read out
 * through v0, which overwrites v0.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	/*
	 * NOTE(review): r9 is presumably the MSR image that the 32-bit
	 * exception return path restores - confirm against the caller.
	 */
	oris	r9,r9,MSR_VEC@h
#ifdef CONFIG_VMAP_STACK
	tovirt(r5, r5)
#endif
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)		/* write the updated MSR image back to the frame */
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE	/* r6 -> vector state inside THREAD */
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)	/* flag the thread as having used VMX */
	lvx	v0,r10,r6		/* VSCR is staged through v0 ... */
	mtvscr	v0
	REST_32VRS(0,r4,r6)		/* ... which is reloaded here */
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 *
 * r3 = tsk.  The destination is the pointer held at THREAD_VRSAVEAREA
 * when that pointer is non-zero, otherwise the THREAD_VRSTATE area
 * inside the thread_struct itself.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD	/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	/*
	 * NOTE(review): r5 is loaded here but not read again in this
	 * routine - possibly vestigial; confirm before removing.
	 */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 *
 * r12 is tested for MSR_FP / MSR_VEC, gets MSR_VSX set, and is then
 * stored to _MSR(r1) before branching to fast_interrupt_return.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_interrupt_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 *
 * Contract with the routines below, which pair it with fpdisable:
 *   - the caller copies LR into r12 before "bl fpenable";
 *   - r10 keeps the MSR value from before MSR_FP was set;
 *   - fr31 keeps the FPSCR value from before it was zeroed;
 *   - a 64-byte frame is pushed, holding
 *        8(r1)  fr31
 *       16(r1)  fr1
 *       24(r1)  fr0
 *     and callers that use fr2..fr5 save them at 32..56(r1) themselves.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

/*
 * Undo fpenable: put back FPSCR (from fr31), fr31/fr1/fr0 (from the
 * frame) and the MSR (from r10), pop the 64-byte frame, and return to
 * the address the caller stashed in r12.
 */
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * Processes 4 single-precision elements: dst[i] = a[i] + b[i].
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * Processes 4 single-precision elements: dst[i] = a[i] - b[i].
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * Processes 4 single-precision elements: dst[i] = a[i] * c[i] + b[i].
 * fr2 is saved/restored here because fpenable does not save it.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * Processes 4 single-precision elements: dst[i] = -(a[i] * c[i] - b[i]).
 * fr2 is saved/restored here because fpenable does not save it.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 * Processes 4 single-precision elements; fr1 holds the constant 1.0.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 *
 * Processes 4 single-precision elements.  fr4 holds 1.0 and fr5 holds
 * 0.5 for the whole loop; fr2..fr5 are saved/restored here because
 * fpenable does not save them.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable