/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
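
/*
 * For reference: the C-level view of the three entry points above is
 * assumed to be roughly
 *
 *	void load_vr_state(struct thread_vr_state *vr);
 *	void store_vr_state(struct thread_vr_state *vr);
 *	void save_altivec(struct task_struct *tsk);
 *
 * so a hypothetical in-kernel user might do, with preemption disabled
 * and MSR_VEC enabled (e.g. via enable_kernel_altivec()):
 *
 *	store_vr_state(&current->thread.vr_state);
 *	...
 *	load_vr_state(&current->thread.vr_state);
 *
 * The authoritative prototypes live in the C headers, not in this
 * file; treat the above as an illustrative sketch only.
 */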

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
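
/*
 * For reference: vrsqrtefp below refines the frsqrte estimate with two
 * Newton-Raphson steps. In C, one refinement step for r ~= 1/sqrt(s)
 * would look roughly like
 *
 *	static inline float nr_step(float r, float s)
 *	{
 *		return r + 0.5f * r * (1.0f - s * r * r);
 *	}
 *
 * which is what the fmuls/fnmsubs/fmadds sequence computes, one
 * element at a time.
 */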

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
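
/*
 * For reference: the emulation helpers above (vaddfp, vsubfp, vmaddfp,
 * vnmsubfp, vrefp, vrsqrtefp) each operate element-wise on four 32-bit
 * floats, with r3 = destination and r4-r6 = sources. Their C callers
 * are assumed to declare them along the lines of
 *
 *	void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
 *	void vmaddfp(vector128 *dst, vector128 *a, vector128 *b,
 *		     vector128 *c);
 *	void vrefp(vector128 *dst, vector128 *src);
 *
 * and so on; check the AltiVec emulation code for the authoritative
 * prototypes, as the above is an illustrative sketch only.
 */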