/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
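/*
 * For reference, the two helpers above operate on the thread's VMX save
 * area. A rough C-level sketch of the layout (see struct thread_vr_state
 * in <asm/processor.h> for the authoritative definition; VRSTATE_VSCR is
 * the asm-offsets constant for the vscr field):
 *
 *	struct thread_vr_state {
 *		vector128	vr[32];	// moved by SAVE_32VRS/REST_32VRS
 *		vector128	vscr;	// staged through v0 with mfvscr/mtvscr
 *	};
 *
 * r3 carries the pointer to this structure in both routines.
 */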
/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
	.data
#ifdef CONFIG_PPC32
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

fpzero:
	.quad	0
fpone:
	.quad	0x3ff0000000000000	/* 1.0 */
fphalf:
	.quad	0x3fe0000000000000	/* 0.5 */

#ifdef CONFIG_PPC_KERNEL_PCREL
#define LDCONST(fr, name)		\
	pla	r11,name@pcrel;		\
	lfd	fr,0(r11)
#else
#define LDCONST(fr, name)		\
	addis	r11,r2,name@toc@ha;	\
	lfd	fr,name@toc@l(r11)
#endif
#endif
	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr
SYM_FUNC_END(fpenable)

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
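/*
 * For orientation, the emulation helpers below all follow the same
 * pattern: each operand points at four packed single-precision
 * elements, and the loop applies the scalar FP operation to one element
 * per iteration. A rough C sketch (illustrative only, not part of the
 * kernel sources; "dst"/"a"/"b" stand for the r3/r4/r5 pointers):
 *
 *	void vaddfp_sketch(float *dst, const float *a, const float *b)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)		// CTR = 4, stepping 4 bytes
 *			dst[i] = a[i] + b[i];	// lfsx/lfsx, fadds, stfsx
 *	}
 *
 * vsubfp/vmaddfp/vnmsubfp differ only in the arithmetic applied in the
 * loop body; fpenable/fpdisable bracket each routine so the scalar FP
 * registers used here are saved and restored around it.
 */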
/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
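/*
 * Note on the vrsqrtefp iteration above (illustrative derivation, not
 * built): for y = 1/sqrt(s), Newton's method on f(r) = 1/(r*r) - s gives
 *
 *	r' = r - f(r)/f'(r) = r + 0.5 * r * (1 - s * r * r)
 *
 * which is exactly the fnmsubs/fmadds pair in the loop. frsqrte supplies
 * the initial estimate and two such steps refine it to sufficient
 * accuracy for single precision.
 */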