#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
	lvx	v0,r10,r3
	mtvscr	v0
	addi	r10,r3,THREAD_TRANSACT_VRSTATE
	REST_32VRS(0,r4,r10)

	blr
#endif

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
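/*
 * For reference, the VMX save area that load_vr_state()/store_vr_state()
 * and load_up_altivec()/save_altivec() above operate on is laid out as
 * 32 vector registers followed by a 16-byte VSCR image; VRSTATE_VSCR is
 * the byte offset of that image, generated by asm-offsets.  A simplified
 * C sketch of the layout (the struct and field names below are
 * illustrative only; in current kernels the real definition is
 * struct thread_vr_state in <asm/processor.h>):
 *
 *	struct vr_state_sketch {
 *		unsigned char vr[32][16];	// v0..v31: SAVE_32VRS/REST_32VRS
 *		unsigned char vscr[16];		// stvx/lvx + mfvscr/mtvscr
 *	} __attribute__((aligned(16)));
 */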
#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable
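/*
 * The four arithmetic helpers above share one pattern: between fpenable
 * and fpdisable they run four scalar single-precision operations, one
 * per 32-bit lane of the vector operands.  Roughly equivalent C for
 * vaddfp and vmaddfp (a sketch only; these function and parameter names
 * are not kernel symbols):
 *
 *	void vaddfp_ref(float d[4], const float a[4], const float b[4])
 *	{
 *		for (int i = 0; i < 4; i++)
 *			d[i] = a[i] + b[i];		// lfsx/lfsx/fadds/stfsx
 *	}
 *
 *	void vmaddfp_ref(float d[4], const float a[4],
 *			 const float b[4], const float c[4])
 *	{
 *		for (int i = 0; i < 4; i++)
 *			d[i] = a[i] * c[i] + b[i];	// fmadds fr0,fr0,fr2,fr1
 *	}
 */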
/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
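/*
 * The loop body in vrsqrtefp above is the standard Newton-Raphson step
 * for 1/sqrt(s), r' = r + 0.5 * r * (1 - s * r * r), applied twice to
 * the frsqrte estimate.  A C sketch of the same refinement (the helper
 * name is illustrative; it takes the initial estimate, which the
 * hardware gets from frsqrte, as a parameter):
 *
 *	static float rsqrt_refine(float s, float r)
 *	{
 *		r = r + 0.5f * r * (1.0f - s * r * r);	// first NR step
 *		r = r + 0.5f * r * (1.0f - s * r * r);	// second NR step
 *		return r;				// ~ 1/sqrt(s)
 *	}
 */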