#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
	lvx	vr0,r10,r3
	mtvscr	vr0
	addi	r10,r3,THREAD_TRANSACT_VRSTATE
	REST_32VRS(0,r4,r10)

	/* Disable VEC again. */
	MTMSRD(r6)
	isync

	blr
#endif

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	vr0,r4,r3
	mtvscr	vr0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	vr0
	li	r4, VRSTATE_VSCR
	stvx	vr0, r4, r3
	blr
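
/*
 * Illustrative C-level sketch (not code from this file) of the layout the
 * two helpers above expect at r3: 32 vector registers starting at offset 0,
 * with the VSCR image at offset VRSTATE_VSCR.  The type and field names
 * below are assumptions for illustration only, not the kernel's definitions.
 *
 *	typedef struct { unsigned int u[4]; } v128 __attribute__((aligned(16)));
 *
 *	struct vr_state_sketch {
 *		v128 vr[32];	// saved/restored by SAVE_32VRS/REST_32VRS
 *		v128 vscr;	// loaded into / stored from the VSCR
 *	};
 *
 * A caller enables MSR_VEC first, then passes a pointer to such a block,
 * e.g. load_vr_state(&state) or store_vr_state(&state).
 */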

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	addi	r6,r4,THREAD_VRSTATE
	SAVE_32VRS(0,r5,r6)
	mfvscr	vr0
	li	r10,VRSTATE_VSCR
	stvx	vr0,r10,r6
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r6
	mtvscr	vr0
	REST_32VRS(0,r4,r6)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

_GLOBAL(giveup_altivec_notask)
	mfmsr	r3
	andis.	r4,r3,MSR_VEC@h
	bnelr				/* Already enabled? */
	oris	r3,r3,MSR_VEC@h
	SYNC
	MTMSRD(r3)			/* enable use of VMX now */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r7)
	mfvscr	vr0
	li	r4,VRSTATE_VSCR
	stvx	vr0,r4,r7
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr
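
/*
 * Illustrative C-level sketch (not code from this file) of the switch_to-time
 * calling pattern described above: if the outgoing task has been using VMX
 * (MSR_VEC set in its saved user MSR), its vector state is flushed to the
 * thread_struct via giveup_altivec() before the switch.  The exact call site
 * lives in the C context-switch code, not here; this is only a sketch.
 *
 *	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
 *		giveup_altivec(prev);
 */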

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
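
/*
 * Illustrative C-level sketch (not code from this file) of what vaddfp
 * computes with scalar FP: one single-precision add per lane of a 4-float
 * vector (r3 = destination, r4/r5 = sources, stepping 4 bytes per loop
 * iteration).  The routines that follow use the same per-lane pattern.
 *
 *	void vaddfp_sketch(float *dst, const float *a, const float *b)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			dst[i] = a[i] + b[i];
 *	}
 */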

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
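
/*
 * Illustrative C-level sketch (not code from this file) of the per-lane
 * computation in vrsqrtefp above: an initial estimate r of 1/sqrt(s)
 * (frsqrte in the asm) refined by two Newton-Raphson steps, each computing
 * r = r + 0.5 * r * (1 - s * r * r), exactly as the inline comments note.
 *
 *	float rsqrte_sketch(float s, float r0)
 *	{
 *		float r = r0;	// initial estimate, as produced by frsqrte
 *
 *		r = r + 0.5f * r * (1.0f - s * r * r);	// first refinement
 *		r = r + 0.5f * r * (1.0f - s * r * r);	// second refinement
 *		return r;
 *	}
 */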