#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Wrapper to call load_up_altivec from C.
 * void do_load_up_altivec(struct pt_regs *regs);
 */
_GLOBAL(do_load_up_altivec)
	mflr	r0
	std	r0, 16(r1)
	stdu	r1, -112(r1)

	subi	r6, r3, STACK_FRAME_OVERHEAD
	/* load_up_altivec expects r12=MSR, r13=PACA, and returns
	 * with r12 = new MSR.
	 */
	ld	r12,_MSR(r6)
	GET_PACA(r13)
	bl	load_up_altivec
	std	r12,_MSR(r6)

	ld	r0, 112+16(r1)
	addi	r1, r1, 112
	mtlr	r0
	blr

/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VSCR
	lvx	vr0,r10,r3
	mtvscr	vr0
	REST_32VRS_TRANSACT(0,r4,r3)

	/* Disable VEC again. */
	MTMSRD(r6)
	isync

	blr
#endif

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code.  Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
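/*
 * Roughly, the UP block that follows does (illustrative C-level sketch
 * only, not actual kernel code):
 *
 *	if (last_task_used_altivec) {
 *		save VR0-VR31 and VSCR into last_task_used_altivec->thread;
 *		last_task_used_altivec->thread.regs->msr &= ~MSR_VEC;
 *	}
 */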
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

_GLOBAL(giveup_altivec_notask)
	mfmsr	r3
	andis.	r4,r3,MSR_VEC@h
	bnelr				/* Already enabled? */
	oris	r3,r3,MSR_VEC@h
	SYNC
	MTMSRD(r3)			/* enable use of VMX now */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

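	/* Note: VSR0-31 share state with the FP registers (upper halves)
	 * and VSR32-63 with the VMX registers, so loading the FP and VMX
	 * state above brings in the whole VSX register file; only MSR_VSX
	 * still needs to be turned on below.
	 */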
#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

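/*
 * Calling convention for the helpers above, as used by the emulation
 * routines below: the caller stashes LR in r12 ("mflr r12") before
 * "bl fpenable".  fpenable builds a 64-byte frame, saves fr0/fr1/fr31
 * at 24/16/8(r1), captures the old FPSCR in fr31 and zeroes FPSCR;
 * routines needing extra scratch FPRs save fr2-fr5 at 32..56(r1).
 * Each routine finishes with "b fpdisable", which writes fr31 back to
 * FPSCR, reloads the saved FPRs, restores the original MSR, pops the
 * frame and returns through r12.
 */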
/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
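
/*
 * Note on the Newton-Raphson refinement in vrsqrtefp above: for
 * r ~= 1/sqrt(s), applying Newton's method to f(r) = 1/r^2 - s gives
 * r' = r + 0.5 * r * (1 - s * r * r), which is exactly the step coded
 * in the loop.  Each step roughly doubles the number of accurate bits
 * of the initial frsqrte estimate, hence the two iterations.
 */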