#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (i.e., no lazy save of the vector registers).
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code.  Note that we could rely on the VRSAVE value to
 * eventually avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr
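/*
 * Roughly, load_up_altivec() above amounts to the following C-level
 * sketch (illustrative only; save_vr_regs()/restore_vr_regs() are
 * stand-in names for the SAVE_32VRS/REST_32VRS plus VSCR sequences,
 * not real kernel helpers):
 *
 *	#ifndef CONFIG_SMP
 *	if (last_task_used_altivec) {
 *		save_vr_regs(&last_task_used_altivec->thread);
 *		last_task_used_altivec->thread.regs->msr &= ~MSR_VEC;
 *	}
 *	#endif
 *	if (mfspr(SPRN_VRSAVE) == 0)		// VRSAVE hack above
 *		mtspr(SPRN_VRSAVE, -1);
 *	regs->msr |= MSR_VEC;			// VMX usable on return
 *	current->thread.used_vr = 1;
 *	restore_vr_regs(&current->thread);
 *	#ifndef CONFIG_SMP
 *	last_task_used_altivec = current;
 *	#endif
 */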
/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX (and VSX) for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* call only if FP not already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* call only if VMX not already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */

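/*
 * For reference, __giveup_vsx() above is roughly equivalent to the
 * following C-level sketch (illustrative only; msr_vsx_enable() is a
 * stand-in for the mfmsr/oris/mtmsrd/isync sequence, not a real
 * kernel helper):
 *
 *	void __giveup_vsx(struct task_struct *tsk)
 *	{
 *		msr_vsx_enable();		// let the kernel touch VSX
 *		if (!tsk)
 *			return;			// no previous owner, done
 *		if (tsk->thread.regs)
 *			tsk->thread.regs->msr &= ~MSR_VSX;
 *	#ifndef CONFIG_SMP
 *		last_task_used_vsx = NULL;
 *	#endif
 *	}
 *
 * No vector state is saved here: as noted in the header comment,
 * the VSX registers overlap the FP and VMX register files, which are
 * saved by giveup_fpu()/giveup_altivec().
 */
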
/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
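/*
 * For reference, each of the emulation routines above operates on the
 * four 32-bit elements of a 128-bit vector; e.g. vrefp() behaves like
 * the following C sketch (illustrative only, ignoring the FPSCR
 * handling done by fpenable/fpdisable):
 *
 *	void vrefp(float *d, const float *s)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			d[i] = 1.0f / s[i];
 *	}
 */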
/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
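/*
 * Note on the iteration used in vrsqrtefp above: for y = 1/sqrt(s),
 * applying Newton-Raphson to f(r) = 1/(r*r) - s gives
 *
 *	r' = r - f(r)/f'(r) = r + 0.5 * r * (1 - s * r * r)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence performed twice
 * per element: fr3 = 1 - s*r*r, fr2 = 0.5*r, r' = fr2*fr3 + r.
 */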