#include <linux/config.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>

/*
 * Floating-point helper routines working on groups of four
 * single-precision values.  They are written in assembler so that the
 * set of floating-point registers they touch is known exactly.
 * Callers must have preemption disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0		/* 0.0 in single-precision FP */
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

/* Load a single-precision constant by address; uses r11 as scratch. */
#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0				/* 0.0 */
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

/* Load a double-precision constant from its TOC entry, addressed via r2. */
#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * fpenable - internal prologue shared by the routines below.
 *
 * Pushes a 64-byte stack frame, sets MSR_FP, saves fr0, fr1 and fr31 in
 * the new frame and sets the FPSCR to 0.  On return r10 holds the MSR
 * value read on entry and fr31 the FPSCR value read on entry; fpdisable
 * relies on both.  r11 is used as scratch and fr1 is left holding 0.0.
 *
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10			/* r10 = MSR on entry */
	ori	r11,r10,MSR_FP
	mtmsr	r11			/* turn floating point on */
	isync
	stfd	fr31,8(r1)		/* save the FP registers we clobber */
	stfd	fr1,16(r1)
	stfd	fr0,24(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31			/* fr31 = FPSCR on entry */
	mtfsf	0xff,fr1		/* FPSCR = 0 */
	blr

/*
 * fpdisable - internal epilogue; the routines below branch here to return.
 *
 * Expects the state left behind by fpenable (old MSR in r10, old FPSCR
 * in fr31, fr0/fr1/fr31 saved in the frame) and the return address of
 * the public routine in r12.  Restores all of it, pops the 64-byte
 * frame and returns through the address taken from r12.
 */
fpdisable:
	mtlr	r12
	mtfsf	0xff,fr31		/* restore FPSCR before reloading fr31 */
	lfd	fr0,24(r1)
	lfd	fr1,16(r1)
	lfd	fr31,8(r1)
	mtmsr	r10			/* put the original MSR back */
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * dst[i] = a[i] + b[i] for four single-precision elements.
 */
_GLOBAL(vaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * dst[i] = a[i] - b[i] for four single-precision elements.
 */
_GLOBAL(vsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * dst[i] = a[i] * c[i] + b[i] for four single-precision elements.
 * fr2 is used in addition to fr0/fr1 and is preserved at 32(r1).
 */
_GLOBAL(vmaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r7,0			/* r7 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements */
	stfd	fr2,32(r1)
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1		/* a * c + b */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * dst[i] = -(a[i] * c[i] - b[i]) for four single-precision elements.
 * fr2 is used in addition to fr0/fr1 and is preserved at 32(r1).
 */
_GLOBAL(vnmsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r7,0			/* r7 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements */
	stfd	fr2,32(r1)
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1		/* -(a * c - b) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	LDCONST(fr1, fpone)		/* fr1 = 1.0 for the whole loop */
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements */
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0		/* 1.0 / x */
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * The frsqrte instruction supplies the initial estimate, which is then
 * refined by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 * fr2-fr5 are used in addition to fr0/fr1 and are preserved in the
 * frame at 32(r1)..56(r1).
 */
_GLOBAL(vrsqrtefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr5,56(r1)
	stfd	fr4,48(r1)
	stfd	fr3,40(r1)
	stfd	fr2,32(r1)
	LDCONST(fr4, fpone)		/* fr4 = 1.0 */
	LDCONST(fr5, fphalf)		/* fr5 = 0.5 */
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements */
1:	lfsx	fr0,r4,r6		/* s = source element */
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	/* first Newton-Raphson step */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	/* second Newton-Raphson step */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr2,32(r1)
	lfd	fr3,40(r1)
	lfd	fr4,48(r1)
	lfd	fr5,56(r1)
	b	fpdisable