1*1da177e4SLinus Torvalds /* Software floating-point emulation. 2*1da177e4SLinus Torvalds Basic four-word fraction declaration and manipulation. 3*1da177e4SLinus Torvalds Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. 4*1da177e4SLinus Torvalds This file is part of the GNU C Library. 5*1da177e4SLinus Torvalds Contributed by Richard Henderson (rth@cygnus.com), 6*1da177e4SLinus Torvalds Jakub Jelinek (jj@ultra.linux.cz), 7*1da177e4SLinus Torvalds David S. Miller (davem@redhat.com) and 8*1da177e4SLinus Torvalds Peter Maydell (pmaydell@chiark.greenend.org.uk). 9*1da177e4SLinus Torvalds 10*1da177e4SLinus Torvalds The GNU C Library is free software; you can redistribute it and/or 11*1da177e4SLinus Torvalds modify it under the terms of the GNU Library General Public License as 12*1da177e4SLinus Torvalds published by the Free Software Foundation; either version 2 of the 13*1da177e4SLinus Torvalds License, or (at your option) any later version. 14*1da177e4SLinus Torvalds 15*1da177e4SLinus Torvalds The GNU C Library is distributed in the hope that it will be useful, 16*1da177e4SLinus Torvalds but WITHOUT ANY WARRANTY; without even the implied warranty of 17*1da177e4SLinus Torvalds MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18*1da177e4SLinus Torvalds Library General Public License for more details. 19*1da177e4SLinus Torvalds 20*1da177e4SLinus Torvalds You should have received a copy of the GNU Library General Public 21*1da177e4SLinus Torvalds License along with the GNU C Library; see the file COPYING.LIB. If 22*1da177e4SLinus Torvalds not, write to the Free Software Foundation, Inc., 23*1da177e4SLinus Torvalds 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ 24*1da177e4SLinus Torvalds 25*1da177e4SLinus Torvalds #ifndef __MATH_EMU_OP_4_H__ 26*1da177e4SLinus Torvalds #define __MATH_EMU_OP_4_H__ 27*1da177e4SLinus Torvalds 28*1da177e4SLinus Torvalds #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] 29*1da177e4SLinus Torvalds #define _FP_FRAC_COPY_4(D,S) \ 30*1da177e4SLinus Torvalds (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ 31*1da177e4SLinus Torvalds D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) 32*1da177e4SLinus Torvalds #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) 33*1da177e4SLinus Torvalds #define _FP_FRAC_HIGH_4(X) (X##_f[3]) 34*1da177e4SLinus Torvalds #define _FP_FRAC_LOW_4(X) (X##_f[0]) 35*1da177e4SLinus Torvalds #define _FP_FRAC_WORD_4(X,w) (X##_f[w]) 36*1da177e4SLinus Torvalds 37*1da177e4SLinus Torvalds #define _FP_FRAC_SLL_4(X,N) \ 38*1da177e4SLinus Torvalds do { \ 39*1da177e4SLinus Torvalds _FP_I_TYPE _up, _down, _skip, _i; \ 40*1da177e4SLinus Torvalds _skip = (N) / _FP_W_TYPE_SIZE; \ 41*1da177e4SLinus Torvalds _up = (N) % _FP_W_TYPE_SIZE; \ 42*1da177e4SLinus Torvalds _down = _FP_W_TYPE_SIZE - _up; \ 43*1da177e4SLinus Torvalds if (!_up) \ 44*1da177e4SLinus Torvalds for (_i = 3; _i >= _skip; --_i) \ 45*1da177e4SLinus Torvalds X##_f[_i] = X##_f[_i-_skip]; \ 46*1da177e4SLinus Torvalds else \ 47*1da177e4SLinus Torvalds { \ 48*1da177e4SLinus Torvalds for (_i = 3; _i > _skip; --_i) \ 49*1da177e4SLinus Torvalds X##_f[_i] = X##_f[_i-_skip] << _up \ 50*1da177e4SLinus Torvalds | X##_f[_i-_skip-1] >> _down; \ 51*1da177e4SLinus Torvalds X##_f[_i--] = X##_f[0] << _up; \ 52*1da177e4SLinus Torvalds } \ 53*1da177e4SLinus Torvalds for (; _i >= 0; --_i) \ 54*1da177e4SLinus Torvalds X##_f[_i] = 0; \ 55*1da177e4SLinus Torvalds } while (0) 56*1da177e4SLinus Torvalds 57*1da177e4SLinus Torvalds /* This one was broken too */ 58*1da177e4SLinus Torvalds #define _FP_FRAC_SRL_4(X,N) \ 59*1da177e4SLinus Torvalds do { \ 60*1da177e4SLinus Torvalds _FP_I_TYPE _up, _down, _skip, _i; \ 61*1da177e4SLinus Torvalds _skip = (N) / _FP_W_TYPE_SIZE; 
\ 62*1da177e4SLinus Torvalds _down = (N) % _FP_W_TYPE_SIZE; \ 63*1da177e4SLinus Torvalds _up = _FP_W_TYPE_SIZE - _down; \ 64*1da177e4SLinus Torvalds if (!_down) \ 65*1da177e4SLinus Torvalds for (_i = 0; _i <= 3-_skip; ++_i) \ 66*1da177e4SLinus Torvalds X##_f[_i] = X##_f[_i+_skip]; \ 67*1da177e4SLinus Torvalds else \ 68*1da177e4SLinus Torvalds { \ 69*1da177e4SLinus Torvalds for (_i = 0; _i < 3-_skip; ++_i) \ 70*1da177e4SLinus Torvalds X##_f[_i] = X##_f[_i+_skip] >> _down \ 71*1da177e4SLinus Torvalds | X##_f[_i+_skip+1] << _up; \ 72*1da177e4SLinus Torvalds X##_f[_i++] = X##_f[3] >> _down; \ 73*1da177e4SLinus Torvalds } \ 74*1da177e4SLinus Torvalds for (; _i < 4; ++_i) \ 75*1da177e4SLinus Torvalds X##_f[_i] = 0; \ 76*1da177e4SLinus Torvalds } while (0) 77*1da177e4SLinus Torvalds 78*1da177e4SLinus Torvalds 79*1da177e4SLinus Torvalds /* Right shift with sticky-lsb. 80*1da177e4SLinus Torvalds * What this actually means is that we do a standard right-shift, 81*1da177e4SLinus Torvalds * but that if any of the bits that fall off the right hand side 82*1da177e4SLinus Torvalds * were one then we always set the LSbit. 
83*1da177e4SLinus Torvalds */ 84*1da177e4SLinus Torvalds #define _FP_FRAC_SRS_4(X,N,size) \ 85*1da177e4SLinus Torvalds do { \ 86*1da177e4SLinus Torvalds _FP_I_TYPE _up, _down, _skip, _i; \ 87*1da177e4SLinus Torvalds _FP_W_TYPE _s; \ 88*1da177e4SLinus Torvalds _skip = (N) / _FP_W_TYPE_SIZE; \ 89*1da177e4SLinus Torvalds _down = (N) % _FP_W_TYPE_SIZE; \ 90*1da177e4SLinus Torvalds _up = _FP_W_TYPE_SIZE - _down; \ 91*1da177e4SLinus Torvalds for (_s = _i = 0; _i < _skip; ++_i) \ 92*1da177e4SLinus Torvalds _s |= X##_f[_i]; \ 93*1da177e4SLinus Torvalds _s |= X##_f[_i] << _up; \ 94*1da177e4SLinus Torvalds /* s is now != 0 if we want to set the LSbit */ \ 95*1da177e4SLinus Torvalds if (!_down) \ 96*1da177e4SLinus Torvalds for (_i = 0; _i <= 3-_skip; ++_i) \ 97*1da177e4SLinus Torvalds X##_f[_i] = X##_f[_i+_skip]; \ 98*1da177e4SLinus Torvalds else \ 99*1da177e4SLinus Torvalds { \ 100*1da177e4SLinus Torvalds for (_i = 0; _i < 3-_skip; ++_i) \ 101*1da177e4SLinus Torvalds X##_f[_i] = X##_f[_i+_skip] >> _down \ 102*1da177e4SLinus Torvalds | X##_f[_i+_skip+1] << _up; \ 103*1da177e4SLinus Torvalds X##_f[_i++] = X##_f[3] >> _down; \ 104*1da177e4SLinus Torvalds } \ 105*1da177e4SLinus Torvalds for (; _i < 4; ++_i) \ 106*1da177e4SLinus Torvalds X##_f[_i] = 0; \ 107*1da177e4SLinus Torvalds /* don't fix the LSB until the very end when we're sure f[0] is stable */ \ 108*1da177e4SLinus Torvalds X##_f[0] |= (_s != 0); \ 109*1da177e4SLinus Torvalds } while (0) 110*1da177e4SLinus Torvalds 111*1da177e4SLinus Torvalds #define _FP_FRAC_ADD_4(R,X,Y) \ 112*1da177e4SLinus Torvalds __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ 113*1da177e4SLinus Torvalds X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 114*1da177e4SLinus Torvalds Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 115*1da177e4SLinus Torvalds 116*1da177e4SLinus Torvalds #define _FP_FRAC_SUB_4(R,X,Y) \ 117*1da177e4SLinus Torvalds __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ 118*1da177e4SLinus Torvalds X##_f[3], X##_f[2], 
X##_f[1], X##_f[0], \ 119*1da177e4SLinus Torvalds Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 120*1da177e4SLinus Torvalds 121*1da177e4SLinus Torvalds #define _FP_FRAC_DEC_4(X,Y) \ 122*1da177e4SLinus Torvalds __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 123*1da177e4SLinus Torvalds Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 124*1da177e4SLinus Torvalds 125*1da177e4SLinus Torvalds #define _FP_FRAC_ADDI_4(X,I) \ 126*1da177e4SLinus Torvalds __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) 127*1da177e4SLinus Torvalds 128*1da177e4SLinus Torvalds #define _FP_ZEROFRAC_4 0,0,0,0 129*1da177e4SLinus Torvalds #define _FP_MINFRAC_4 0,0,0,1 130*1da177e4SLinus Torvalds #define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 131*1da177e4SLinus Torvalds 132*1da177e4SLinus Torvalds #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) 133*1da177e4SLinus Torvalds #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) 134*1da177e4SLinus Torvalds #define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 135*1da177e4SLinus Torvalds #define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 136*1da177e4SLinus Torvalds 137*1da177e4SLinus Torvalds #define _FP_FRAC_EQ_4(X,Y) \ 138*1da177e4SLinus Torvalds (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ 139*1da177e4SLinus Torvalds && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) 140*1da177e4SLinus Torvalds 141*1da177e4SLinus Torvalds #define _FP_FRAC_GT_4(X,Y) \ 142*1da177e4SLinus Torvalds (X##_f[3] > Y##_f[3] || \ 143*1da177e4SLinus Torvalds (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ 144*1da177e4SLinus Torvalds (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ 145*1da177e4SLinus Torvalds (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ 146*1da177e4SLinus Torvalds )) \ 147*1da177e4SLinus Torvalds )) \ 148*1da177e4SLinus Torvalds ) 149*1da177e4SLinus Torvalds 150*1da177e4SLinus Torvalds #define _FP_FRAC_GE_4(X,Y) \ 
151*1da177e4SLinus Torvalds (X##_f[3] > Y##_f[3] || \ 152*1da177e4SLinus Torvalds (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ 153*1da177e4SLinus Torvalds (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ 154*1da177e4SLinus Torvalds (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ 155*1da177e4SLinus Torvalds )) \ 156*1da177e4SLinus Torvalds )) \ 157*1da177e4SLinus Torvalds ) 158*1da177e4SLinus Torvalds 159*1da177e4SLinus Torvalds 160*1da177e4SLinus Torvalds #define _FP_FRAC_CLZ_4(R,X) \ 161*1da177e4SLinus Torvalds do { \ 162*1da177e4SLinus Torvalds if (X##_f[3]) \ 163*1da177e4SLinus Torvalds { \ 164*1da177e4SLinus Torvalds __FP_CLZ(R,X##_f[3]); \ 165*1da177e4SLinus Torvalds } \ 166*1da177e4SLinus Torvalds else if (X##_f[2]) \ 167*1da177e4SLinus Torvalds { \ 168*1da177e4SLinus Torvalds __FP_CLZ(R,X##_f[2]); \ 169*1da177e4SLinus Torvalds R += _FP_W_TYPE_SIZE; \ 170*1da177e4SLinus Torvalds } \ 171*1da177e4SLinus Torvalds else if (X##_f[1]) \ 172*1da177e4SLinus Torvalds { \ 173*1da177e4SLinus Torvalds __FP_CLZ(R,X##_f[2]); \ 174*1da177e4SLinus Torvalds R += _FP_W_TYPE_SIZE*2; \ 175*1da177e4SLinus Torvalds } \ 176*1da177e4SLinus Torvalds else \ 177*1da177e4SLinus Torvalds { \ 178*1da177e4SLinus Torvalds __FP_CLZ(R,X##_f[0]); \ 179*1da177e4SLinus Torvalds R += _FP_W_TYPE_SIZE*3; \ 180*1da177e4SLinus Torvalds } \ 181*1da177e4SLinus Torvalds } while(0) 182*1da177e4SLinus Torvalds 183*1da177e4SLinus Torvalds 184*1da177e4SLinus Torvalds #define _FP_UNPACK_RAW_4(fs, X, val) \ 185*1da177e4SLinus Torvalds do { \ 186*1da177e4SLinus Torvalds union _FP_UNION_##fs _flo; _flo.flt = (val); \ 187*1da177e4SLinus Torvalds X##_f[0] = _flo.bits.frac0; \ 188*1da177e4SLinus Torvalds X##_f[1] = _flo.bits.frac1; \ 189*1da177e4SLinus Torvalds X##_f[2] = _flo.bits.frac2; \ 190*1da177e4SLinus Torvalds X##_f[3] = _flo.bits.frac3; \ 191*1da177e4SLinus Torvalds X##_e = _flo.bits.exp; \ 192*1da177e4SLinus Torvalds X##_s = _flo.bits.sign; \ 193*1da177e4SLinus Torvalds } while (0) 
194*1da177e4SLinus Torvalds 195*1da177e4SLinus Torvalds #define _FP_UNPACK_RAW_4_P(fs, X, val) \ 196*1da177e4SLinus Torvalds do { \ 197*1da177e4SLinus Torvalds union _FP_UNION_##fs *_flo = \ 198*1da177e4SLinus Torvalds (union _FP_UNION_##fs *)(val); \ 199*1da177e4SLinus Torvalds \ 200*1da177e4SLinus Torvalds X##_f[0] = _flo->bits.frac0; \ 201*1da177e4SLinus Torvalds X##_f[1] = _flo->bits.frac1; \ 202*1da177e4SLinus Torvalds X##_f[2] = _flo->bits.frac2; \ 203*1da177e4SLinus Torvalds X##_f[3] = _flo->bits.frac3; \ 204*1da177e4SLinus Torvalds X##_e = _flo->bits.exp; \ 205*1da177e4SLinus Torvalds X##_s = _flo->bits.sign; \ 206*1da177e4SLinus Torvalds } while (0) 207*1da177e4SLinus Torvalds 208*1da177e4SLinus Torvalds #define _FP_PACK_RAW_4(fs, val, X) \ 209*1da177e4SLinus Torvalds do { \ 210*1da177e4SLinus Torvalds union _FP_UNION_##fs _flo; \ 211*1da177e4SLinus Torvalds _flo.bits.frac0 = X##_f[0]; \ 212*1da177e4SLinus Torvalds _flo.bits.frac1 = X##_f[1]; \ 213*1da177e4SLinus Torvalds _flo.bits.frac2 = X##_f[2]; \ 214*1da177e4SLinus Torvalds _flo.bits.frac3 = X##_f[3]; \ 215*1da177e4SLinus Torvalds _flo.bits.exp = X##_e; \ 216*1da177e4SLinus Torvalds _flo.bits.sign = X##_s; \ 217*1da177e4SLinus Torvalds (val) = _flo.flt; \ 218*1da177e4SLinus Torvalds } while (0) 219*1da177e4SLinus Torvalds 220*1da177e4SLinus Torvalds #define _FP_PACK_RAW_4_P(fs, val, X) \ 221*1da177e4SLinus Torvalds do { \ 222*1da177e4SLinus Torvalds union _FP_UNION_##fs *_flo = \ 223*1da177e4SLinus Torvalds (union _FP_UNION_##fs *)(val); \ 224*1da177e4SLinus Torvalds \ 225*1da177e4SLinus Torvalds _flo->bits.frac0 = X##_f[0]; \ 226*1da177e4SLinus Torvalds _flo->bits.frac1 = X##_f[1]; \ 227*1da177e4SLinus Torvalds _flo->bits.frac2 = X##_f[2]; \ 228*1da177e4SLinus Torvalds _flo->bits.frac3 = X##_f[3]; \ 229*1da177e4SLinus Torvalds _flo->bits.exp = X##_e; \ 230*1da177e4SLinus Torvalds _flo->bits.sign = X##_s; \ 231*1da177e4SLinus Torvalds } while (0) 232*1da177e4SLinus Torvalds 233*1da177e4SLinus Torvalds 
/* 234*1da177e4SLinus Torvalds * Multiplication algorithms: 235*1da177e4SLinus Torvalds */ 236*1da177e4SLinus Torvalds 237*1da177e4SLinus Torvalds /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 238*1da177e4SLinus Torvalds 239*1da177e4SLinus Torvalds #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ 240*1da177e4SLinus Torvalds do { \ 241*1da177e4SLinus Torvalds _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 242*1da177e4SLinus Torvalds _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \ 243*1da177e4SLinus Torvalds \ 244*1da177e4SLinus Torvalds doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \ 245*1da177e4SLinus Torvalds doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ 246*1da177e4SLinus Torvalds doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ 247*1da177e4SLinus Torvalds doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ 248*1da177e4SLinus Torvalds doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ 249*1da177e4SLinus Torvalds doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ 250*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 251*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \ 252*1da177e4SLinus Torvalds 0,0,_FP_FRAC_WORD_8(_z,1)); \ 253*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 254*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \ 255*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 256*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,1)); \ 257*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 258*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \ 259*1da177e4SLinus Torvalds 0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \ 260*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 261*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \ 262*1da177e4SLinus Torvalds 
_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 263*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,2)); \ 264*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 265*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \ 266*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 267*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,2)); \ 268*1da177e4SLinus Torvalds doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ 269*1da177e4SLinus Torvalds doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ 270*1da177e4SLinus Torvalds doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ 271*1da177e4SLinus Torvalds doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ 272*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 273*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \ 274*1da177e4SLinus Torvalds 0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \ 275*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 276*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \ 277*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 278*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3)); \ 279*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 280*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \ 281*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 282*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3)); \ 283*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 284*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \ 285*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 286*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,3)); \ 287*1da177e4SLinus Torvalds doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ 288*1da177e4SLinus Torvalds doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ 289*1da177e4SLinus Torvalds doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ 290*1da177e4SLinus 
Torvalds doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ 291*1da177e4SLinus Torvalds doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ 292*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 293*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \ 294*1da177e4SLinus Torvalds 0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \ 295*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 296*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \ 297*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 298*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,4)); \ 299*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 300*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \ 301*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 302*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,4)); \ 303*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 304*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \ 305*1da177e4SLinus Torvalds 0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \ 306*1da177e4SLinus Torvalds __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 307*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \ 308*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 309*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,5)); \ 310*1da177e4SLinus Torvalds doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ 311*1da177e4SLinus Torvalds __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 312*1da177e4SLinus Torvalds _b_f1,_b_f0, \ 313*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \ 314*1da177e4SLinus Torvalds \ 315*1da177e4SLinus Torvalds /* Normalize since we know where the msb of the multiplicands \ 316*1da177e4SLinus Torvalds were (bit B), we know that the msb of the of the product is \ 317*1da177e4SLinus Torvalds at either 2B or 
2B-1. */ \ 318*1da177e4SLinus Torvalds _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ 319*1da177e4SLinus Torvalds __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ 320*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ 321*1da177e4SLinus Torvalds } while (0) 322*1da177e4SLinus Torvalds 323*1da177e4SLinus Torvalds #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \ 324*1da177e4SLinus Torvalds do { \ 325*1da177e4SLinus Torvalds _FP_FRAC_DECL_8(_z); \ 326*1da177e4SLinus Torvalds \ 327*1da177e4SLinus Torvalds mpn_mul_n(_z_f, _x_f, _y_f, 4); \ 328*1da177e4SLinus Torvalds \ 329*1da177e4SLinus Torvalds /* Normalize since we know where the msb of the multiplicands \ 330*1da177e4SLinus Torvalds were (bit B), we know that the msb of the of the product is \ 331*1da177e4SLinus Torvalds at either 2B or 2B-1. */ \ 332*1da177e4SLinus Torvalds _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ 333*1da177e4SLinus Torvalds __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ 334*1da177e4SLinus Torvalds _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ 335*1da177e4SLinus Torvalds } while (0) 336*1da177e4SLinus Torvalds 337*1da177e4SLinus Torvalds /* 338*1da177e4SLinus Torvalds * Helper utility for _FP_DIV_MEAT_4_udiv: 339*1da177e4SLinus Torvalds * pppp = m * nnn 340*1da177e4SLinus Torvalds */ 341*1da177e4SLinus Torvalds #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \ 342*1da177e4SLinus Torvalds do { \ 343*1da177e4SLinus Torvalds UWtype _t; \ 344*1da177e4SLinus Torvalds umul_ppmm(p1,p0,m,n0); \ 345*1da177e4SLinus Torvalds umul_ppmm(p2,_t,m,n1); \ 346*1da177e4SLinus Torvalds __FP_FRAC_ADDI_2(p2,p1,_t); \ 347*1da177e4SLinus Torvalds umul_ppmm(p3,_t,m,n2); \ 348*1da177e4SLinus Torvalds __FP_FRAC_ADDI_2(p3,p2,_t); \ 349*1da177e4SLinus Torvalds } while (0) 350*1da177e4SLinus Torvalds 351*1da177e4SLinus Torvalds /* 352*1da177e4SLinus Torvalds * Division algorithms: 353*1da177e4SLinus Torvalds */ 354*1da177e4SLinus Torvalds 355*1da177e4SLinus 
/* Four-word fraction division R = X / Y built on udiv_qrnnd.
 *
 * X and Y are modified.  If X > Y, X is first shifted right by one bit
 * (the dropped bit is kept in _n_f[3]); otherwise the exponent R_e is
 * decremented.  Y is then shifted left by _FP_WFRACXBITS_<fs>.
 * The quotient words R_f[3] .. R_f[0] are produced one per iteration,
 * most significant first; each estimate from udiv_qrnnd is corrected
 * at most twice.  After the last word, a non-zero remainder sets
 * _FP_WORK_STICKY in R_f[0].
 */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                    \
  do {                                                                      \
    int _i;                                                                 \
    _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                               \
    _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                     \
    if (_FP_FRAC_GT_4(X, Y))                                                \
      {                                                                     \
        _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                        \
        _FP_FRAC_SRL_4(X, 1);                                               \
      }                                                                     \
    else                                                                    \
      R##_e--;                                                              \
                                                                            \
    /* Normalize, i.e. make the most significant bit of the                 \
       denominator set.  */                                                 \
    _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                                 \
                                                                            \
    for (_i = 3; ; _i--)                                                    \
      {                                                                     \
        if (X##_f[3] == Y##_f[3])                                           \
          {                                                                 \
            /* This is a special case, not an optimization                  \
               (X##_f[3]/Y##_f[3] would not fit into UWtype).               \
               As X## is guaranteed to be < Y, R##_f[_i] can be either      \
               (UWtype)-1 or (UWtype)-2.  */                                \
            R##_f[_i] = -1;                                                 \
            if (!_i)                                                        \
              break;                                                        \
            __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],         \
                            Y##_f[2], Y##_f[1], Y##_f[0], 0,                \
                            X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);        \
            _FP_FRAC_SUB_4(X, Y, X);                                        \
            if (X##_f[3] > Y##_f[3])                                        \
              {                                                             \
                R##_f[_i] = -2;                                             \
                _FP_FRAC_ADD_4(X, Y, X);                                    \
              }                                                             \
          }                                                                 \
        else                                                                \
          {                                                                 \
            /* Estimate the quotient word from the two top words of X       \
               and the top word of Y; _m is the estimate times the          \
               three low words of Y.  */                                    \
            udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
            umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],               \
                          R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);         \
            /* Shift the next dividend word into the remainder.  */         \
            X##_f[2] = X##_f[1];                                            \
            X##_f[1] = X##_f[0];                                            \
            X##_f[0] = _n_f[_i];                                            \
            if (_FP_FRAC_GT_4(_m, X))                                       \
              {                                                             \
                R##_f[_i]--;                                                \
                _FP_FRAC_ADD_4(X, Y, X);                                    \
                if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))            \
                  {                                                         \
                    R##_f[_i]--;                                            \
                    _FP_FRAC_ADD_4(X, Y, X);                                \
                  }                                                         \
              }                                                             \
            _FP_FRAC_DEC_4(X, _m);                                          \
            if (!_i)                                                        \
              {                                                             \
                if (!_FP_FRAC_EQ_4(X, _m))                                  \
                  R##_f[0] |= _FP_WORK_STICKY;                              \
                break;                                                      \
              }                                                             \
          }                                                                 \
      }                                                                     \
  } while (0)
/*
 * Square root algorithms:
 * We have just one right now, maybe Newton approximation
 * should be added for those machines where division is fast.
 */

/* Bit-by-bit square root on four-word fractions.
 *
 * R accumulates the result, S and T are scratch fractions, X is the
 * operand (consumed) and q the single-bit mask to start from in the
 * top word.  The four loops walk q through words 3, 2, 1 and 0; the
 * last one stops when q reaches _FP_WORK_ROUND.  A non-zero remainder
 * finally sets _FP_WORK_STICKY (and _FP_WORK_ROUND if X > S) in R_f[0].
 */
#define _FP_SQRT_MEAT_4(R, S, T, X, q)                          \
  do {                                                          \
    while (q)                                                   \
      {                                                         \
        T##_f[3] = S##_f[3] + q;                                \
        if (T##_f[3] <= X##_f[3])                               \
          {                                                     \
            S##_f[3] = T##_f[3] + q;                            \
            X##_f[3] -= T##_f[3];                               \
            R##_f[3] += q;                                      \
          }                                                     \
        _FP_FRAC_SLL_4(X, 1);                                   \
        q >>= 1;                                                \
      }                                                         \
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
    while (q)                                                   \
      {                                                         \
        T##_f[2] = S##_f[2] + q;                                \
        T##_f[3] = S##_f[3];                                    \
        if (T##_f[3] < X##_f[3] ||                              \
            (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
          {                                                     \
            S##_f[2] = T##_f[2] + q;                            \
            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
            __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
                            T##_f[3], T##_f[2]);                \
            R##_f[2] += q;                                      \
          }                                                     \
        _FP_FRAC_SLL_4(X, 1);                                   \
        q >>= 1;                                                \
      }                                                         \
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
    while (q)                                                   \
      {                                                         \
        T##_f[1] = S##_f[1] + q;                                \
        T##_f[2] = S##_f[2];                                    \
        T##_f[3] = S##_f[3];                                    \
        if (T##_f[3] < X##_f[3] ||                              \
            (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
             (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
          {                                                     \
            S##_f[1] = T##_f[1] + q;                            \
            S##_f[2] += (T##_f[1] > S##_f[1]);                  \
            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
            __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
                            T##_f[3], T##_f[2], T##_f[1]);      \
            R##_f[1] += q;                                      \
          }                                                     \
        _FP_FRAC_SLL_4(X, 1);                                   \
        q >>= 1;                                                \
      }                                                         \
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
    while (q != _FP_WORK_ROUND)                                 \
      {                                                         \
        T##_f[0] = S##_f[0] + q;                                \
        T##_f[1] = S##_f[1];                                    \
        T##_f[2] = S##_f[2];                                    \
        T##_f[3] = S##_f[3];                                    \
        if (_FP_FRAC_GE_4(X,T))                                 \
          {                                                     \
            S##_f[0] = T##_f[0] + q;                            \
            S##_f[1] += (T##_f[0] > S##_f[0]);                  \
            S##_f[2] += (T##_f[1] > S##_f[1]);                  \
            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
            _FP_FRAC_DEC_4(X, T);                               \
            R##_f[0] += q;                                      \
          }                                                     \
        _FP_FRAC_SLL_4(X, 1);                                   \
        q >>= 1;                                                \
      }                                                         \
    if (!_FP_FRAC_ZEROP_4(X))                                   \
      {                                                         \
        if (_FP_FRAC_GT_4(X,S))                                 \
          R##_f[0] |= _FP_WORK_ROUND;                           \
        R##_f[0] |= _FP_WORK_STICKY;                            \
      }                                                         \
  } while (0)


/*
 * Internals
 */

/* Store four words into X, most significant word first.  */
#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)

/* (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0); the carry out of the top word
   is dropped.  Each x word is re-read after the matching result word
   has been stored, so the result may alias y but must not alias x.  */
#ifndef __FP_FRAC_ADD_3
#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
  do {                                                          \
    int _c1, _c2;                                               \
    r0 = x0 + y0;                                               \
    _c1 = r0 < x0;                                              \
    r1 = x1 + y1;                                               \
    _c2 = r1 < x1;                                              \
    r1 += _c1;                                                  \
    _c2 |= r1 < _c1;                                            \
    r2 = x2 + y2 + _c2;                                         \
  } while (0)
#endif

/* Four-word variant of __FP_FRAC_ADD_3, with the same aliasing rule.  */
#ifndef __FP_FRAC_ADD_4
#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
  do {                                                          \
    int _c1, _c2, _c3;                                          \
    r0 = x0 + y0;                                               \
    _c1 = r0 < x0;                                              \
    r1 = x1 + y1;                                               \
    _c2 = r1 < x1;                                              \
    r1 += _c1;                                                  \
    _c2 |= r1 < _c1;                                            \
    r2 = x2 + y2;                                               \
    _c3 = r2 < x2;                                              \
    r2 += _c2;                                                  \
    _c3 |= r2 < _c2;                                            \
    r3 = x3 + y3 + _c3;                                         \
  } while (0)
#endif

/* (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0); the borrow out of the top word
   is dropped.  All operands are copied before any result word is
   written, so the result may alias x and/or y in any position.  A word
   borrows when x < y, or when x == y and the word below borrowed.  */
#ifndef __FP_FRAC_SUB_3
#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)                     \
  do {                                                                  \
    const _FP_W_TYPE _sx0 = (x0), _sx1 = (x1), _sx2 = (x2);             \
    const _FP_W_TYPE _sy0 = (y0), _sy1 = (y1), _sy2 = (y2);             \
    const int _b1 = _sx0 < _sy0;                                        \
    const int _b2 = _sx1 < _sy1 || (_b1 && _sx1 == _sy1);               \
    r0 = _sx0 - _sy0;                                                   \
    r1 = _sx1 - _sy1 - _b1;                                             \
    r2 = _sx2 - _sy2 - _b2;                                             \
  } while (0)
#endif

/* Four-word variant of __FP_FRAC_SUB_3, with the same guarantees.  */
#ifndef __FP_FRAC_SUB_4
#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)            \
  do {                                                                  \
    const _FP_W_TYPE _sx0 = (x0), _sx1 = (x1);                          \
    const _FP_W_TYPE _sx2 = (x2), _sx3 = (x3);                          \
    const _FP_W_TYPE _sy0 = (y0), _sy1 = (y1);                          \
    const _FP_W_TYPE _sy2 = (y2), _sy3 = (y3);                          \
    const int _b1 = _sx0 < _sy0;                                        \
    const int _b2 = _sx1 < _sy1 || (_b1 && _sx1 == _sy1);               \
    const int _b3 = _sx2 < _sy2 || (_b2 && _sx2 == _sy2);               \
    r0 = _sx0 - _sy0;                                                   \
    r1 = _sx1 - _sy1 - _b1;                                             \
    r2 = _sx2 - _sy2 - _b2;                                             \
    r3 = _sx3 - _sy3 - _b3;                                             \
  } while (0)
#endif

/* (x2,x1,x0) -= (y2,y1,y0).  */
#ifndef __FP_FRAC_DEC_3
#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                              \
  do {                                                                  \
    UWtype _t0, _t1, _t2;                                               \
    _t0 = x0, _t1 = x1, _t2 = x2;                                       \
    __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);            \
  } while (0)
#endif

/*
 * x -= y on four-word fractions.  x is copied into temporaries first so
 * that the words passed to __FP_FRAC_SUB_4 as the minuend are distinct
 * from the words it writes the result into.
 */
#ifndef __FP_FRAC_DEC_4
#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)                        \
  do {                                                                  \
    _FP_W_TYPE _t0, _t1, _t2, _t3;                                      \
    _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                             \
    __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);         \
  } while (0)
#endif

/*
 * Add the single-word value i to the four-word fraction x3:x2:x1:x0,
 * rippling the carry upwards.  A carry out of x3 is discarded.
 * The addend is evaluated once and latched, so an expression such as
 * "a | b" or one with side effects is safe to pass.
 */
#ifndef __FP_FRAC_ADDI_4
#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
  do {                                                                  \
    const _FP_W_TYPE _addend = (i);                                     \
    _FP_W_TYPE _t;                                                      \
    _t = ((x0 += _addend) < _addend);                                   \
    x1 += _t; _t = (x1 < _t);                                           \
    x2 += _t; _t = (x2 < _t);                                           \
    x3 += _t;                                                           \
  } while (0)
#endif

/* Convert FP values between word sizes.  This appears to be more
 * complicated than I'd have expected it to be, so these might be
 * wrong...  These macros are in any case somewhat bogus because they
 * use information about what various FRAC_n variables look like
 * internally [e.g. that 2-word vars are X_f0 and X_f1].  But so do
 * the ones in op-2.h and op-1.h.
 */

/*
 * Narrow the four-word fraction S into the one-word fraction D.
 * S is shifted right IN PLACE by the difference in working fraction
 * bits: with _FP_FRAC_SRS_4 unless S##_c is FP_CLS_NAN, in which case
 * with _FP_FRAC_SRL_4.  (Presumably SRS is the sticky-bit-preserving
 * shift; it is defined elsewhere in this file.)  The low word of S is
 * then copied to D.
 */
#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
  do {                                                                  \
    if (S##_c != FP_CLS_NAN)                                            \
      {                                                                 \
        _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),  \
                       _FP_WFRACBITS_##sfs);                            \
      }                                                                 \
    else                                                                \
      {                                                                 \
        _FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
      }                                                                 \
    D##_f = S##_f[0];                                                   \
  } while (0)

/*
 * Narrow the four-word fraction S into the two-word fraction D.
 * Same in-place shift of S as _FP_FRAC_CONV_1_4; the two low words of S
 * are then copied to D##_f0 and D##_f1.
 */
#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
  do {                                                                  \
    if (S##_c != FP_CLS_NAN)                                            \
      {                                                                 \
        _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),  \
                       _FP_WFRACBITS_##sfs);                            \
      }                                                                 \
    else                                                                \
      {                                                                 \
        _FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
      }                                                                 \
    D##_f0 = S##_f[0];                                                  \
    D##_f1 = S##_f[1];                                                  \
  } while (0)

/* Assembly/disassembly for converting to/from integral types.
 * No shifting or overflow handled here.
 */
/* Put the FP value X into r, which is an integer of size rsize (in bits,
 * as it is compared against multiples of _FP_W_TYPE_SIZE).  r must be an
 * lvalue.
 */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
  do {                                                                  \
    if ((rsize) <= _FP_W_TYPE_SIZE)                                     \
      (r) = X##_f[0];                                                   \
    else if ((rsize) <= 2*_FP_W_TYPE_SIZE)                              \
      {                                                                 \
        (r) = X##_f[1];                                                 \
        (r) <<= _FP_W_TYPE_SIZE;                                        \
        (r) += X##_f[0];                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* I'm feeling lazy so we deal with int == 3 words */           \
        /* (implausible) and int == 4 words as a single case.  */       \
        (r) = X##_f[3];                                                 \
        (r) <<= _FP_W_TYPE_SIZE;                                        \
        (r) += X##_f[2];                                                \
        (r) <<= _FP_W_TYPE_SIZE;                                        \
        (r) += X##_f[1];                                                \
        (r) <<= _FP_W_TYPE_SIZE;                                        \
        (r) += X##_f[0];                                                \
      }                                                                 \
  } while (0)

/* "No disassemble Number Five!" */
/* Move an integer r of size rsize into X's fractional part.  We rely on
 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
 * having to mask the values we store into it.  Words that lie beyond
 * rsize are set to zero.
 */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
  do {                                                                  \
    X##_f[0] = (r);                                                     \
    X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE                              \
                ? 0 : (r) >> _FP_W_TYPE_SIZE);                          \
    X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE                            \
                ? 0 : (r) >> (2*_FP_W_TYPE_SIZE));                      \
    X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE                            \
                ? 0 : (r) >> (3*_FP_W_TYPE_SIZE));                      \
  } while (0)

/*
 * Widen the one-word fraction S into the four-word fraction D: copy the
 * word into D's low word, clear the rest, then shift D left by the
 * difference in working fraction bits.
 */
#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
  do {                                                                  \
    D##_f[0] = S##_f;                                                   \
    D##_f[1] = D##_f[2] = D##_f[3] = 0;                                 \
    _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
  } while (0)

/*
 * Widen the two-word fraction S (S##_f0, S##_f1) into the four-word
 * fraction D: copy both words into D's low words, clear the rest, then
 * shift D left by the difference in working fraction bits.
 */
#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
  do {                                                                  \
    D##_f[0] = S##_f0;                                                  \
    D##_f[1] = S##_f1;                                                  \
    D##_f[2] = D##_f[3] = 0;                                            \
    _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
  } while (0)

#endif /* __MATH_EMU_OP_4_H__ */