1 /* Software floating-point emulation. 2 Basic four-word fraction declaration and manipulation. 3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 5 Contributed by Richard Henderson (rth@cygnus.com), 6 Jakub Jelinek (jj@ultra.linux.cz), 7 David S. Miller (davem@redhat.com) and 8 Peter Maydell (pmaydell@chiark.greenend.org.uk). 9 10 The GNU C Library is free software; you can redistribute it and/or 11 modify it under the terms of the GNU Library General Public License as 12 published by the Free Software Foundation; either version 2 of the 13 License, or (at your option) any later version. 14 15 The GNU C Library is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Library General Public License for more details. 19 20 You should have received a copy of the GNU Library General Public 21 License along with the GNU C Library; see the file COPYING.LIB. If 22 not, write to the Free Software Foundation, Inc., 23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 24 25 #ifndef __MATH_EMU_OP_4_H__ 26 #define __MATH_EMU_OP_4_H__ 27 28 #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] 29 #define _FP_FRAC_COPY_4(D,S) \ 30 (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ 31 D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) 32 #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) 33 #define _FP_FRAC_HIGH_4(X) (X##_f[3]) 34 #define _FP_FRAC_LOW_4(X) (X##_f[0]) 35 #define _FP_FRAC_WORD_4(X,w) (X##_f[w]) 36 37 #define _FP_FRAC_SLL_4(X,N) \ 38 do { \ 39 _FP_I_TYPE _up, _down, _skip, _i; \ 40 _skip = (N) / _FP_W_TYPE_SIZE; \ 41 _up = (N) % _FP_W_TYPE_SIZE; \ 42 _down = _FP_W_TYPE_SIZE - _up; \ 43 if (!_up) \ 44 for (_i = 3; _i >= _skip; --_i) \ 45 X##_f[_i] = X##_f[_i-_skip]; \ 46 else \ 47 { \ 48 for (_i = 3; _i > _skip; --_i) \ 49 X##_f[_i] = X##_f[_i-_skip] << _up \ 50 | X##_f[_i-_skip-1] >> _down; \ 51 X##_f[_i--] = X##_f[0] << _up; \ 52 } \ 53 for (; _i >= 0; --_i) \ 54 X##_f[_i] = 0; \ 55 } while (0) 56 57 /* This one was broken too */ 58 #define _FP_FRAC_SRL_4(X,N) \ 59 do { \ 60 _FP_I_TYPE _up, _down, _skip, _i; \ 61 _skip = (N) / _FP_W_TYPE_SIZE; \ 62 _down = (N) % _FP_W_TYPE_SIZE; \ 63 _up = _FP_W_TYPE_SIZE - _down; \ 64 if (!_down) \ 65 for (_i = 0; _i <= 3-_skip; ++_i) \ 66 X##_f[_i] = X##_f[_i+_skip]; \ 67 else \ 68 { \ 69 for (_i = 0; _i < 3-_skip; ++_i) \ 70 X##_f[_i] = X##_f[_i+_skip] >> _down \ 71 | X##_f[_i+_skip+1] << _up; \ 72 X##_f[_i++] = X##_f[3] >> _down; \ 73 } \ 74 for (; _i < 4; ++_i) \ 75 X##_f[_i] = 0; \ 76 } while (0) 77 78 79 /* Right shift with sticky-lsb. 80 * What this actually means is that we do a standard right-shift, 81 * but that if any of the bits that fall off the right hand side 82 * were one then we always set the LSbit. 83 */ 84 #define _FP_FRAC_SRS_4(X,N,size) \ 85 do { \ 86 _FP_I_TYPE _up, _down, _skip, _i; \ 87 _FP_W_TYPE _s; \ 88 _skip = (N) / _FP_W_TYPE_SIZE; \ 89 _down = (N) % _FP_W_TYPE_SIZE; \ 90 _up = _FP_W_TYPE_SIZE - _down; \ 91 for (_s = _i = 0; _i < _skip; ++_i) \ 92 _s |= X##_f[_i]; \ 93 _s |= X##_f[_i] << _up; \ 94 /* s is now != 0 if we want to set the LSbit */ \ 95 if (!_down) \ 96 for (_i = 0; _i <= 3-_skip; ++_i) \ 97 X##_f[_i] = X##_f[_i+_skip]; \ 98 else \ 99 { \ 100 for (_i = 0; _i < 3-_skip; ++_i) \ 101 X##_f[_i] = X##_f[_i+_skip] >> _down \ 102 | X##_f[_i+_skip+1] << _up; \ 103 X##_f[_i++] = X##_f[3] >> _down; \ 104 } \ 105 for (; _i < 4; ++_i) \ 106 X##_f[_i] = 0; \ 107 /* don't fix the LSB until the very end when we're sure f[0] is stable */ \ 108 X##_f[0] |= (_s != 0); \ 109 } while (0) 110 111 #define _FP_FRAC_ADD_4(R,X,Y) \ 112 __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ 113 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 114 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 115 116 #define _FP_FRAC_SUB_4(R,X,Y) \ 117 __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ 118 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 119 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 120 121 #define _FP_FRAC_DEC_4(X,Y) \ 122 __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 123 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) 124 125 #define _FP_FRAC_ADDI_4(X,I) \ 126 __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) 127 128 #define _FP_ZEROFRAC_4 0,0,0,0 129 #define _FP_MINFRAC_4 0,0,0,1 130 #define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 131 132 #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) 133 #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) 134 #define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 135 #define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 136 137 #define _FP_FRAC_EQ_4(X,Y) \ 138 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ 139 && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) 140 141 #define _FP_FRAC_GT_4(X,Y) \ 142 (X##_f[3] > Y##_f[3] || \ 143 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ 144 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ 145 (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ 146 )) \ 147 )) \ 148 ) 149 150 #define _FP_FRAC_GE_4(X,Y) \ 151 (X##_f[3] > Y##_f[3] || \ 152 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ 153 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ 154 (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ 155 )) \ 156 )) \ 157 ) 158 159 160 #define _FP_FRAC_CLZ_4(R,X) \ 161 do { \ 162 if (X##_f[3]) \ 163 { \ 164 __FP_CLZ(R,X##_f[3]); \ 165 } \ 166 else if (X##_f[2]) \ 167 { \ 168 __FP_CLZ(R,X##_f[2]); \ 169 R += _FP_W_TYPE_SIZE; \ 170 } \ 171 else if (X##_f[1]) \ 172 { \ 173 __FP_CLZ(R,X##_f[2]); \ 174 R += _FP_W_TYPE_SIZE*2; \ 175 } \ 176 else \ 177 { \ 178 __FP_CLZ(R,X##_f[0]); \ 179 R += _FP_W_TYPE_SIZE*3; \ 180 } \ 181 } while(0) 182 183 184 #define _FP_UNPACK_RAW_4(fs, X, val) \ 185 do { \ 186 union _FP_UNION_##fs _flo; _flo.flt = (val); \ 187 X##_f[0] = _flo.bits.frac0; \ 188 X##_f[1] = _flo.bits.frac1; \ 189 X##_f[2] = _flo.bits.frac2; \ 190 X##_f[3] = _flo.bits.frac3; \ 191 X##_e = _flo.bits.exp; \ 192 X##_s = _flo.bits.sign; \ 193 } while (0) 194 195 #define _FP_UNPACK_RAW_4_P(fs, X, val) \ 196 do { \ 197 union _FP_UNION_##fs *_flo = \ 198 (union _FP_UNION_##fs *)(val); \ 199 \ 200 X##_f[0] = _flo->bits.frac0; \ 201 X##_f[1] = _flo->bits.frac1; \ 202 X##_f[2] = _flo->bits.frac2; \ 203 X##_f[3] = _flo->bits.frac3; \ 204 X##_e = _flo->bits.exp; \ 205 X##_s = _flo->bits.sign; \ 206 } while (0) 207 208 #define _FP_PACK_RAW_4(fs, val, X) \ 209 do { \ 210 union _FP_UNION_##fs _flo; \ 211 _flo.bits.frac0 = X##_f[0]; \ 212 _flo.bits.frac1 = X##_f[1]; \ 213 _flo.bits.frac2 = X##_f[2]; \ 214 _flo.bits.frac3 = X##_f[3]; \ 215 _flo.bits.exp = X##_e; \ 216 _flo.bits.sign = X##_s; \ 217 (val) = _flo.flt; \ 218 } while (0) 219 220 #define _FP_PACK_RAW_4_P(fs, val, X) \ 221 do { \ 222 union _FP_UNION_##fs *_flo = \ 223 (union _FP_UNION_##fs *)(val); \ 224 \ 225 _flo->bits.frac0 = X##_f[0]; \ 226 _flo->bits.frac1 = X##_f[1]; \ 227 _flo->bits.frac2 = X##_f[2]; \ 228 _flo->bits.frac3 = X##_f[3]; \ 229 _flo->bits.exp = X##_e; \ 230 _flo->bits.sign = X##_s; \ 231 } while (0) 232 233 /* 234 * Multiplication algorithms: 235 */ 236 237 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 238 239 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ 240 do { \ 241 _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 242 _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \ 243 \ 244 doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \ 245 doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ 246 doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ 247 doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ 248 doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ 249 doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ 250 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 251 _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \ 252 0,0,_FP_FRAC_WORD_8(_z,1)); \ 253 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 254 _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \ 255 _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ 256 _FP_FRAC_WORD_8(_z,1)); \ 257 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 258 _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \ 259 0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \ 260 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 261 _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \ 262 _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 263 _FP_FRAC_WORD_8(_z,2)); \ 264 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 265 _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \ 266 _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ 267 _FP_FRAC_WORD_8(_z,2)); \ 268 doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ 269 doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ 270 doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ 271 doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ 272 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 273 _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \ 274 0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \ 275 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 276 _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \ 277 _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 278 _FP_FRAC_WORD_8(_z,3)); \ 279 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 280 _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \ 281 _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 282 _FP_FRAC_WORD_8(_z,3)); \ 283 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 284 _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \ 285 _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ 286 _FP_FRAC_WORD_8(_z,3)); \ 287 doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ 288 doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ 289 doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ 290 doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ 291 doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ 292 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 293 _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \ 294 0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \ 295 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 296 _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \ 297 _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 298 _FP_FRAC_WORD_8(_z,4)); \ 299 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 300 _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \ 301 _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ 302 _FP_FRAC_WORD_8(_z,4)); \ 303 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 304 _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \ 305 0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \ 306 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 307 _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \ 308 _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 309 _FP_FRAC_WORD_8(_z,5)); \ 310 doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ 311 __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ 312 _b_f1,_b_f0, \ 313 _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \ 314 \ 315 /* Normalize since we know where the msb of the multiplicands \ 316 were (bit B), we know that the msb of the of the product is \ 317 at either 2B or 2B-1. */ \ 318 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ 319 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ 320 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ 321 } while (0) 322 323 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \ 324 do { \ 325 _FP_FRAC_DECL_8(_z); \ 326 \ 327 mpn_mul_n(_z_f, _x_f, _y_f, 4); \ 328 \ 329 /* Normalize since we know where the msb of the multiplicands \ 330 were (bit B), we know that the msb of the of the product is \ 331 at either 2B or 2B-1. */ \ 332 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ 333 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ 334 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ 335 } while (0) 336 337 /* 338 * Helper utility for _FP_DIV_MEAT_4_udiv: 339 * pppp = m * nnn 340 */ 341 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \ 342 do { \ 343 UWtype _t; \ 344 umul_ppmm(p1,p0,m,n0); \ 345 umul_ppmm(p2,_t,m,n1); \ 346 __FP_FRAC_ADDI_2(p2,p1,_t); \ 347 umul_ppmm(p3,_t,m,n2); \ 348 __FP_FRAC_ADDI_2(p3,p2,_t); \ 349 } while (0) 350 351 /* 352 * Division algorithms: 353 */ 354 355 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \ 356 do { \ 357 int _i; \ 358 _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \ 359 _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \ 360 if (_FP_FRAC_GT_4(X, Y)) \ 361 { \ 362 _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ 363 _FP_FRAC_SRL_4(X, 1); \ 364 } \ 365 else \ 366 R##_e--; \ 367 \ 368 /* Normalize, i.e. make the most significant bit of the \ 369 denominator set. */ \ 370 _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \ 371 \ 372 for (_i = 3; ; _i--) \ 373 { \ 374 if (X##_f[3] == Y##_f[3]) \ 375 { \ 376 /* This is a special case, not an optimization \ 377 (X##_f[3]/Y##_f[3] would not fit into UWtype). \ 378 As X## is guaranteed to be < Y, R##_f[_i] can be either \ 379 (UWtype)-1 or (UWtype)-2. */ \ 380 R##_f[_i] = -1; \ 381 if (!_i) \ 382 break; \ 383 __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ 384 Y##_f[2], Y##_f[1], Y##_f[0], 0, \ 385 X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \ 386 _FP_FRAC_SUB_4(X, Y, X); \ 387 if (X##_f[3] > Y##_f[3]) \ 388 { \ 389 R##_f[_i] = -2; \ 390 _FP_FRAC_ADD_4(X, Y, X); \ 391 } \ 392 } \ 393 else \ 394 { \ 395 udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ 396 umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \ 397 R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \ 398 X##_f[2] = X##_f[1]; \ 399 X##_f[1] = X##_f[0]; \ 400 X##_f[0] = _n_f[_i]; \ 401 if (_FP_FRAC_GT_4(_m, X)) \ 402 { \ 403 R##_f[_i]--; \ 404 _FP_FRAC_ADD_4(X, Y, X); \ 405 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \ 406 { \ 407 R##_f[_i]--; \ 408 _FP_FRAC_ADD_4(X, Y, X); \ 409 } \ 410 } \ 411 _FP_FRAC_DEC_4(X, _m); \ 412 if (!_i) \ 413 { \ 414 if (!_FP_FRAC_EQ_4(X, _m)) \ 415 R##_f[0] |= _FP_WORK_STICKY; \ 416 break; \ 417 } \ 418 } \ 419 } \ 420 } while (0) 421 422 423 /* 424 * Square root algorithms: 425 * We have just one right now, maybe Newton approximation 426 * should be added for those machines where division is fast. 427 */ 428 429 #define _FP_SQRT_MEAT_4(R, S, T, X, q) \ 430 do { \ 431 while (q) \ 432 { \ 433 T##_f[3] = S##_f[3] + q; \ 434 if (T##_f[3] <= X##_f[3]) \ 435 { \ 436 S##_f[3] = T##_f[3] + q; \ 437 X##_f[3] -= T##_f[3]; \ 438 R##_f[3] += q; \ 439 } \ 440 _FP_FRAC_SLL_4(X, 1); \ 441 q >>= 1; \ 442 } \ 443 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 444 while (q) \ 445 { \ 446 T##_f[2] = S##_f[2] + q; \ 447 T##_f[3] = S##_f[3]; \ 448 if (T##_f[3] < X##_f[3] || \ 449 (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \ 450 { \ 451 S##_f[2] = T##_f[2] + q; \ 452 S##_f[3] += (T##_f[2] > S##_f[2]); \ 453 __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \ 454 T##_f[3], T##_f[2]); \ 455 R##_f[2] += q; \ 456 } \ 457 _FP_FRAC_SLL_4(X, 1); \ 458 q >>= 1; \ 459 } \ 460 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 461 while (q) \ 462 { \ 463 T##_f[1] = S##_f[1] + q; \ 464 T##_f[2] = S##_f[2]; \ 465 T##_f[3] = S##_f[3]; \ 466 if (T##_f[3] < X##_f[3] || \ 467 (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \ 468 (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \ 469 { \ 470 S##_f[1] = T##_f[1] + q; \ 471 S##_f[2] += (T##_f[1] > S##_f[1]); \ 472 S##_f[3] += (T##_f[2] > S##_f[2]); \ 473 __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \ 474 T##_f[3], T##_f[2], T##_f[1]); \ 475 R##_f[1] += q; \ 476 } \ 477 _FP_FRAC_SLL_4(X, 1); \ 478 q >>= 1; \ 479 } \ 480 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 481 while (q != _FP_WORK_ROUND) \ 482 { \ 483 T##_f[0] = S##_f[0] + q; \ 484 T##_f[1] = S##_f[1]; \ 485 T##_f[2] = S##_f[2]; \ 486 T##_f[3] = S##_f[3]; \ 487 if (_FP_FRAC_GE_4(X,T)) \ 488 { \ 489 S##_f[0] = T##_f[0] + q; \ 490 S##_f[1] += (T##_f[0] > S##_f[0]); \ 491 S##_f[2] += (T##_f[1] > S##_f[1]); \ 492 S##_f[3] += (T##_f[2] > S##_f[2]); \ 493 _FP_FRAC_DEC_4(X, T); \ 494 R##_f[0] += q; \ 495 } \ 496 _FP_FRAC_SLL_4(X, 1); \ 497 q >>= 1; \ 498 } \ 499 if (!_FP_FRAC_ZEROP_4(X)) \ 500 { \ 501 if (_FP_FRAC_GT_4(X,S)) \ 502 R##_f[0] |= _FP_WORK_ROUND; \ 503 R##_f[0] |= _FP_WORK_STICKY; \ 504 } \ 505 } while (0) 506 507 508 /* 509 * Internals 510 */ 511 512 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \ 513 (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) 514 515 #ifndef __FP_FRAC_ADD_3 516 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ 517 do { \ 518 int _c1, _c2; \ 519 r0 = x0 + y0; \ 520 _c1 = r0 < x0; \ 521 r1 = x1 + y1; \ 522 _c2 = r1 < x1; \ 523 r1 += _c1; \ 524 _c2 |= r1 < _c1; \ 525 r2 = x2 + y2 + _c2; \ 526 } while (0) 527 #endif 528 529 #ifndef __FP_FRAC_ADD_4 530 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ 531 do { \ 532 int _c1, _c2, _c3; \ 533 r0 = x0 + y0; \ 534 _c1 = r0 < x0; \ 535 r1 = x1 + y1; \ 536 _c2 = r1 < x1; \ 537 r1 += _c1; \ 538 _c2 |= r1 < _c1; \ 539 r2 = x2 + y2; \ 540 _c3 = r2 < x2; \ 541 r2 += _c2; \ 542 _c3 |= r2 < _c2; \ 543 r3 = x3 + y3 + _c3; \ 544 } while (0) 545 #endif 546 547 #ifndef __FP_FRAC_SUB_3 548 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ 549 do { \ 550 int _c1, _c2; \ 551 r0 = x0 - y0; \ 552 _c1 = r0 > x0; \ 553 r1 = x1 - y1; \ 554 _c2 = r1 > x1; \ 555 r1 -= _c1; \ 556 _c2 |= r1 > _c1; \ 557 r2 = x2 - y2 - _c2; \ 558 } while (0) 559 #endif 560 561 #ifndef __FP_FRAC_SUB_4 562 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ 563 do { \ 564 int _c1, _c2, _c3; \ 565 r0 = x0 - y0; \ 566 _c1 = r0 > x0; \ 567 r1 = x1 - y1; \ 568 _c2 = r1 > x1; \ 569 r1 -= _c1; \ 570 _c2 |= r1 > _c1; \ 571 r2 = x2 - y2; \ 572 _c3 = r2 > x2; \ 573 r2 -= _c2; \ 574 _c3 |= r2 > _c2; \ 575 r3 = x3 - y3 - _c3; \ 576 } while (0) 577 #endif 578 579 #ifndef __FP_FRAC_DEC_3 580 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \ 581 do { \ 582 UWtype _t0, _t1, _t2; \ 583 _t0 = x0, _t1 = x1, _t2 = x2; \ 584 __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \ 585 } while (0) 586 #endif 587 588 #ifndef __FP_FRAC_DEC_4 589 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \ 590 do { \ 591 UWtype _t0, _t1, _t2, _t3; \ 592 _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \ 593 __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \ 594 } while (0) 595 #endif 596 597 #ifndef __FP_FRAC_ADDI_4 598 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ 599 do { \ 600 UWtype _t; \ 601 _t = ((x0 += i) < i); \ 602 x1 += _t; _t = (x1 < _t); \ 603 x2 += _t; _t = (x2 < _t); \ 604 x3 += _t; \ 605 } while (0) 606 #endif 607 608 /* Convert FP values between word sizes. This appears to be more 609 * complicated than I'd have expected it to be, so these might be 610 * wrong... These macros are in any case somewhat bogus because they 611 * use information about what various FRAC_n variables look like 612 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do 613 * the ones in op-2.h and op-1.h. 614 */ 615 #define _FP_FRAC_CONV_1_4(dfs, sfs, D, S) \ 616 do { \ 617 if (S##_c != FP_CLS_NAN) \ 618 _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ 619 _FP_WFRACBITS_##sfs); \ 620 else \ 621 _FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \ 622 D##_f = S##_f[0]; \ 623 } while (0) 624 625 #define _FP_FRAC_CONV_2_4(dfs, sfs, D, S) \ 626 do { \ 627 if (S##_c != FP_CLS_NAN) \ 628 _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ 629 _FP_WFRACBITS_##sfs); \ 630 else \ 631 _FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \ 632 D##_f0 = S##_f[0]; \ 633 D##_f1 = S##_f[1]; \ 634 } while (0) 635 636 /* Assembly/disassembly for converting to/from integral types. 637 * No shifting or overflow handled here. 638 */ 639 /* Put the FP value X into r, which is an integer of size rsize. */ 640 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \ 641 do { \ 642 if (rsize <= _FP_W_TYPE_SIZE) \ 643 r = X##_f[0]; \ 644 else if (rsize <= 2*_FP_W_TYPE_SIZE) \ 645 { \ 646 r = X##_f[1]; \ 647 r <<= _FP_W_TYPE_SIZE; \ 648 r += X##_f[0]; \ 649 } \ 650 else \ 651 { \ 652 /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \ 653 /* and int == 4words as a single case. */ \ 654 r = X##_f[3]; \ 655 r <<= _FP_W_TYPE_SIZE; \ 656 r += X##_f[2]; \ 657 r <<= _FP_W_TYPE_SIZE; \ 658 r += X##_f[1]; \ 659 r <<= _FP_W_TYPE_SIZE; \ 660 r += X##_f[0]; \ 661 } \ 662 } while (0) 663 664 /* "No disassemble Number Five!" */ 665 /* move an integer of size rsize into X's fractional part. We rely on 666 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid 667 * having to mask the values we store into it. 668 */ 669 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \ 670 do { \ 671 X##_f[0] = r; \ 672 X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 673 X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \ 674 X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \ 675 } while (0) 676 677 #define _FP_FRAC_CONV_4_1(dfs, sfs, D, S) \ 678 do { \ 679 D##_f[0] = S##_f; \ 680 D##_f[1] = D##_f[2] = D##_f[3] = 0; \ 681 _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ 682 } while (0) 683 684 #define _FP_FRAC_CONV_4_2(dfs, sfs, D, S) \ 685 do { \ 686 D##_f[0] = S##_f0; \ 687 D##_f[1] = S##_f1; \ 688 D##_f[2] = D##_f[3] = 0; \ 689 _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ 690 } while (0) 691 692 #endif 693