/*
 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator,
 * derived from NetBSD M68040 FPSP functions,
 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic
 * Package. Those parts of the code (and some later contributions) are
 * provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file will be taken to be licensed under
 * the Softfloat-2a license unless specifically indicated otherwise.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "softfloat.h"
#include "fpu/softfloat-macros.h"
#include "softfloat_fpsp_tables.h"

#define pi_exp      0x4000
#define piby2_exp   0x3FFF
#define pi_sig      LIT64(0xc90fdaa22168c235)

static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
{
    if (floatx80_is_signaling_nan(a, status)) {
        float_raise(float_flag_invalid, status);
        a = floatx80_silence_nan(a, status);
    }

    if (status->default_nan_mode) {
        return floatx80_default_nan(status);
    }

    return a;
}

/*----------------------------------------------------------------------------
| Returns the modulo remainder of the extended double-precision floating-point
| value `a' with respect to the corresponding value `b'.
*----------------------------------------------------------------------------*/

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    bSig |= LIT64(0x8000000000000000);
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if (expDiff < 0) {
        return a;
    }
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    expDiff -= 64;
    while (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62;
    }
    expDiff += 64;
    if (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the mantissa of the extended double-precision floating-point
| value `a'.
*----------------------------------------------------------------------------*/

floatx80 floatx80_getman(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
                                0x3FFF, aSig, 0, status);
}

/*----------------------------------------------------------------------------
| Returns the exponent of the extended double-precision floating-point
| value `a' as an extended double-precision value.
*----------------------------------------------------------------------------*/

floatx80 floatx80_getexp(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    return int32_to_floatx80(aExp - 0x3FFF, status);
}

/*----------------------------------------------------------------------------
| Scales extended double-precision floating-point value in operand `a' by
| value `b'. The function truncates the value in the second operand 'b' to
| an integral value and adds that value to the exponent of the operand 'a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, bSign;
    int32_t aExp, bExp, shiftCount;
    uint64_t aSig, bSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);
    bSign = extractFloatx80Sign(b);

    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1) ||
            ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        if (bExp < 0x3FFF) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    if (bExp < 0x3FFF) {
        return a;
    }

    if (0x400F < bExp) {
        aExp = bSign ? -0x6001 : 0xE000;
        return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                    aSign, aExp, aSig, 0, status);
    }

    shiftCount = 0x403E - bExp;
    bSig >>= shiftCount;
    aExp = bSign ? (aExp - bSig) : (aExp + bSig);

    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                aSign, aExp, aSig, 0, status);
}

floatx80 floatx80_move(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return a;
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        return normalizeRoundAndPackFloatx80(
            status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status);
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
                                aExp, aSig, 0, status);
}

/*----------------------------------------------------------------------------
| Algorithms for transcendental functions supported by MC68881 and MC68882
| mathematical coprocessors. The functions are derived from FPSP library.
*----------------------------------------------------------------------------*/

#define one_exp 0x3FFF
#define one_sig LIT64(0x8000000000000000)

/*----------------------------------------------------------------------------
| Function for compactifying extended double-precision floating point values.
*----------------------------------------------------------------------------*/

static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
{
    return (aExp << 16) | (aSig >> 48);
}
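
/*
 * The "compact" value packs the biased exponent into bits 31:16 and the
 * top sixteen bits of the significand into bits 15:0, so the magnitude of
 * a (normalized) operand can be range-checked with a single integer
 * compare.  A few worked examples (illustrative only):
 *
 *     1.0     = {exp 0x3FFF, sig 0x8000000000000000} -> compact 0x3FFF8000
 *     2^(-40) = {exp 0x3FD7, sig 0x8000000000000000} -> compact 0x3FD78000
 *     15*pi  ~= {exp 0x4004, sig 0xBC7E...}          -> compact 0x4004BC7E
 *
 * which is where thresholds such as 0x3FD78000 and 0x4004BC7E in the
 * trigonometric routines below come from.
 */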

/*----------------------------------------------------------------------------
| Log base e of x plus 1
*----------------------------------------------------------------------------*/

floatx80 floatx80_lognp1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig, fSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, j, k;
    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    if (aSign && aExp >= one_exp) {
        if (aExp == one_exp && aSig == one_sig) {
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) {
        /* <= min threshold */
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a; /* Z */
    fp1 = a;

    fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                       status), status); /* X = (1+Z) */

    aExp = extractFloatx80Exp(fp0);
    aSig = extractFloatx80Frac(fp0);

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FFE8000 || compact > 0x3FFFC000) {
        /* |X| < 1/2 or |X| > 3/2 */
        k = aExp - 0x3FFF;
        fp1 = int32_to_floatx80(k, status);

        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */

        f = packFloatx80(0, 0x3FFF, fSig); /* F */
        fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */

        fp0 = floatx80_sub(fp0, f, status); /* Y-F */

    lp1cont1:
        /* LP1CONT1 */
        fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
        logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
        klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
        fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */

        fp3 = fp2;
        fp1 = fp2;

        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3FC2499AB5E4040B), status),
                           status); /* V*A6 */
        fp2 = floatx80_mul(fp2, float64_to_floatx80(
                           make_float64(0xBFC555B5848CB7DB), status),
                           status); /* V*A5 */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FC99999987D8730), status),
                           status); /* A4+V*A6 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFCFFFFFFF6F7E97), status),
                           status); /* A3+V*A5 */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FD55555555555A4), status),
                           status); /* A2+V*(A4+V*A6) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFE0000000000008), status),
                           status); /* A1+V*(A3+V*A5) */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */

        fp1 = floatx80_add(fp1, log_tbl[j + 1],
                           status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, klog2, status);

        float_raise(float_flag_inexact, status);

        return a;
    } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
        /* |X| < 15/16 or |X| > 17/16 */
        /* LP1CARE */
        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        f = packFloatx80(0, 0x3FFF, fSig); /* F */
        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */

        if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */
            /* KISZERO */
            fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000),
                               status), f, status); /* 1-F */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */
            fp1 = packFloatx80(0, 0, 0); /* K = 0 */
        } else {
            /* KISNEG */
            fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000),
                               status), f, status); /* 2-F */
            fp1 = floatx80_add(fp1, fp1, status); /* 2Z */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */
            fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */
        }
        goto lp1cont1;
    } else {
        /* LP1ONE16 */
        fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */
        fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* FP0 IS 1+X */

        /* LP1CONT2 */
        fp1 = floatx80_div(fp1, fp0, status); /* U */
        saveu = fp1;
        fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
        fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */

        fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
                                  status); /* B5 */
        fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
                                  status); /* B4 */
        fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
        fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3F624924928BCCFF), status),
                           status); /* B3+W*B5 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3F899999999995EC), status),
                           status); /* B2+W*B4 */
        fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
        fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FB5555555555555), status),
                           status); /* B1+W*(B3+W*B5) */

        fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
        fp1 = floatx80_add(fp1, fp2,
                           status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
        fp0 = floatx80_mul(fp0, fp1,
                           status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, saveu, status);

        /*if (!floatx80_is_zero(a)) { */
        float_raise(float_flag_inexact, status);
        /*} */

        return a;
    }
}
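
/*
 * Both floatx80_lognp1() above and floatx80_logn() below reduce the
 * argument to
 *
 *     log(X) = k*log(2) + log(F) + log(1 + u),    u = (Y - F)/F,
 *
 * where Y is the significand of X and F is Y truncated to its top six
 * fraction bits with a 1 forced into the next bit (the midpoint of a
 * 1/64-wide interval).  log_tbl[] is assumed to hold, for each even
 * displacement j, the pair { 1/F, log(F) }: multiplying Y - F by
 * log_tbl[j] forms u, and log_tbl[j + 1] supplies log(F).  log(1 + u)
 * itself is evaluated with a short polynomial in v = u*u.
 */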

/*----------------------------------------------------------------------------
| Log base e
*----------------------------------------------------------------------------*/

floatx80 floatx80_logn(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig, fSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, j, k, adjk;
    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign == 0) {
            return packFloatx80(0, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
    }

    adjk = 0;

    if (aExp == 0) {
        if (aSig == 0) { /* zero */
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(1, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
        if ((aSig & one_sig) == 0) { /* denormal */
            normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
            adjk = -100;
            aExp += 100;
            a = packFloatx80(aSign, aExp, aSig);
        }
    }

    if (aSign) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
        /* |X| < 15/16 or |X| > 17/16 */
        k = aExp - 0x3FFF;
        k += adjk;
        fp1 = int32_to_floatx80(k, status);

        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */

        f = packFloatx80(0, 0x3FFF, fSig); /* F */
        fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */

        fp0 = floatx80_sub(fp0, f, status); /* Y-F */

        /* LP1CONT1 */
        fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
        logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
        klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
        fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */

        fp3 = fp2;
        fp1 = fp2;

        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3FC2499AB5E4040B), status),
                           status); /* V*A6 */
        fp2 = floatx80_mul(fp2, float64_to_floatx80(
                           make_float64(0xBFC555B5848CB7DB), status),
                           status); /* V*A5 */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FC99999987D8730), status),
                           status); /* A4+V*A6 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFCFFFFFFF6F7E97), status),
                           status); /* A3+V*A5 */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FD55555555555A4), status),
                           status); /* A2+V*(A4+V*A6) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFE0000000000008), status),
                           status); /* A1+V*(A3+V*A5) */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */

        fp1 = floatx80_add(fp1, log_tbl[j + 1],
                           status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, klog2, status);

        float_raise(float_flag_inexact, status);

        return a;
    } else { /* |X-1| < 1/16 */
        fp0 = a;
        fp1 = a;
        fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* FP1 IS X-1 */
        fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* FP0 IS X+1 */
        fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */

        /* LP1CONT2 */
        fp1 = floatx80_div(fp1, fp0, status); /* U */
        saveu = fp1;
        fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
        fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */

        fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
                                  status); /* B5 */
        fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
                                  status); /* B4 */
        fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
        fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3F624924928BCCFF), status),
                           status); /* B3+W*B5 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3F899999999995EC), status),
                           status); /* B2+W*B4 */
        fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
        fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FB5555555555555), status),
                           status); /* B1+W*(B3+W*B5) */

        fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
        fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
        fp0 = floatx80_mul(fp0, fp1,
                           status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, saveu, status);

        /*if (!floatx80_is_zero(a)) { */
        float_raise(float_flag_inexact, status);
        /*} */

        return a;
    }
}

/*----------------------------------------------------------------------------
| Log base 10
*----------------------------------------------------------------------------*/

floatx80 floatx80_log10(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign == 0) {
            return packFloatx80(0, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
    }

    if (aExp == 0 && aSig == 0) {
        float_raise(float_flag_divbyzero, status);
        return packFloatx80(1, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aSign) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = floatx80_logn(a, status);
    fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); /* INV_L10 */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */

    float_raise(float_flag_inexact, status);

    return a;
}

/*----------------------------------------------------------------------------
| Log base 2
*----------------------------------------------------------------------------*/

floatx80 floatx80_log2(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign == 0) {
            return packFloatx80(0, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
    }

    if (aExp == 0) {
        if (aSig == 0) {
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(1, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    if (aSign) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (aSig == one_sig) { /* X is 2^k */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = int32_to_floatx80(aExp - 0x3FFF, status);
    } else {
        fp0 = floatx80_logn(a, status);
        fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); /* INV_L2 */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */
    }

    float_raise(float_flag_inexact, status);

    return a;
}

/*----------------------------------------------------------------------------
| e to x
*----------------------------------------------------------------------------*/
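
/*
 * The exponential kernel below uses the usual FPSP reduction: with
 * round-to-nearest in effect, N = int(X * 64/log(2)) is split as
 * N = 64*M + J and R = X - N*(log(2)/64), so |R| <= log(2)/128 and
 *
 *     e^X = 2^M * 2^(J/64) * e^R.
 *
 * e^R - 1 is approximated by a short polynomial, while 2^(J/64) is taken
 * from exp_tbl[J] plus a small correction from exp_tbl2[J] (the tables
 * are assumed to provide exactly those values).  floatx80_twotox() and
 * floatx80_tentox() follow the same pattern, choosing N so that 2^X
 * (resp. 10^X) equals 2^(N/64) * e^R and drawing 2^(J/64) from
 * exp2_tbl[]/exp2_tbl2[].
 */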

floatx80 floatx80_etox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, k, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    flag adjflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    adjflag = 0;

    if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact < 0x400CB167) { /* |X| < 16380 log2 */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            adjflag = 0;
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /* arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m += 0x3FFF; /* biased exponent of 2^(M) */

        expcont1:
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
                                      status); /* A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
                               status), fp1,
                               status); /* fp3 is S*A4 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
                               0x3FA5555555554431), status),
                               status); /* fp2 is A3+S*A5 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
                               0x3FC5555555554018), status),
                               status); /* fp3 is A2+S*A4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp2 is A1+S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            fp1 = exp_tbl[j];
            fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
                               status); /* accurate 2^(J/64) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */

            scale = packFloatx80(0, m, one_sig);
            if (adjflag) {
                adjscale = packFloatx80(0, m1, one_sig);
                fp0 = floatx80_mul(fp0, adjscale, status);
            }

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, scale, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| >= 16380 log2 */
            if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;
                if (aSign) {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, -0x1000, aSig, 0, status);
                } else {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, 0x8000, aSig, 0, status);
                }
                float_raise(float_flag_inexact, status);

                return a;
            } else {
                fp0 = a;
                fp1 = a;
                fp0 = floatx80_mul(fp0, float32_to_floatx80(
                                   make_float32(0x42B8AA3B), status),
                                   status); /* 64/log2 * X */
                adjflag = 1;
                n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
                fp0 = int32_to_floatx80(n, status);

                j = n & 0x3F; /* J = N mod 64 */
                /* NOTE: this is really arithmetic right shift by 6 */
                k = n / 64;
                if (n < 0 && j) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    k--;
                }
                /* NOTE: this is really arithmetic right shift by 1 */
                m1 = k / 2;
                if (k < 0 && (k & 1)) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    m1--;
                }
                m = k - m1;
                m1 += 0x3FFF; /* biased exponent of 2^(M1) */
                m += 0x3FFF; /* biased exponent of 2^(M) */

                goto expcont1;
            }
        }
    } else { /* |X| < 2^(-65) */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
                         status), status); /* 1 + X */

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*----------------------------------------------------------------------------
| 2 to x
*----------------------------------------------------------------------------*/

floatx80 floatx80_twotox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400D80C0) {
        /* |X| > 16480 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x42800000), status),
                           status); /* X * 64 */
        n = floatx80_to_int32(fp1, status);
        fp1 = int32_to_floatx80(n, status);
        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x3C800000), status),
                           status); /* (1/64)*N */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
        fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); /* LOG2 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*----------------------------------------------------------------------------
| 10 to x
*----------------------------------------------------------------------------*/

floatx80 floatx80_tentox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400B9B07) {
        /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x406A934F0979A371),
                           status), status); /* X*64*LOG10/LOG2 */
        n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
        fp1 = int32_to_floatx80(n, status);

        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp2 = fp1; /* N */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3F734413509F8000), status),
                           status); /* N*(LOG2/64LOG10)_LEAD */
        fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2));
        fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
        fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
        fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); /* LOG10 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
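
/*
 * floatx80_tan(), floatx80_sin() and floatx80_cos() below share one
 * argument reduction.  For |X| <= 15*pi, N = int(X * 2/pi) and the
 * reduced argument R is formed by subtracting the two-part table entries
 * pi_tbl[32 + N] and pi_tbl2[32 + N] (assumed to hold N*pi/2 split into a
 * leading and a trailing part); the low bits of N then select the
 * quadrant, i.e. which polynomial (sine- or cosine-like) and which sign
 * to use.  For larger |X| the REDUCEX loop peels the quotient off in
 * chunks, using the classic trick of adding and then subtracting
 * SIGN(X)*2^63 so that the fractional part of X * invtwopi is rounded
 * away, and it carries the reduction error in fp1 until the remainder is
 * small enough; the chunking (l = xExp - 27) presumably keeps each N*P1
 * product exact.
 */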

/*----------------------------------------------------------------------------
| Tangent
*----------------------------------------------------------------------------*/

floatx80 floatx80_tan(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
    float32 twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* |X| < 2^(-40) or |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto tancont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    tancont:
        if (n & 1) {
            /* NODD */
            fp1 = fp0; /* R */
            fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
            fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            xSign = extractFloatx80Sign(fp1);
            xExp = extractFloatx80Exp(fp1);
            xSig = extractFloatx80Frac(fp1);
            xSign ^= 1;
            fp1 = packFloatx80(xSign, xExp, xSig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*----------------------------------------------------------------------------
| Sine
*----------------------------------------------------------------------------*/

floatx80 floatx80_sin(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* |X| < 2^(-40) or |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* SINSM */
            fp0 = float32_to_floatx80(make_float32(0x3F800000),
                                      status); /* 1 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* SINTINY */
            a = floatx80_move(a, status);
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    sincont:
        if (n & 1) {
            /* COSPOLY */
            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
                                      status); /* B8 */
            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
                                      status); /* B7 */

            xSign = extractFloatx80Sign(fp0); /* X IS S */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            if ((n >> 1) & 1) {
                xSign ^= 1;
                posneg1 = make_float32(0xBF800000); /* -1 */
            } else {
                xSign ^= 0;
                posneg1 = make_float32(0x3F800000); /* 1 */
            } /* X IS NOW R'= SGN*R */

            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3E21EED90612C972), status),
                               status); /* B6+TB8 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE927E4FB79D9FCF), status),
                               status); /* B5+TB7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A01A01D423), status),
                               status); /* B4+T(B6+TB8) */
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0xBF000000), status),
                               status); /* B1+T(B3+T(B5+TB7)) */
            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
            fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
                                                   * [S(B2+T(B4+T(B6+TB8)))]
                                                   */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* SINPOLY */
            xSign = extractFloatx80Sign(fp0); /* X IS R */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */

            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
                                      status); /* A7 */
            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
                                      status); /* A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE5AE6452A118AE4), status),
                               status); /* A5+T*A7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5341531), status),
                               status); /* A4+T*A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF2A01A01A018B59), status),
                               status); /* A3+T(A5+TA7) */
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
            fp1 = floatx80_add(fp1, fp2,
                               status); /* [A1+T(A3+T(A5+TA7))]+
                                         * [S(A2+T(A4+TA6))]
                                         */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*----------------------------------------------------------------------------
| Cosine
*----------------------------------------------------------------------------*/

floatx80 floatx80_cos(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
status->float_rounding_mode; 1760 user_rnd_prec = status->floatx80_rounding_precision; 1761 status->float_rounding_mode = float_round_nearest_even; 1762 status->floatx80_rounding_precision = 80; 1763 1764 compact = floatx80_make_compact(aExp, aSig); 1765 1766 fp0 = a; 1767 1768 if (compact < 0x3FD78000 || compact > 0x4004BC7E) { 1769 /* 2^(-40) > |X| > 15 PI */ 1770 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ 1771 /* REDUCEX */ 1772 fp1 = packFloatx80(0, 0, 0); 1773 if (compact == 0x7FFEFFFF) { 1774 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, 1775 LIT64(0xC90FDAA200000000)); 1776 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, 1777 LIT64(0x85A308D300000000)); 1778 fp0 = floatx80_add(fp0, twopi1, status); 1779 fp1 = fp0; 1780 fp0 = floatx80_add(fp0, twopi2, status); 1781 fp1 = floatx80_sub(fp1, fp0, status); 1782 fp1 = floatx80_add(fp1, twopi2, status); 1783 } 1784 loop: 1785 xSign = extractFloatx80Sign(fp0); 1786 xExp = extractFloatx80Exp(fp0); 1787 xExp -= 0x3FFF; 1788 if (xExp <= 28) { 1789 l = 0; 1790 endflag = 1; 1791 } else { 1792 l = xExp - 27; 1793 endflag = 0; 1794 } 1795 invtwopi = packFloatx80(0, 0x3FFE - l, 1796 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */ 1797 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); 1798 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); 1799 1800 /* SIGN(INARG)*2^63 IN SGL */ 1801 twoto63 = packFloat32(xSign, 0xBE, 0); 1802 1803 fp2 = floatx80_mul(fp0, invtwopi, status); 1804 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), 1805 status); /* THE FRACT PART OF FP2 IS ROUNDED */ 1806 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), 1807 status); /* FP2 is N */ 1808 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ 1809 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ 1810 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ 1811 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ 1812 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ 1813 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ 1814 fp3 = fp0; /* FP3 is A */ 1815 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ 1816 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ 1817 1818 if (endflag > 0) { 1819 n = floatx80_to_int32(fp2, status); 1820 goto sincont; 1821 } 1822 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ 1823 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ 1824 goto loop; 1825 } else { 1826 /* SINSM */ 1827 fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */ 1828 1829 status->float_rounding_mode = user_rnd_mode; 1830 status->floatx80_rounding_precision = user_rnd_prec; 1831 1832 /* COSTINY */ 1833 a = floatx80_sub(fp0, float32_to_floatx80( 1834 make_float32(0x00800000), status), 1835 status); 1836 float_raise(float_flag_inexact, status); 1837 1838 return a; 1839 } 1840 } else { 1841 fp1 = floatx80_mul(fp0, float64_to_floatx80( 1842 make_float64(0x3FE45F306DC9C883), status), 1843 status); /* X*2/PI */ 1844 1845 n = floatx80_to_int32(fp1, status); 1846 j = 32 + n; 1847 1848 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ 1849 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), 1850 status); /* FP0 IS R = (X-Y1)-Y2 */ 1851 1852 sincont: 1853 if ((n + 1) & 1) { 1854 /* COSPOLY */ 1855 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ 1856 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ 1857 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), 1858 status); /* B8 */ 1859 fp3 = 
float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), 1860 status); /* B7 */ 1861 1862 xSign = extractFloatx80Sign(fp0); /* X IS S */ 1863 xExp = extractFloatx80Exp(fp0); 1864 xSig = extractFloatx80Frac(fp0); 1865 1866 if (((n + 1) >> 1) & 1) { 1867 xSign ^= 1; 1868 posneg1 = make_float32(0xBF800000); /* -1 */ 1869 } else { 1870 xSign ^= 0; 1871 posneg1 = make_float32(0x3F800000); /* 1 */ 1872 } /* X IS NOW R'= SGN*R */ 1873 1874 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ 1875 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ 1876 fp2 = floatx80_add(fp2, float64_to_floatx80( 1877 make_float64(0x3E21EED90612C972), status), 1878 status); /* B6+TB8 */ 1879 fp3 = floatx80_add(fp3, float64_to_floatx80( 1880 make_float64(0xBE927E4FB79D9FCF), status), 1881 status); /* B5+TB7 */ 1882 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ 1883 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ 1884 fp2 = floatx80_add(fp2, float64_to_floatx80( 1885 make_float64(0x3EFA01A01A01D423), status), 1886 status); /* B4+T(B6+TB8) */ 1887 fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); 1888 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ 1889 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ 1890 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ 1891 fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); 1892 fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ 1893 fp1 = floatx80_add(fp1, float32_to_floatx80( 1894 make_float32(0xBF000000), status), 1895 status); /* B1+T(B3+T(B5+TB7)) */ 1896 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ 1897 fp0 = floatx80_add(fp0, fp1, status); 1898 /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */ 1899 1900 x = packFloatx80(xSign, xExp, xSig); 1901 fp0 = floatx80_mul(fp0, x, status); 1902 1903 status->float_rounding_mode = user_rnd_mode; 1904 status->floatx80_rounding_precision = user_rnd_prec; 1905 1906 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); 1907 1908 float_raise(float_flag_inexact, status); 1909 1910 return a; 1911 } else { 1912 /* SINPOLY */ 1913 xSign = extractFloatx80Sign(fp0); /* X IS R */ 1914 xExp = extractFloatx80Exp(fp0); 1915 xSig = extractFloatx80Frac(fp0); 1916 1917 xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */ 1918 1919 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ 1920 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ 1921 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), 1922 status); /* A7 */ 1923 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), 1924 status); /* A6 */ 1925 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ 1926 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ 1927 fp3 = floatx80_add(fp3, float64_to_floatx80( 1928 make_float64(0xBE5AE6452A118AE4), status), 1929 status); /* A5+T*A7 */ 1930 fp2 = floatx80_add(fp2, float64_to_floatx80( 1931 make_float64(0x3EC71DE3A5341531), status), 1932 status); /* A4+T*A6 */ 1933 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ 1934 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ 1935 fp3 = floatx80_add(fp3, float64_to_floatx80( 1936 make_float64(0xBF2A01A01A018B59), status), 1937 status); /* A3+T(A5+TA7) */ 1938 fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); 1939 fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ 1940 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ 1941 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ 1942 fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); 1943 fp1 = floatx80_add(fp1, fp4, 
status); /* A1+T(A3+T(A5+TA7)) */ 1944 fp1 = floatx80_add(fp1, fp2, status); 1945 /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */ 1946 1947 x = packFloatx80(xSign, xExp, xSig); 1948 fp0 = floatx80_mul(fp0, x, status); /* R'*S */ 1949 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ 1950 1951 status->float_rounding_mode = user_rnd_mode; 1952 status->floatx80_rounding_precision = user_rnd_prec; 1953 1954 a = floatx80_add(fp0, x, status); 1955 1956 float_raise(float_flag_inexact, status); 1957 1958 return a; 1959 } 1960 } 1961 } 1962 1963 /*---------------------------------------------------------------------------- 1964 | Arc tangent 1965 *----------------------------------------------------------------------------*/ 1966 1967 floatx80 floatx80_atan(floatx80 a, float_status *status) 1968 { 1969 flag aSign; 1970 int32_t aExp; 1971 uint64_t aSig; 1972 1973 int8_t user_rnd_mode, user_rnd_prec; 1974 1975 int32_t compact, tbl_index; 1976 floatx80 fp0, fp1, fp2, fp3, xsave; 1977 1978 aSig = extractFloatx80Frac(a); 1979 aExp = extractFloatx80Exp(a); 1980 aSign = extractFloatx80Sign(a); 1981 1982 if (aExp == 0x7FFF) { 1983 if ((uint64_t) (aSig << 1)) { 1984 return propagateFloatx80NaNOneArg(a, status); 1985 } 1986 a = packFloatx80(aSign, piby2_exp, pi_sig); 1987 float_raise(float_flag_inexact, status); 1988 return floatx80_move(a, status); 1989 } 1990 1991 if (aExp == 0 && aSig == 0) { 1992 return packFloatx80(aSign, 0, 0); 1993 } 1994 1995 compact = floatx80_make_compact(aExp, aSig); 1996 1997 user_rnd_mode = status->float_rounding_mode; 1998 user_rnd_prec = status->floatx80_rounding_precision; 1999 status->float_rounding_mode = float_round_nearest_even; 2000 status->floatx80_rounding_precision = 80; 2001 2002 if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { 2003 /* |X| >= 16 or |X| < 1/16 */ 2004 if (compact > 0x3FFF8000) { /* |X| >= 16 */ 2005 if (compact > 0x40638000) { /* |X| > 2^(100) */ 2006 fp0 = packFloatx80(aSign, piby2_exp, pi_sig); 2007 fp1 = packFloatx80(aSign, 0x0001, one_sig); 2008 2009 status->float_rounding_mode = user_rnd_mode; 2010 status->floatx80_rounding_precision = user_rnd_prec; 2011 2012 a = floatx80_sub(fp0, fp1, status); 2013 2014 float_raise(float_flag_inexact, status); 2015 2016 return a; 2017 } else { 2018 fp0 = a; 2019 fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */ 2020 fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */ 2021 xsave = fp1; 2022 fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */ 2023 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ 2024 fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A), 2025 status); /* C5 */ 2026 fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D), 2027 status); /* C4 */ 2028 fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */ 2029 fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */ 2030 fp3 = floatx80_add(fp3, float64_to_floatx80( 2031 make_float64(0xBFC24924827107B8), status), 2032 status); /* C3+Z*C5 */ 2033 fp2 = floatx80_add(fp2, float64_to_floatx80( 2034 make_float64(0x3FC999999996263E), status), 2035 status); /* C2+Z*C4 */ 2036 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */ 2037 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */ 2038 fp1 = floatx80_add(fp1, float64_to_floatx80( 2039 make_float64(0xBFD5555555555536), status), 2040 status); /* C1+Z*(C3+Z*C5) */ 2041 fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */ 2042 /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */ 2043 fp1 = floatx80_add(fp1, fp2, status); 2044 /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? 
*/ 2045 fp0 = floatx80_mul(fp0, fp1, status); 2046 fp0 = floatx80_add(fp0, xsave, status); 2047 fp1 = packFloatx80(aSign, piby2_exp, pi_sig); 2048 2049 status->float_rounding_mode = user_rnd_mode; 2050 status->floatx80_rounding_precision = user_rnd_prec; 2051 2052 a = floatx80_add(fp0, fp1, status); 2053 2054 float_raise(float_flag_inexact, status); 2055 2056 return a; 2057 } 2058 } else { /* |X| < 1/16 */ 2059 if (compact < 0x3FD78000) { /* |X| < 2^(-40) */ 2060 status->float_rounding_mode = user_rnd_mode; 2061 status->floatx80_rounding_precision = user_rnd_prec; 2062 2063 a = floatx80_move(a, status); 2064 2065 float_raise(float_flag_inexact, status); 2066 2067 return a; 2068 } else { 2069 fp0 = a; 2070 xsave = a; 2071 fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */ 2072 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ 2073 fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989), 2074 status); /* B6 */ 2075 fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB), 2076 status); /* B5 */ 2077 fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */ 2078 fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */ 2079 fp2 = floatx80_add(fp2, float64_to_floatx80( 2080 make_float64(0x3FBC71C646940220), status), 2081 status); /* B4+Z*B6 */ 2082 fp3 = floatx80_add(fp3, float64_to_floatx80( 2083 make_float64(0xBFC24924921872F9), 2084 status), status); /* B3+Z*B5 */ 2085 fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */ 2086 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */ 2087 fp2 = floatx80_add(fp2, float64_to_floatx80( 2088 make_float64(0x3FC9999999998FA9), status), 2089 status); /* B2+Z*(B4+Z*B6) */ 2090 fp1 = floatx80_add(fp1, float64_to_floatx80( 2091 make_float64(0xBFD5555555555555), status), 2092 status); /* B1+Z*(B3+Z*B5) */ 2093 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */ 2094 fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */ 2095 /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */ 2096 fp1 = floatx80_add(fp1, fp2, status); 2097 /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */ 2098 fp0 = floatx80_mul(fp0, fp1, status); 2099 2100 status->float_rounding_mode = user_rnd_mode; 2101 status->floatx80_rounding_precision = user_rnd_prec; 2102 2103 a = floatx80_add(fp0, xsave, status); 2104 2105 float_raise(float_flag_inexact, status); 2106 2107 return a; 2108 } 2109 } 2110 } else { 2111 aSig &= LIT64(0xF800000000000000); 2112 aSig |= LIT64(0x0400000000000000); 2113 xsave = packFloatx80(aSign, aExp, aSig); /* F */ 2114 fp0 = a; 2115 fp1 = a; /* X */ 2116 fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */ 2117 fp1 = floatx80_mul(fp1, xsave, status); /* X*F */ 2118 fp0 = floatx80_sub(fp0, xsave, status); /* X-F */ 2119 fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */ 2120 fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */ 2121 2122 tbl_index = compact; 2123 2124 tbl_index &= 0x7FFF0000; 2125 tbl_index -= 0x3FFB0000; 2126 tbl_index >>= 1; 2127 tbl_index += compact & 0x00007800; 2128 tbl_index >>= 11; 2129 2130 fp3 = atan_tbl[tbl_index]; 2131 2132 fp3.high |= aSign ? 
0x8000 : 0; /* ATAN(F) */ 2133 2134 fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */ 2135 fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8), 2136 status); /* A3 */ 2137 fp2 = floatx80_add(fp2, fp1, status); /* A3+V */ 2138 fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */ 2139 fp1 = floatx80_mul(fp1, fp0, status); /* U*V */ 2140 fp2 = floatx80_add(fp2, float64_to_floatx80( 2141 make_float64(0x4002AC6934A26DB3), status), 2142 status); /* A2+V*(A3+V) */ 2143 fp1 = floatx80_mul(fp1, float64_to_floatx80( 2144 make_float64(0xBFC2476F4E1DA28E), status), 2145 status); /* A1+U*V */ 2146 fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */ 2147 fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */ 2148 2149 status->float_rounding_mode = user_rnd_mode; 2150 status->floatx80_rounding_precision = user_rnd_prec; 2151 2152 a = floatx80_add(fp0, fp3, status); /* ATAN(X) */ 2153 2154 float_raise(float_flag_inexact, status); 2155 2156 return a; 2157 } 2158 } 2159 2160 /*---------------------------------------------------------------------------- 2161 | Arc sine 2162 *----------------------------------------------------------------------------*/ 2163 2164 floatx80 floatx80_asin(floatx80 a, float_status *status) 2165 { 2166 flag aSign; 2167 int32_t aExp; 2168 uint64_t aSig; 2169 2170 int8_t user_rnd_mode, user_rnd_prec; 2171 2172 int32_t compact; 2173 floatx80 fp0, fp1, fp2, one; 2174 2175 aSig = extractFloatx80Frac(a); 2176 aExp = extractFloatx80Exp(a); 2177 aSign = extractFloatx80Sign(a); 2178 2179 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { 2180 return propagateFloatx80NaNOneArg(a, status); 2181 } 2182 2183 if (aExp == 0 && aSig == 0) { 2184 return packFloatx80(aSign, 0, 0); 2185 } 2186 2187 compact = floatx80_make_compact(aExp, aSig); 2188 2189 if (compact >= 0x3FFF8000) { /* |X| >= 1 */ 2190 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ 2191 float_raise(float_flag_inexact, status); 2192 a = packFloatx80(aSign, piby2_exp, pi_sig); 2193 return floatx80_move(a, status); 2194 } else { /* |X| > 1 */ 2195 float_raise(float_flag_invalid, status); 2196 return floatx80_default_nan(status); 2197 } 2198 2199 } /* |X| < 1 */ 2200 2201 user_rnd_mode = status->float_rounding_mode; 2202 user_rnd_prec = status->floatx80_rounding_precision; 2203 status->float_rounding_mode = float_round_nearest_even; 2204 status->floatx80_rounding_precision = 80; 2205 2206 one = packFloatx80(0, one_exp, one_sig); 2207 fp0 = a; 2208 2209 fp1 = floatx80_sub(one, fp0, status); /* 1 - X */ 2210 fp2 = floatx80_add(one, fp0, status); /* 1 + X */ 2211 fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */ 2212 fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */ 2213 fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */ 2214 2215 status->float_rounding_mode = user_rnd_mode; 2216 status->floatx80_rounding_precision = user_rnd_prec; 2217 2218 a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */ 2219 2220 float_raise(float_flag_inexact, status); 2221 2222 return a; 2223 } 2224 2225 /*---------------------------------------------------------------------------- 2226 | Arc cosine 2227 *----------------------------------------------------------------------------*/ 2228 2229 floatx80 floatx80_acos(floatx80 a, float_status *status) 2230 { 2231 flag aSign; 2232 int32_t aExp; 2233 uint64_t aSig; 2234 2235 int8_t user_rnd_mode, user_rnd_prec; 2236 2237 int32_t compact; 2238 floatx80 fp0, fp1, one; 2239 2240 aSig = extractFloatx80Frac(a); 2241 aExp = extractFloatx80Exp(a); 2242 aSign = 
extractFloatx80Sign(a); 2243 2244 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { 2245 return propagateFloatx80NaNOneArg(a, status); 2246 } 2247 if (aExp == 0 && aSig == 0) { 2248 float_raise(float_flag_inexact, status); 2249 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 2250 piby2_exp, pi_sig, 0, status); 2251 } 2252 2253 compact = floatx80_make_compact(aExp, aSig); 2254 2255 if (compact >= 0x3FFF8000) { /* |X| >= 1 */ 2256 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ 2257 if (aSign) { /* X == -1 */ 2258 a = packFloatx80(0, pi_exp, pi_sig); 2259 float_raise(float_flag_inexact, status); 2260 return floatx80_move(a, status); 2261 } else { /* X == +1 */ 2262 return packFloatx80(0, 0, 0); 2263 } 2264 } else { /* |X| > 1 */ 2265 float_raise(float_flag_invalid, status); 2266 return floatx80_default_nan(status); 2267 } 2268 } /* |X| < 1 */ 2269 2270 user_rnd_mode = status->float_rounding_mode; 2271 user_rnd_prec = status->floatx80_rounding_precision; 2272 status->float_rounding_mode = float_round_nearest_even; 2273 status->floatx80_rounding_precision = 80; 2274 2275 one = packFloatx80(0, one_exp, one_sig); 2276 fp0 = a; 2277 2278 fp1 = floatx80_add(one, fp0, status); /* 1 + X */ 2279 fp0 = floatx80_sub(one, fp0, status); /* 1 - X */ 2280 fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */ 2281 fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */ 2282 fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */ 2283 2284 status->float_rounding_mode = user_rnd_mode; 2285 status->floatx80_rounding_precision = user_rnd_prec; 2286 2287 a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */ 2288 2289 float_raise(float_flag_inexact, status); 2290 2291 return a; 2292 } 2293 2294 /*---------------------------------------------------------------------------- 2295 | Hyperbolic arc tangent 2296 *----------------------------------------------------------------------------*/ 2297 2298 floatx80 floatx80_atanh(floatx80 a, float_status *status) 2299 { 2300 flag aSign; 2301 int32_t aExp; 2302 uint64_t aSig; 2303 2304 int8_t user_rnd_mode, user_rnd_prec; 2305 2306 int32_t compact; 2307 floatx80 fp0, fp1, fp2, one; 2308 2309 aSig = extractFloatx80Frac(a); 2310 aExp = extractFloatx80Exp(a); 2311 aSign = extractFloatx80Sign(a); 2312 2313 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { 2314 return propagateFloatx80NaNOneArg(a, status); 2315 } 2316 2317 if (aExp == 0 && aSig == 0) { 2318 return packFloatx80(aSign, 0, 0); 2319 } 2320 2321 compact = floatx80_make_compact(aExp, aSig); 2322 2323 if (compact >= 0x3FFF8000) { /* |X| >= 1 */ 2324 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ 2325 float_raise(float_flag_divbyzero, status); 2326 return packFloatx80(aSign, floatx80_infinity.high, 2327 floatx80_infinity.low); 2328 } else { /* |X| > 1 */ 2329 float_raise(float_flag_invalid, status); 2330 return floatx80_default_nan(status); 2331 } 2332 } /* |X| < 1 */ 2333 2334 user_rnd_mode = status->float_rounding_mode; 2335 user_rnd_prec = status->floatx80_rounding_precision; 2336 status->float_rounding_mode = float_round_nearest_even; 2337 status->floatx80_rounding_precision = 80; 2338 2339 one = packFloatx80(0, one_exp, one_sig); 2340 fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */ 2341 fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */ 2342 fp1 = packFloatx80(1, aExp, aSig); /* -Y */ 2343 fp0 = floatx80_add(fp0, fp0, status); /* 2Y */ 2344 fp1 = floatx80_add(fp1, one, status); /* 1-Y */ 2345 fp0 = floatx80_div(fp0, fp1, status); /* Z = 
2Y/(1-Y) */ 2346 fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */ 2347 2348 status->float_rounding_mode = user_rnd_mode; 2349 status->floatx80_rounding_precision = user_rnd_prec; 2350 2351 a = floatx80_mul(fp0, fp2, 2352 status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */ 2353 2354 float_raise(float_flag_inexact, status); 2355 2356 return a; 2357 } 2358 2359 /*---------------------------------------------------------------------------- 2360 | e to x minus 1 2361 *----------------------------------------------------------------------------*/ 2362 2363 floatx80 floatx80_etoxm1(floatx80 a, float_status *status) 2364 { 2365 flag aSign; 2366 int32_t aExp; 2367 uint64_t aSig; 2368 2369 int8_t user_rnd_mode, user_rnd_prec; 2370 2371 int32_t compact, n, j, m, m1; 2372 floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc; 2373 2374 aSig = extractFloatx80Frac(a); 2375 aExp = extractFloatx80Exp(a); 2376 aSign = extractFloatx80Sign(a); 2377 2378 if (aExp == 0x7FFF) { 2379 if ((uint64_t) (aSig << 1)) { 2380 return propagateFloatx80NaNOneArg(a, status); 2381 } 2382 if (aSign) { 2383 return packFloatx80(aSign, one_exp, one_sig); 2384 } 2385 return packFloatx80(0, floatx80_infinity.high, 2386 floatx80_infinity.low); 2387 } 2388 2389 if (aExp == 0 && aSig == 0) { 2390 return packFloatx80(aSign, 0, 0); 2391 } 2392 2393 user_rnd_mode = status->float_rounding_mode; 2394 user_rnd_prec = status->floatx80_rounding_precision; 2395 status->float_rounding_mode = float_round_nearest_even; 2396 status->floatx80_rounding_precision = 80; 2397 2398 if (aExp >= 0x3FFD) { /* |X| >= 1/4 */ 2399 compact = floatx80_make_compact(aExp, aSig); 2400 2401 if (compact <= 0x4004C215) { /* |X| <= 70 log2 */ 2402 fp0 = a; 2403 fp1 = a; 2404 fp0 = floatx80_mul(fp0, float32_to_floatx80( 2405 make_float32(0x42B8AA3B), status), 2406 status); /* 64/log2 * X */ 2407 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ 2408 fp0 = int32_to_floatx80(n, status); 2409 2410 j = n & 0x3F; /* J = N mod 64 */ 2411 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ 2412 if (n < 0 && j) { 2413 /* arithmetic right shift is division and 2414 * round towards minus infinity 2415 */ 2416 m--; 2417 } 2418 m1 = -m; 2419 /*m += 0x3FFF; // biased exponent of 2^(M) */ 2420 /*m1 += 0x3FFF; // biased exponent of -2^(-M) */ 2421 2422 fp2 = fp0; /* N */ 2423 fp0 = floatx80_mul(fp0, float32_to_floatx80( 2424 make_float32(0xBC317218), status), 2425 status); /* N * L1, L1 = lead(-log2/64) */ 2426 l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); 2427 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ 2428 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ 2429 fp0 = floatx80_add(fp0, fp2, status); /* R */ 2430 2431 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 2432 fp2 = float32_to_floatx80(make_float32(0x3950097B), 2433 status); /* A6 */ 2434 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */ 2435 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A), 2436 status), fp1, status); /* fp3 is S*A5 */ 2437 fp2 = floatx80_add(fp2, float64_to_floatx80( 2438 make_float64(0x3F81111111174385), status), 2439 status); /* fp2 IS A4+S*A6 */ 2440 fp3 = floatx80_add(fp3, float64_to_floatx80( 2441 make_float64(0x3FA5555555554F5A), status), 2442 status); /* fp3 is A3+S*A5 */ 2443 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */ 2444 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */ 2445 fp2 = floatx80_add(fp2, float64_to_floatx80( 2446 make_float64(0x3FC5555555555555), status), 2447 status); /* 
fp2 IS A2+S*(A4+S*A6) */ 2448 fp3 = floatx80_add(fp3, float32_to_floatx80( 2449 make_float32(0x3F000000), status), 2450 status); /* fp3 IS A1+S*(A3+S*A5) */ 2451 fp2 = floatx80_mul(fp2, fp1, 2452 status); /* fp2 IS S*(A2+S*(A4+S*A6)) */ 2453 fp1 = floatx80_mul(fp1, fp3, 2454 status); /* fp1 IS S*(A1+S*(A3+S*A5)) */ 2455 fp2 = floatx80_mul(fp2, fp0, 2456 status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */ 2457 fp0 = floatx80_add(fp0, fp1, 2458 status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */ 2459 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ 2460 2461 fp0 = floatx80_mul(fp0, exp_tbl[j], 2462 status); /* 2^(J/64)*(Exp(R)-1) */ 2463 2464 if (m >= 64) { 2465 fp1 = float32_to_floatx80(exp_tbl2[j], status); 2466 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ 2467 fp1 = floatx80_add(fp1, onebysc, status); 2468 fp0 = floatx80_add(fp0, fp1, status); 2469 fp0 = floatx80_add(fp0, exp_tbl[j], status); 2470 } else if (m < -3) { 2471 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], 2472 status), status); 2473 fp0 = floatx80_add(fp0, exp_tbl[j], status); 2474 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ 2475 fp0 = floatx80_add(fp0, onebysc, status); 2476 } else { /* -3 <= m <= 63 */ 2477 fp1 = exp_tbl[j]; 2478 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], 2479 status), status); 2480 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ 2481 fp1 = floatx80_add(fp1, onebysc, status); 2482 fp0 = floatx80_add(fp0, fp1, status); 2483 } 2484 2485 sc = packFloatx80(0, m + 0x3FFF, one_sig); 2486 2487 status->float_rounding_mode = user_rnd_mode; 2488 status->floatx80_rounding_precision = user_rnd_prec; 2489 2490 a = floatx80_mul(fp0, sc, status); 2491 2492 float_raise(float_flag_inexact, status); 2493 2494 return a; 2495 } else { /* |X| > 70 log2 */ 2496 if (aSign) { 2497 fp0 = float32_to_floatx80(make_float32(0xBF800000), 2498 status); /* -1 */ 2499 2500 status->float_rounding_mode = user_rnd_mode; 2501 status->floatx80_rounding_precision = user_rnd_prec; 2502 2503 a = floatx80_add(fp0, float32_to_floatx80( 2504 make_float32(0x00800000), status), 2505 status); /* -1 + 2^(-126) */ 2506 2507 float_raise(float_flag_inexact, status); 2508 2509 return a; 2510 } else { 2511 status->float_rounding_mode = user_rnd_mode; 2512 status->floatx80_rounding_precision = user_rnd_prec; 2513 2514 return floatx80_etox(a, status); 2515 } 2516 } 2517 } else { /* |X| < 1/4 */ 2518 if (aExp >= 0x3FBE) { 2519 fp0 = a; 2520 fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */ 2521 fp1 = float32_to_floatx80(make_float32(0x2F30CAA8), 2522 status); /* B12 */ 2523 fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */ 2524 fp2 = float32_to_floatx80(make_float32(0x310F8290), 2525 status); /* B11 */ 2526 fp1 = floatx80_add(fp1, float32_to_floatx80( 2527 make_float32(0x32D73220), status), 2528 status); /* B10 */ 2529 fp2 = floatx80_mul(fp2, fp0, status); 2530 fp1 = floatx80_mul(fp1, fp0, status); 2531 fp2 = floatx80_add(fp2, float32_to_floatx80( 2532 make_float32(0x3493F281), status), 2533 status); /* B9 */ 2534 fp1 = floatx80_add(fp1, float64_to_floatx80( 2535 make_float64(0x3EC71DE3A5774682), status), 2536 status); /* B8 */ 2537 fp2 = floatx80_mul(fp2, fp0, status); 2538 fp1 = floatx80_mul(fp1, fp0, status); 2539 fp2 = floatx80_add(fp2, float64_to_floatx80( 2540 make_float64(0x3EFA01A019D7CB68), status), 2541 status); /* B7 */ 2542 fp1 = floatx80_add(fp1, float64_to_floatx80( 2543 make_float64(0x3F2A01A01A019DF3), status), 2544 status); /* B6 */ 2545 fp2 = floatx80_mul(fp2, fp0, 
status); 2546 fp1 = floatx80_mul(fp1, fp0, status); 2547 fp2 = floatx80_add(fp2, float64_to_floatx80( 2548 make_float64(0x3F56C16C16C170E2), status), 2549 status); /* B5 */ 2550 fp1 = floatx80_add(fp1, float64_to_floatx80( 2551 make_float64(0x3F81111111111111), status), 2552 status); /* B4 */ 2553 fp2 = floatx80_mul(fp2, fp0, status); 2554 fp1 = floatx80_mul(fp1, fp0, status); 2555 fp2 = floatx80_add(fp2, float64_to_floatx80( 2556 make_float64(0x3FA5555555555555), status), 2557 status); /* B3 */ 2558 fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB)); 2559 fp1 = floatx80_add(fp1, fp3, status); /* B2 */ 2560 fp2 = floatx80_mul(fp2, fp0, status); 2561 fp1 = floatx80_mul(fp1, fp0, status); 2562 2563 fp2 = floatx80_mul(fp2, fp0, status); 2564 fp1 = floatx80_mul(fp1, a, status); 2565 2566 fp0 = floatx80_mul(fp0, float32_to_floatx80( 2567 make_float32(0x3F000000), status), 2568 status); /* S*B1 */ 2569 fp1 = floatx80_add(fp1, fp2, status); /* Q */ 2570 fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */ 2571 2572 status->float_rounding_mode = user_rnd_mode; 2573 status->floatx80_rounding_precision = user_rnd_prec; 2574 2575 a = floatx80_add(fp0, a, status); 2576 2577 float_raise(float_flag_inexact, status); 2578 2579 return a; 2580 } else { /* |X| < 2^(-65) */ 2581 sc = packFloatx80(1, 1, one_sig); 2582 fp0 = a; 2583 2584 if (aExp < 0x0033) { /* |X| < 2^(-16382) */ 2585 fp0 = floatx80_mul(fp0, float64_to_floatx80( 2586 make_float64(0x48B0000000000000), status), 2587 status); 2588 fp0 = floatx80_add(fp0, sc, status); 2589 2590 status->float_rounding_mode = user_rnd_mode; 2591 status->floatx80_rounding_precision = user_rnd_prec; 2592 2593 a = floatx80_mul(fp0, float64_to_floatx80( 2594 make_float64(0x3730000000000000), status), 2595 status); 2596 } else { 2597 status->float_rounding_mode = user_rnd_mode; 2598 status->floatx80_rounding_precision = user_rnd_prec; 2599 2600 a = floatx80_add(fp0, sc, status); 2601 } 2602 2603 float_raise(float_flag_inexact, status); 2604 2605 return a; 2606 } 2607 } 2608 } 2609 2610 /*---------------------------------------------------------------------------- 2611 | Hyperbolic tangent 2612 *----------------------------------------------------------------------------*/ 2613 2614 floatx80 floatx80_tanh(floatx80 a, float_status *status) 2615 { 2616 flag aSign, vSign; 2617 int32_t aExp, vExp; 2618 uint64_t aSig, vSig; 2619 2620 int8_t user_rnd_mode, user_rnd_prec; 2621 2622 int32_t compact; 2623 floatx80 fp0, fp1; 2624 uint32_t sign; 2625 2626 aSig = extractFloatx80Frac(a); 2627 aExp = extractFloatx80Exp(a); 2628 aSign = extractFloatx80Sign(a); 2629 2630 if (aExp == 0x7FFF) { 2631 if ((uint64_t) (aSig << 1)) { 2632 return propagateFloatx80NaNOneArg(a, status); 2633 } 2634 return packFloatx80(aSign, one_exp, one_sig); 2635 } 2636 2637 if (aExp == 0 && aSig == 0) { 2638 return packFloatx80(aSign, 0, 0); 2639 } 2640 2641 user_rnd_mode = status->float_rounding_mode; 2642 user_rnd_prec = status->floatx80_rounding_precision; 2643 status->float_rounding_mode = float_round_nearest_even; 2644 status->floatx80_rounding_precision = 80; 2645 2646 compact = floatx80_make_compact(aExp, aSig); 2647 2648 if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) { 2649 /* TANHBORS */ 2650 if (compact < 0x3FFF8000) { 2651 /* TANHSM */ 2652 status->float_rounding_mode = user_rnd_mode; 2653 status->floatx80_rounding_precision = user_rnd_prec; 2654 2655 a = floatx80_move(a, status); 2656 2657 float_raise(float_flag_inexact, status); 2658 2659 return a; 2660 } else { 2661 if (compact > 
0x40048AA1) { 2662 /* TANHHUGE */ 2663 sign = 0x3F800000; 2664 sign |= aSign ? 0x80000000 : 0x00000000; 2665 fp0 = float32_to_floatx80(make_float32(sign), status); 2666 sign &= 0x80000000; 2667 sign ^= 0x80800000; /* -SIGN(X)*EPS */ 2668 2669 status->float_rounding_mode = user_rnd_mode; 2670 status->floatx80_rounding_precision = user_rnd_prec; 2671 2672 a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign), 2673 status), status); 2674 2675 float_raise(float_flag_inexact, status); 2676 2677 return a; 2678 } else { 2679 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ 2680 fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */ 2681 fp0 = floatx80_add(fp0, float32_to_floatx80( 2682 make_float32(0x3F800000), 2683 status), status); /* EXP(Y)+1 */ 2684 sign = aSign ? 0x80000000 : 0x00000000; 2685 fp1 = floatx80_div(float32_to_floatx80(make_float32( 2686 sign ^ 0xC0000000), status), fp0, 2687 status); /* -SIGN(X)*2 / [EXP(Y)+1] */ 2688 fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000), 2689 status); /* SIGN */ 2690 2691 status->float_rounding_mode = user_rnd_mode; 2692 status->floatx80_rounding_precision = user_rnd_prec; 2693 2694 a = floatx80_add(fp1, fp0, status); 2695 2696 float_raise(float_flag_inexact, status); 2697 2698 return a; 2699 } 2700 } 2701 } else { /* 2**(-40) < |X| < (5/2)LOG2 */ 2702 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ 2703 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ 2704 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000), 2705 status), 2706 status); /* Z+2 */ 2707 2708 vSign = extractFloatx80Sign(fp1); 2709 vExp = extractFloatx80Exp(fp1); 2710 vSig = extractFloatx80Frac(fp1); 2711 2712 fp1 = packFloatx80(vSign ^ aSign, vExp, vSig); 2713 2714 status->float_rounding_mode = user_rnd_mode; 2715 status->floatx80_rounding_precision = user_rnd_prec; 2716 2717 a = floatx80_div(fp0, fp1, status); 2718 2719 float_raise(float_flag_inexact, status); 2720 2721 return a; 2722 } 2723 } 2724 2725 /*---------------------------------------------------------------------------- 2726 | Hyperbolic sine 2727 *----------------------------------------------------------------------------*/ 2728 2729 floatx80 floatx80_sinh(floatx80 a, float_status *status) 2730 { 2731 flag aSign; 2732 int32_t aExp; 2733 uint64_t aSig; 2734 2735 int8_t user_rnd_mode, user_rnd_prec; 2736 2737 int32_t compact; 2738 floatx80 fp0, fp1, fp2; 2739 float32 fact; 2740 2741 aSig = extractFloatx80Frac(a); 2742 aExp = extractFloatx80Exp(a); 2743 aSign = extractFloatx80Sign(a); 2744 2745 if (aExp == 0x7FFF) { 2746 if ((uint64_t) (aSig << 1)) { 2747 return propagateFloatx80NaNOneArg(a, status); 2748 } 2749 return packFloatx80(aSign, floatx80_infinity.high, 2750 floatx80_infinity.low); 2751 } 2752 2753 if (aExp == 0 && aSig == 0) { 2754 return packFloatx80(aSign, 0, 0); 2755 } 2756 2757 user_rnd_mode = status->float_rounding_mode; 2758 user_rnd_prec = status->floatx80_rounding_precision; 2759 status->float_rounding_mode = float_round_nearest_even; 2760 status->floatx80_rounding_precision = 80; 2761 2762 compact = floatx80_make_compact(aExp, aSig); 2763 2764 if (compact > 0x400CB167) { 2765 /* SINHBIG */ 2766 if (compact > 0x400CB2B3) { 2767 status->float_rounding_mode = user_rnd_mode; 2768 status->floatx80_rounding_precision = user_rnd_prec; 2769 2770 return roundAndPackFloatx80(status->floatx80_rounding_precision, 2771 aSign, 0x8000, aSig, 0, status); 2772 } else { 2773 fp0 = floatx80_abs(a); /* Y = |X| */ 2774 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2775 
make_float64(0x40C62D38D3D64634), status), 2776 status); /* (|X|-16381LOG2_LEAD) */ 2777 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2778 make_float64(0x3D6F90AEB1E75CC7), status), 2779 status); /* |X| - 16381 LOG2, ACCURATE */ 2780 fp0 = floatx80_etox(fp0, status); 2781 fp2 = packFloatx80(aSign, 0x7FFB, one_sig); 2782 2783 status->float_rounding_mode = user_rnd_mode; 2784 status->floatx80_rounding_precision = user_rnd_prec; 2785 2786 a = floatx80_mul(fp0, fp2, status); 2787 2788 float_raise(float_flag_inexact, status); 2789 2790 return a; 2791 } 2792 } else { /* |X| < 16380 LOG2 */ 2793 fp0 = floatx80_abs(a); /* Y = |X| */ 2794 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ 2795 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 2796 status), status); /* 1+Z */ 2797 fp2 = fp0; 2798 fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */ 2799 fp0 = floatx80_add(fp0, fp2, status); 2800 2801 fact = packFloat32(aSign, 0x7E, 0); 2802 2803 status->float_rounding_mode = user_rnd_mode; 2804 status->floatx80_rounding_precision = user_rnd_prec; 2805 2806 a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status); 2807 2808 float_raise(float_flag_inexact, status); 2809 2810 return a; 2811 } 2812 } 2813 2814 /*---------------------------------------------------------------------------- 2815 | Hyperbolic cosine 2816 *----------------------------------------------------------------------------*/ 2817 2818 floatx80 floatx80_cosh(floatx80 a, float_status *status) 2819 { 2820 int32_t aExp; 2821 uint64_t aSig; 2822 2823 int8_t user_rnd_mode, user_rnd_prec; 2824 2825 int32_t compact; 2826 floatx80 fp0, fp1; 2827 2828 aSig = extractFloatx80Frac(a); 2829 aExp = extractFloatx80Exp(a); 2830 2831 if (aExp == 0x7FFF) { 2832 if ((uint64_t) (aSig << 1)) { 2833 return propagateFloatx80NaNOneArg(a, status); 2834 } 2835 return packFloatx80(0, floatx80_infinity.high, 2836 floatx80_infinity.low); 2837 } 2838 2839 if (aExp == 0 && aSig == 0) { 2840 return packFloatx80(0, one_exp, one_sig); 2841 } 2842 2843 user_rnd_mode = status->float_rounding_mode; 2844 user_rnd_prec = status->floatx80_rounding_precision; 2845 status->float_rounding_mode = float_round_nearest_even; 2846 status->floatx80_rounding_precision = 80; 2847 2848 compact = floatx80_make_compact(aExp, aSig); 2849 2850 if (compact > 0x400CB167) { 2851 if (compact > 0x400CB2B3) { 2852 status->float_rounding_mode = user_rnd_mode; 2853 status->floatx80_rounding_precision = user_rnd_prec; 2854 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 2855 0x8000, one_sig, 0, status); 2856 } else { 2857 fp0 = packFloatx80(0, aExp, aSig); 2858 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2859 make_float64(0x40C62D38D3D64634), status), 2860 status); 2861 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2862 make_float64(0x3D6F90AEB1E75CC7), status), 2863 status); 2864 fp0 = floatx80_etox(fp0, status); 2865 fp1 = packFloatx80(0, 0x7FFB, one_sig); 2866 2867 status->float_rounding_mode = user_rnd_mode; 2868 status->floatx80_rounding_precision = user_rnd_prec; 2869 2870 a = floatx80_mul(fp0, fp1, status); 2871 2872 float_raise(float_flag_inexact, status); 2873 2874 return a; 2875 } 2876 } 2877 2878 fp0 = packFloatx80(0, aExp, aSig); /* |X| */ 2879 fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */ 2880 fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000), 2881 status), status); /* (1/2)*EXP(|X|) */ 2882 fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */ 2883 fp1 = floatx80_div(fp1, fp0, status); /* 
1/(2*EXP(|X|)) */ 2884 2885 status->float_rounding_mode = user_rnd_mode; 2886 status->floatx80_rounding_precision = user_rnd_prec; 2887 2888 a = floatx80_add(fp0, fp1, status); 2889 2890 float_raise(float_flag_inexact, status); 2891 2892 return a; 2893 } 2894