1 /* 2 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator, 3 * derived from NetBSD M68040 FPSP functions, 4 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic 5 * Package. Those parts of the code (and some later contributions) are 6 * provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file will be taken to be licensed under 14 * the Softfloat-2a license unless specifically indicated otherwise. 15 */ 16 17 /* Portions of this work are licensed under the terms of the GNU GPL, 18 * version 2 or later. See the COPYING file in the top-level directory. 19 */ 20 21 #include "qemu/osdep.h" 22 #include "softfloat.h" 23 #include "fpu/softfloat-macros.h" 24 #include "softfloat_fpsp_tables.h" 25 26 #define pi_exp 0x4000 27 #define piby2_exp 0x3FFF 28 #define pi_sig LIT64(0xc90fdaa22168c235) 29 30 static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) 31 { 32 if (floatx80_is_signaling_nan(a, status)) { 33 float_raise(float_flag_invalid, status); 34 } 35 36 if (status->default_nan_mode) { 37 return floatx80_default_nan(status); 38 } 39 40 return floatx80_maybe_silence_nan(a, status); 41 } 42 43 /*---------------------------------------------------------------------------- 44 | Returns the modulo remainder of the extended double-precision floating-point 45 | value `a' with respect to the corresponding value `b'. 46 *----------------------------------------------------------------------------*/ 47 48 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) 49 { 50 flag aSign, zSign; 51 int32_t aExp, bExp, expDiff; 52 uint64_t aSig0, aSig1, bSig; 53 uint64_t qTemp, term0, term1; 54 55 aSig0 = extractFloatx80Frac(a); 56 aExp = extractFloatx80Exp(a); 57 aSign = extractFloatx80Sign(a); 58 bSig = extractFloatx80Frac(b); 59 bExp = extractFloatx80Exp(b); 60 61 if (aExp == 0x7FFF) { 62 if ((uint64_t) (aSig0 << 1) 63 || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { 64 return propagateFloatx80NaN(a, b, status); 65 } 66 goto invalid; 67 } 68 if (bExp == 0x7FFF) { 69 if ((uint64_t) (bSig << 1)) { 70 return propagateFloatx80NaN(a, b, status); 71 } 72 return a; 73 } 74 if (bExp == 0) { 75 if (bSig == 0) { 76 invalid: 77 float_raise(float_flag_invalid, status); 78 return floatx80_default_nan(status); 79 } 80 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 81 } 82 if (aExp == 0) { 83 if ((uint64_t) (aSig0 << 1) == 0) { 84 return a; 85 } 86 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); 87 } 88 bSig |= LIT64(0x8000000000000000); 89 zSign = aSign; 90 expDiff = aExp - bExp; 91 aSig1 = 0; 92 if (expDiff < 0) { 93 return a; 94 } 95 qTemp = (bSig <= aSig0); 96 if (qTemp) { 97 aSig0 -= bSig; 98 } 99 expDiff -= 64; 100 while (0 < expDiff) { 101 qTemp = estimateDiv128To64(aSig0, aSig1, bSig); 102 qTemp = (2 < qTemp) ? qTemp - 2 : 0; 103 mul64To128(bSig, qTemp, &term0, &term1); 104 sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); 105 shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); 106 } 107 expDiff += 64; 108 if (0 < expDiff) { 109 qTemp = estimateDiv128To64(aSig0, aSig1, bSig); 110 qTemp = (2 < qTemp) ? 
qTemp - 2 : 0; 111 qTemp >>= 64 - expDiff; 112 mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); 113 sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); 114 shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); 115 while (le128(term0, term1, aSig0, aSig1)) { 116 ++qTemp; 117 sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); 118 } 119 } 120 return 121 normalizeRoundAndPackFloatx80( 122 80, zSign, bExp + expDiff, aSig0, aSig1, status); 123 } 124 125 /*---------------------------------------------------------------------------- 126 | Returns the mantissa of the extended double-precision floating-point 127 | value `a'. 128 *----------------------------------------------------------------------------*/ 129 130 floatx80 floatx80_getman(floatx80 a, float_status *status) 131 { 132 flag aSign; 133 int32_t aExp; 134 uint64_t aSig; 135 136 aSig = extractFloatx80Frac(a); 137 aExp = extractFloatx80Exp(a); 138 aSign = extractFloatx80Sign(a); 139 140 if (aExp == 0x7FFF) { 141 if ((uint64_t) (aSig << 1)) { 142 return propagateFloatx80NaNOneArg(a , status); 143 } 144 float_raise(float_flag_invalid , status); 145 return floatx80_default_nan(status); 146 } 147 148 if (aExp == 0) { 149 if (aSig == 0) { 150 return packFloatx80(aSign, 0, 0); 151 } 152 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 153 } 154 155 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 156 0x3FFF, aSig, 0, status); 157 } 158 159 /*---------------------------------------------------------------------------- 160 | Returns the exponent of the extended double-precision floating-point 161 | value `a' as an extended double-precision value. 162 *----------------------------------------------------------------------------*/ 163 164 floatx80 floatx80_getexp(floatx80 a, float_status *status) 165 { 166 flag aSign; 167 int32_t aExp; 168 uint64_t aSig; 169 170 aSig = extractFloatx80Frac(a); 171 aExp = extractFloatx80Exp(a); 172 aSign = extractFloatx80Sign(a); 173 174 if (aExp == 0x7FFF) { 175 if ((uint64_t) (aSig << 1)) { 176 return propagateFloatx80NaNOneArg(a , status); 177 } 178 float_raise(float_flag_invalid , status); 179 return floatx80_default_nan(status); 180 } 181 182 if (aExp == 0) { 183 if (aSig == 0) { 184 return packFloatx80(aSign, 0, 0); 185 } 186 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 187 } 188 189 return int32_to_floatx80(aExp - 0x3FFF, status); 190 } 191 192 /*---------------------------------------------------------------------------- 193 | Scales extended double-precision floating-point value in operand `a' by 194 | value `b'. The function truncates the value in the second operand 'b' to 195 | an integral value and adds that value to the exponent of the operand 'a'. 196 | The operation performed according to the IEC/IEEE Standard for Binary 197 | Floating-Point Arithmetic. 
198 *----------------------------------------------------------------------------*/ 199 200 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) 201 { 202 flag aSign, bSign; 203 int32_t aExp, bExp, shiftCount; 204 uint64_t aSig, bSig; 205 206 aSig = extractFloatx80Frac(a); 207 aExp = extractFloatx80Exp(a); 208 aSign = extractFloatx80Sign(a); 209 bSig = extractFloatx80Frac(b); 210 bExp = extractFloatx80Exp(b); 211 bSign = extractFloatx80Sign(b); 212 213 if (bExp == 0x7FFF) { 214 if ((uint64_t) (bSig << 1) || 215 ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) { 216 return propagateFloatx80NaN(a, b, status); 217 } 218 float_raise(float_flag_invalid , status); 219 return floatx80_default_nan(status); 220 } 221 if (aExp == 0x7FFF) { 222 if ((uint64_t) (aSig << 1)) { 223 return propagateFloatx80NaN(a, b, status); 224 } 225 return packFloatx80(aSign, floatx80_infinity.high, 226 floatx80_infinity.low); 227 } 228 if (aExp == 0) { 229 if (aSig == 0) { 230 return packFloatx80(aSign, 0, 0); 231 } 232 if (bExp < 0x3FFF) { 233 return a; 234 } 235 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 236 } 237 238 if (bExp < 0x3FFF) { 239 return a; 240 } 241 242 if (0x400F < bExp) { 243 aExp = bSign ? -0x6001 : 0xE000; 244 return roundAndPackFloatx80(status->floatx80_rounding_precision, 245 aSign, aExp, aSig, 0, status); 246 } 247 248 shiftCount = 0x403E - bExp; 249 bSig >>= shiftCount; 250 aExp = bSign ? (aExp - bSig) : (aExp + bSig); 251 252 return roundAndPackFloatx80(status->floatx80_rounding_precision, 253 aSign, aExp, aSig, 0, status); 254 } 255 256 floatx80 floatx80_move(floatx80 a, float_status *status) 257 { 258 flag aSign; 259 int32_t aExp; 260 uint64_t aSig; 261 262 aSig = extractFloatx80Frac(a); 263 aExp = extractFloatx80Exp(a); 264 aSign = extractFloatx80Sign(a); 265 266 if (aExp == 0x7FFF) { 267 if ((uint64_t)(aSig << 1)) { 268 return propagateFloatx80NaNOneArg(a, status); 269 } 270 return a; 271 } 272 if (aExp == 0) { 273 if (aSig == 0) { 274 return a; 275 } 276 normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, 277 aSign, aExp, aSig, 0, status); 278 } 279 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 280 aExp, aSig, 0, status); 281 } 282 283 /*---------------------------------------------------------------------------- 284 | Algorithms for transcendental functions supported by MC68881 and MC68882 285 | mathematical coprocessors. The functions are derived from FPSP library. 286 *----------------------------------------------------------------------------*/ 287 288 #define one_exp 0x3FFF 289 #define one_sig LIT64(0x8000000000000000) 290 291 /*---------------------------------------------------------------------------- 292 | Function for compactifying extended double-precision floating point values. 
293 *----------------------------------------------------------------------------*/ 294 295 static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) 296 { 297 return (aExp << 16) | (aSig >> 48); 298 } 299 300 /*---------------------------------------------------------------------------- 301 | Log base e of x plus 1 302 *----------------------------------------------------------------------------*/ 303 304 floatx80 floatx80_lognp1(floatx80 a, float_status *status) 305 { 306 flag aSign; 307 int32_t aExp; 308 uint64_t aSig, fSig; 309 310 int8_t user_rnd_mode, user_rnd_prec; 311 312 int32_t compact, j, k; 313 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; 314 315 aSig = extractFloatx80Frac(a); 316 aExp = extractFloatx80Exp(a); 317 aSign = extractFloatx80Sign(a); 318 319 if (aExp == 0x7FFF) { 320 if ((uint64_t) (aSig << 1)) { 321 propagateFloatx80NaNOneArg(a, status); 322 } 323 if (aSign) { 324 float_raise(float_flag_invalid, status); 325 return floatx80_default_nan(status); 326 } 327 return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low); 328 } 329 330 if (aExp == 0 && aSig == 0) { 331 return packFloatx80(aSign, 0, 0); 332 } 333 334 if (aSign && aExp >= one_exp) { 335 if (aExp == one_exp && aSig == one_sig) { 336 float_raise(float_flag_divbyzero, status); 337 packFloatx80(aSign, floatx80_infinity.high, floatx80_infinity.low); 338 } 339 float_raise(float_flag_invalid, status); 340 return floatx80_default_nan(status); 341 } 342 343 if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { 344 /* <= min threshold */ 345 float_raise(float_flag_inexact, status); 346 return floatx80_move(a, status); 347 } 348 349 user_rnd_mode = status->float_rounding_mode; 350 user_rnd_prec = status->floatx80_rounding_precision; 351 status->float_rounding_mode = float_round_nearest_even; 352 status->floatx80_rounding_precision = 80; 353 354 compact = floatx80_make_compact(aExp, aSig); 355 356 fp0 = a; /* Z */ 357 fp1 = a; 358 359 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 360 status), status); /* X = (1+Z) */ 361 362 aExp = extractFloatx80Exp(fp0); 363 aSig = extractFloatx80Frac(fp0); 364 365 compact = floatx80_make_compact(aExp, aSig); 366 367 if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { 368 /* |X| < 1/2 or |X| > 3/2 */ 369 k = aExp - 0x3FFF; 370 fp1 = int32_to_floatx80(k, status); 371 372 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); 373 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ 374 375 f = packFloatx80(0, 0x3FFF, fSig); /* F */ 376 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ 377 378 fp0 = floatx80_sub(fp0, f, status); /* Y-F */ 379 380 lp1cont1: 381 /* LP1CONT1 */ 382 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ 383 logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); 384 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ 385 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ 386 387 fp3 = fp2; 388 fp1 = fp2; 389 390 fp1 = floatx80_mul(fp1, float64_to_floatx80( 391 make_float64(0x3FC2499AB5E4040B), status), 392 status); /* V*A6 */ 393 fp2 = floatx80_mul(fp2, float64_to_floatx80( 394 make_float64(0xBFC555B5848CB7DB), status), 395 status); /* V*A5 */ 396 fp1 = floatx80_add(fp1, float64_to_floatx80( 397 make_float64(0x3FC99999987D8730), status), 398 status); /* A4+V*A6 */ 399 fp2 = floatx80_add(fp2, float64_to_floatx80( 400 make_float64(0xBFCFFFFFFF6F7E97), status), 401 status); /* A3+V*A5 */ 402 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ 403 fp2 = 
floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ 404 fp1 = floatx80_add(fp1, float64_to_floatx80( 405 make_float64(0x3FD55555555555A4), status), 406 status); /* A2+V*(A4+V*A6) */ 407 fp2 = floatx80_add(fp2, float64_to_floatx80( 408 make_float64(0xBFE0000000000008), status), 409 status); /* A1+V*(A3+V*A5) */ 410 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ 411 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ 412 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ 413 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ 414 415 fp1 = floatx80_add(fp1, log_tbl[j + 1], 416 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ 417 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ 418 419 status->float_rounding_mode = user_rnd_mode; 420 status->floatx80_rounding_precision = user_rnd_prec; 421 422 a = floatx80_add(fp0, klog2, status); 423 424 float_raise(float_flag_inexact, status); 425 426 return a; 427 } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { 428 /* |X| < 1/16 or |X| > -1/16 */ 429 /* LP1CARE */ 430 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); 431 f = packFloatx80(0, 0x3FFF, fSig); /* F */ 432 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ 433 434 if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */ 435 /* KISZERO */ 436 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000), 437 status), f, status); /* 1-F */ 438 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */ 439 fp1 = packFloatx80(0, 0, 0); /* K = 0 */ 440 } else { 441 /* KISNEG */ 442 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000), 443 status), f, status); /* 2-F */ 444 fp1 = floatx80_add(fp1, fp1, status); /* 2Z */ 445 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */ 446 fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */ 447 } 448 goto lp1cont1; 449 } else { 450 /* LP1ONE16 */ 451 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */ 452 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 453 status), status); /* FP0 IS 1+X */ 454 455 /* LP1CONT2 */ 456 fp1 = floatx80_div(fp1, fp0, status); /* U */ 457 saveu = fp1; 458 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ 459 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ 460 461 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), 462 status); /* B5 */ 463 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), 464 status); /* B4 */ 465 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ 466 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ 467 fp3 = floatx80_add(fp3, float64_to_floatx80( 468 make_float64(0x3F624924928BCCFF), status), 469 status); /* B3+W*B5 */ 470 fp2 = floatx80_add(fp2, float64_to_floatx80( 471 make_float64(0x3F899999999995EC), status), 472 status); /* B2+W*B4 */ 473 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ 474 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ 475 fp1 = floatx80_add(fp1, float64_to_floatx80( 476 make_float64(0x3FB5555555555555), status), 477 status); /* B1+W*(B3+W*B5) */ 478 479 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ 480 fp1 = floatx80_add(fp1, fp2, 481 status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ 482 fp0 = floatx80_mul(fp0, fp1, 483 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ 484 485 status->float_rounding_mode = user_rnd_mode; 486 status->floatx80_rounding_precision = user_rnd_prec; 487 488 a = floatx80_add(fp0, saveu, status); 489 490 /*if (!floatx80_is_zero(a)) { */ 491 
float_raise(float_flag_inexact, status); 492 /*} */ 493 494 return a; 495 } 496 } 497 498 /*---------------------------------------------------------------------------- 499 | Log base e 500 *----------------------------------------------------------------------------*/ 501 502 floatx80 floatx80_logn(floatx80 a, float_status *status) 503 { 504 flag aSign; 505 int32_t aExp; 506 uint64_t aSig, fSig; 507 508 int8_t user_rnd_mode, user_rnd_prec; 509 510 int32_t compact, j, k, adjk; 511 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; 512 513 aSig = extractFloatx80Frac(a); 514 aExp = extractFloatx80Exp(a); 515 aSign = extractFloatx80Sign(a); 516 517 if (aExp == 0x7FFF) { 518 if ((uint64_t) (aSig << 1)) { 519 propagateFloatx80NaNOneArg(a, status); 520 } 521 if (aSign == 0) { 522 return packFloatx80(0, floatx80_infinity.high, 523 floatx80_infinity.low); 524 } 525 } 526 527 adjk = 0; 528 529 if (aExp == 0) { 530 if (aSig == 0) { /* zero */ 531 float_raise(float_flag_divbyzero, status); 532 return packFloatx80(1, floatx80_infinity.high, 533 floatx80_infinity.low); 534 } 535 if ((aSig & one_sig) == 0) { /* denormal */ 536 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 537 adjk = -100; 538 aExp += 100; 539 a = packFloatx80(aSign, aExp, aSig); 540 } 541 } 542 543 if (aSign) { 544 float_raise(float_flag_invalid, status); 545 return floatx80_default_nan(status); 546 } 547 548 user_rnd_mode = status->float_rounding_mode; 549 user_rnd_prec = status->floatx80_rounding_precision; 550 status->float_rounding_mode = float_round_nearest_even; 551 status->floatx80_rounding_precision = 80; 552 553 compact = floatx80_make_compact(aExp, aSig); 554 555 if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { 556 /* |X| < 15/16 or |X| > 17/16 */ 557 k = aExp - 0x3FFF; 558 k += adjk; 559 fp1 = int32_to_floatx80(k, status); 560 561 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); 562 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ 563 564 f = packFloatx80(0, 0x3FFF, fSig); /* F */ 565 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ 566 567 fp0 = floatx80_sub(fp0, f, status); /* Y-F */ 568 569 /* LP1CONT1 */ 570 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ 571 logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); 572 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ 573 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ 574 575 fp3 = fp2; 576 fp1 = fp2; 577 578 fp1 = floatx80_mul(fp1, float64_to_floatx80( 579 make_float64(0x3FC2499AB5E4040B), status), 580 status); /* V*A6 */ 581 fp2 = floatx80_mul(fp2, float64_to_floatx80( 582 make_float64(0xBFC555B5848CB7DB), status), 583 status); /* V*A5 */ 584 fp1 = floatx80_add(fp1, float64_to_floatx80( 585 make_float64(0x3FC99999987D8730), status), 586 status); /* A4+V*A6 */ 587 fp2 = floatx80_add(fp2, float64_to_floatx80( 588 make_float64(0xBFCFFFFFFF6F7E97), status), 589 status); /* A3+V*A5 */ 590 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ 591 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ 592 fp1 = floatx80_add(fp1, float64_to_floatx80( 593 make_float64(0x3FD55555555555A4), status), 594 status); /* A2+V*(A4+V*A6) */ 595 fp2 = floatx80_add(fp2, float64_to_floatx80( 596 make_float64(0xBFE0000000000008), status), 597 status); /* A1+V*(A3+V*A5) */ 598 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ 599 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ 600 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ 601 fp0 = floatx80_add(fp0, fp2, status); 
/* U+V*(A1+V*(A3+V*A5)) */ 602 603 fp1 = floatx80_add(fp1, log_tbl[j + 1], 604 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ 605 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ 606 607 status->float_rounding_mode = user_rnd_mode; 608 status->floatx80_rounding_precision = user_rnd_prec; 609 610 a = floatx80_add(fp0, klog2, status); 611 612 float_raise(float_flag_inexact, status); 613 614 return a; 615 } else { /* |X-1| >= 1/16 */ 616 fp0 = a; 617 fp1 = a; 618 fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000), 619 status), status); /* FP1 IS X-1 */ 620 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 621 status), status); /* FP0 IS X+1 */ 622 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */ 623 624 /* LP1CONT2 */ 625 fp1 = floatx80_div(fp1, fp0, status); /* U */ 626 saveu = fp1; 627 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ 628 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ 629 630 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), 631 status); /* B5 */ 632 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), 633 status); /* B4 */ 634 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ 635 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ 636 fp3 = floatx80_add(fp3, float64_to_floatx80( 637 make_float64(0x3F624924928BCCFF), status), 638 status); /* B3+W*B5 */ 639 fp2 = floatx80_add(fp2, float64_to_floatx80( 640 make_float64(0x3F899999999995EC), status), 641 status); /* B2+W*B4 */ 642 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ 643 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ 644 fp1 = floatx80_add(fp1, float64_to_floatx80( 645 make_float64(0x3FB5555555555555), status), 646 status); /* B1+W*(B3+W*B5) */ 647 648 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ 649 fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ 650 fp0 = floatx80_mul(fp0, fp1, 651 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ 652 653 status->float_rounding_mode = user_rnd_mode; 654 status->floatx80_rounding_precision = user_rnd_prec; 655 656 a = floatx80_add(fp0, saveu, status); 657 658 /*if (!floatx80_is_zero(a)) { */ 659 float_raise(float_flag_inexact, status); 660 /*} */ 661 662 return a; 663 } 664 } 665 666 /*---------------------------------------------------------------------------- 667 | Log base 10 668 *----------------------------------------------------------------------------*/ 669 670 floatx80 floatx80_log10(floatx80 a, float_status *status) 671 { 672 flag aSign; 673 int32_t aExp; 674 uint64_t aSig; 675 676 int8_t user_rnd_mode, user_rnd_prec; 677 678 floatx80 fp0, fp1; 679 680 aSig = extractFloatx80Frac(a); 681 aExp = extractFloatx80Exp(a); 682 aSign = extractFloatx80Sign(a); 683 684 if (aExp == 0x7FFF) { 685 if ((uint64_t) (aSig << 1)) { 686 propagateFloatx80NaNOneArg(a, status); 687 } 688 if (aSign == 0) { 689 return packFloatx80(0, floatx80_infinity.high, 690 floatx80_infinity.low); 691 } 692 } 693 694 if (aExp == 0 && aSig == 0) { 695 float_raise(float_flag_divbyzero, status); 696 return packFloatx80(1, floatx80_infinity.high, 697 floatx80_infinity.low); 698 } 699 700 if (aSign) { 701 float_raise(float_flag_invalid, status); 702 return floatx80_default_nan(status); 703 } 704 705 user_rnd_mode = status->float_rounding_mode; 706 user_rnd_prec = status->floatx80_rounding_precision; 707 status->float_rounding_mode = float_round_nearest_even; 708 status->floatx80_rounding_precision = 80; 709 710 fp0 = floatx80_logn(a, status); 711 fp1 = 
packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); /* INV_L10 */ 712 713 status->float_rounding_mode = user_rnd_mode; 714 status->floatx80_rounding_precision = user_rnd_prec; 715 716 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */ 717 718 float_raise(float_flag_inexact, status); 719 720 return a; 721 } 722 723 /*---------------------------------------------------------------------------- 724 | Log base 2 725 *----------------------------------------------------------------------------*/ 726 727 floatx80 floatx80_log2(floatx80 a, float_status *status) 728 { 729 flag aSign; 730 int32_t aExp; 731 uint64_t aSig; 732 733 int8_t user_rnd_mode, user_rnd_prec; 734 735 floatx80 fp0, fp1; 736 737 aSig = extractFloatx80Frac(a); 738 aExp = extractFloatx80Exp(a); 739 aSign = extractFloatx80Sign(a); 740 741 if (aExp == 0x7FFF) { 742 if ((uint64_t) (aSig << 1)) { 743 propagateFloatx80NaNOneArg(a, status); 744 } 745 if (aSign == 0) { 746 return packFloatx80(0, floatx80_infinity.high, 747 floatx80_infinity.low); 748 } 749 } 750 751 if (aExp == 0) { 752 if (aSig == 0) { 753 float_raise(float_flag_divbyzero, status); 754 return packFloatx80(1, floatx80_infinity.high, 755 floatx80_infinity.low); 756 } 757 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 758 } 759 760 if (aSign) { 761 float_raise(float_flag_invalid, status); 762 return floatx80_default_nan(status); 763 } 764 765 user_rnd_mode = status->float_rounding_mode; 766 user_rnd_prec = status->floatx80_rounding_precision; 767 status->float_rounding_mode = float_round_nearest_even; 768 status->floatx80_rounding_precision = 80; 769 770 if (aSig == one_sig) { /* X is 2^k */ 771 status->float_rounding_mode = user_rnd_mode; 772 status->floatx80_rounding_precision = user_rnd_prec; 773 774 a = int32_to_floatx80(aExp - 0x3FFF, status); 775 } else { 776 fp0 = floatx80_logn(a, status); 777 fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); /* INV_L2 */ 778 779 status->float_rounding_mode = user_rnd_mode; 780 status->floatx80_rounding_precision = user_rnd_prec; 781 782 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */ 783 } 784 785 float_raise(float_flag_inexact, status); 786 787 return a; 788 } 789 790 /*---------------------------------------------------------------------------- 791 | e to x 792 *----------------------------------------------------------------------------*/ 793 794 floatx80 floatx80_etox(floatx80 a, float_status *status) 795 { 796 flag aSign; 797 int32_t aExp; 798 uint64_t aSig; 799 800 int8_t user_rnd_mode, user_rnd_prec; 801 802 int32_t compact, n, j, k, m, m1; 803 floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; 804 flag adjflag; 805 806 aSig = extractFloatx80Frac(a); 807 aExp = extractFloatx80Exp(a); 808 aSign = extractFloatx80Sign(a); 809 810 if (aExp == 0x7FFF) { 811 if ((uint64_t) (aSig << 1)) { 812 return propagateFloatx80NaNOneArg(a, status); 813 } 814 if (aSign) { 815 return packFloatx80(0, 0, 0); 816 } 817 return packFloatx80(0, floatx80_infinity.high, 818 floatx80_infinity.low); 819 } 820 821 if (aExp == 0 && aSig == 0) { 822 return packFloatx80(0, one_exp, one_sig); 823 } 824 825 user_rnd_mode = status->float_rounding_mode; 826 user_rnd_prec = status->floatx80_rounding_precision; 827 status->float_rounding_mode = float_round_nearest_even; 828 status->floatx80_rounding_precision = 80; 829 830 adjflag = 0; 831 832 if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */ 833 compact = floatx80_make_compact(aExp, aSig); 834 835 if (compact < 0x400CB167) { /* |X| < 16380 log2 */ 836 fp0 = a; 837 fp1 = a; 838 fp0 = 
floatx80_mul(fp0, float32_to_floatx80( 839 make_float32(0x42B8AA3B), status), 840 status); /* 64/log2 * X */ 841 adjflag = 0; 842 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ 843 fp0 = int32_to_floatx80(n, status); 844 845 j = n & 0x3F; /* J = N mod 64 */ 846 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ 847 if (n < 0 && j) { 848 /* arithmetic right shift is division and 849 * round towards minus infinity 850 */ 851 m--; 852 } 853 m += 0x3FFF; /* biased exponent of 2^(M) */ 854 855 expcont1: 856 fp2 = fp0; /* N */ 857 fp0 = floatx80_mul(fp0, float32_to_floatx80( 858 make_float32(0xBC317218), status), 859 status); /* N * L1, L1 = lead(-log2/64) */ 860 l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); 861 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ 862 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ 863 fp0 = floatx80_add(fp0, fp2, status); /* R */ 864 865 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 866 fp2 = float32_to_floatx80(make_float32(0x3AB60B70), 867 status); /* A5 */ 868 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */ 869 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895), 870 status), fp1, 871 status); /* fp3 is S*A4 */ 872 fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64( 873 0x3FA5555555554431), status), 874 status); /* fp2 is A3+S*A5 */ 875 fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64( 876 0x3FC5555555554018), status), 877 status); /* fp3 is A2+S*A4 */ 878 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */ 879 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */ 880 fp2 = floatx80_add(fp2, float32_to_floatx80( 881 make_float32(0x3F000000), status), 882 status); /* fp2 is A1+S*(A3+S*A5) */ 883 fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */ 884 fp2 = floatx80_mul(fp2, fp1, 885 status); /* fp2 IS S*(A1+S*(A3+S*A5)) */ 886 fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */ 887 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ 888 889 fp1 = exp_tbl[j]; 890 fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */ 891 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), 892 status); /* accurate 2^(J/64) */ 893 fp0 = floatx80_add(fp0, fp1, 894 status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */ 895 896 scale = packFloatx80(0, m, one_sig); 897 if (adjflag) { 898 adjscale = packFloatx80(0, m1, one_sig); 899 fp0 = floatx80_mul(fp0, adjscale, status); 900 } 901 902 status->float_rounding_mode = user_rnd_mode; 903 status->floatx80_rounding_precision = user_rnd_prec; 904 905 a = floatx80_mul(fp0, scale, status); 906 907 float_raise(float_flag_inexact, status); 908 909 return a; 910 } else { /* |X| >= 16380 log2 */ 911 if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */ 912 status->float_rounding_mode = user_rnd_mode; 913 status->floatx80_rounding_precision = user_rnd_prec; 914 if (aSign) { 915 a = roundAndPackFloatx80( 916 status->floatx80_rounding_precision, 917 0, -0x1000, aSig, 0, status); 918 } else { 919 a = roundAndPackFloatx80( 920 status->floatx80_rounding_precision, 921 0, 0x8000, aSig, 0, status); 922 } 923 float_raise(float_flag_inexact, status); 924 925 return a; 926 } else { 927 fp0 = a; 928 fp1 = a; 929 fp0 = floatx80_mul(fp0, float32_to_floatx80( 930 make_float32(0x42B8AA3B), status), 931 status); /* 64/log2 * X */ 932 adjflag = 1; 933 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ 934 fp0 = int32_to_floatx80(n, status); 935 936 j = n & 0x3F; /* J = N mod 64 */ 937 /* 
NOTE: this is really arithmetic right shift by 6 */ 938 k = n / 64; 939 if (n < 0 && j) { 940 /* arithmetic right shift is division and 941 * round towards minus infinity 942 */ 943 k--; 944 } 945 /* NOTE: this is really arithmetic right shift by 1 */ 946 m1 = k / 2; 947 if (k < 0 && (k & 1)) { 948 /* arithmetic right shift is division and 949 * round towards minus infinity 950 */ 951 m1--; 952 } 953 m = k - m1; 954 m1 += 0x3FFF; /* biased exponent of 2^(M1) */ 955 m += 0x3FFF; /* biased exponent of 2^(M) */ 956 957 goto expcont1; 958 } 959 } 960 } else { /* |X| < 2^(-65) */ 961 status->float_rounding_mode = user_rnd_mode; 962 status->floatx80_rounding_precision = user_rnd_prec; 963 964 a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000), 965 status), status); /* 1 + X */ 966 967 float_raise(float_flag_inexact, status); 968 969 return a; 970 } 971 } 972 973 /*---------------------------------------------------------------------------- 974 | 2 to x 975 *----------------------------------------------------------------------------*/ 976 977 floatx80 floatx80_twotox(floatx80 a, float_status *status) 978 { 979 flag aSign; 980 int32_t aExp; 981 uint64_t aSig; 982 983 int8_t user_rnd_mode, user_rnd_prec; 984 985 int32_t compact, n, j, l, m, m1; 986 floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; 987 988 aSig = extractFloatx80Frac(a); 989 aExp = extractFloatx80Exp(a); 990 aSign = extractFloatx80Sign(a); 991 992 if (aExp == 0x7FFF) { 993 if ((uint64_t) (aSig << 1)) { 994 return propagateFloatx80NaNOneArg(a, status); 995 } 996 if (aSign) { 997 return packFloatx80(0, 0, 0); 998 } 999 return packFloatx80(0, floatx80_infinity.high, 1000 floatx80_infinity.low); 1001 } 1002 1003 if (aExp == 0 && aSig == 0) { 1004 return packFloatx80(0, one_exp, one_sig); 1005 } 1006 1007 user_rnd_mode = status->float_rounding_mode; 1008 user_rnd_prec = status->floatx80_rounding_precision; 1009 status->float_rounding_mode = float_round_nearest_even; 1010 status->floatx80_rounding_precision = 80; 1011 1012 fp0 = a; 1013 1014 compact = floatx80_make_compact(aExp, aSig); 1015 1016 if (compact < 0x3FB98000 || compact > 0x400D80C0) { 1017 /* |X| > 16480 or |X| < 2^(-70) */ 1018 if (compact > 0x3FFF8000) { /* |X| > 16480 */ 1019 status->float_rounding_mode = user_rnd_mode; 1020 status->floatx80_rounding_precision = user_rnd_prec; 1021 1022 if (aSign) { 1023 return roundAndPackFloatx80(status->floatx80_rounding_precision, 1024 0, -0x1000, aSig, 0, status); 1025 } else { 1026 return roundAndPackFloatx80(status->floatx80_rounding_precision, 1027 0, 0x8000, aSig, 0, status); 1028 } 1029 } else { /* |X| < 2^(-70) */ 1030 status->float_rounding_mode = user_rnd_mode; 1031 status->floatx80_rounding_precision = user_rnd_prec; 1032 1033 a = floatx80_add(fp0, float32_to_floatx80( 1034 make_float32(0x3F800000), status), 1035 status); /* 1 + X */ 1036 1037 float_raise(float_flag_inexact, status); 1038 1039 return a; 1040 } 1041 } else { /* 2^(-70) <= |X| <= 16480 */ 1042 fp1 = fp0; /* X */ 1043 fp1 = floatx80_mul(fp1, float32_to_floatx80( 1044 make_float32(0x42800000), status), 1045 status); /* X * 64 */ 1046 n = floatx80_to_int32(fp1, status); 1047 fp1 = int32_to_floatx80(n, status); 1048 j = n & 0x3F; 1049 l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ 1050 if (n < 0 && j) { 1051 /* arithmetic right shift is division and 1052 * round towards minus infinity 1053 */ 1054 l--; 1055 } 1056 m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */ 1057 if (l < 0 && (l & 1)) { 1058 /* arithmetic 
right shift is division and 1059 * round towards minus infinity 1060 */ 1061 m--; 1062 } 1063 m1 = l - m; 1064 m1 += 0x3FFF; /* ADJFACT IS 2^(M') */ 1065 1066 adjfact = packFloatx80(0, m1, one_sig); 1067 fact1 = exp2_tbl[j]; 1068 fact1.high += m; 1069 fact2.high = exp2_tbl2[j] >> 16; 1070 fact2.high += m; 1071 fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); 1072 fact2.low <<= 48; 1073 1074 fp1 = floatx80_mul(fp1, float32_to_floatx80( 1075 make_float32(0x3C800000), status), 1076 status); /* (1/64)*N */ 1077 fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */ 1078 fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); /* LOG2 */ 1079 fp0 = floatx80_mul(fp0, fp2, status); /* R */ 1080 1081 /* EXPR */ 1082 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 1083 fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2), 1084 status); /* A5 */ 1085 fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C), 1086 status); /* A4 */ 1087 fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */ 1088 fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */ 1089 fp2 = floatx80_add(fp2, float64_to_floatx80( 1090 make_float64(0x3FA5555555554CC1), status), 1091 status); /* A3+S*A5 */ 1092 fp3 = floatx80_add(fp3, float64_to_floatx80( 1093 make_float64(0x3FC5555555554A54), status), 1094 status); /* A2+S*A4 */ 1095 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */ 1096 fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */ 1097 fp2 = floatx80_add(fp2, float64_to_floatx80( 1098 make_float64(0x3FE0000000000000), status), 1099 status); /* A1+S*(A3+S*A5) */ 1100 fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */ 1101 1102 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */ 1103 fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */ 1104 fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */ 1105 1106 fp0 = floatx80_mul(fp0, fact1, status); 1107 fp0 = floatx80_add(fp0, fact2, status); 1108 fp0 = floatx80_add(fp0, fact1, status); 1109 1110 status->float_rounding_mode = user_rnd_mode; 1111 status->floatx80_rounding_precision = user_rnd_prec; 1112 1113 a = floatx80_mul(fp0, adjfact, status); 1114 1115 float_raise(float_flag_inexact, status); 1116 1117 return a; 1118 } 1119 } 1120 1121 /*---------------------------------------------------------------------------- 1122 | 10 to x 1123 *----------------------------------------------------------------------------*/ 1124 1125 floatx80 floatx80_tentox(floatx80 a, float_status *status) 1126 { 1127 flag aSign; 1128 int32_t aExp; 1129 uint64_t aSig; 1130 1131 int8_t user_rnd_mode, user_rnd_prec; 1132 1133 int32_t compact, n, j, l, m, m1; 1134 floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; 1135 1136 aSig = extractFloatx80Frac(a); 1137 aExp = extractFloatx80Exp(a); 1138 aSign = extractFloatx80Sign(a); 1139 1140 if (aExp == 0x7FFF) { 1141 if ((uint64_t) (aSig << 1)) { 1142 return propagateFloatx80NaNOneArg(a, status); 1143 } 1144 if (aSign) { 1145 return packFloatx80(0, 0, 0); 1146 } 1147 return packFloatx80(0, floatx80_infinity.high, 1148 floatx80_infinity.low); 1149 } 1150 1151 if (aExp == 0 && aSig == 0) { 1152 return packFloatx80(0, one_exp, one_sig); 1153 } 1154 1155 user_rnd_mode = status->float_rounding_mode; 1156 user_rnd_prec = status->floatx80_rounding_precision; 1157 status->float_rounding_mode = float_round_nearest_even; 1158 status->floatx80_rounding_precision = 80; 1159 1160 fp0 = a; 1161 1162 compact = floatx80_make_compact(aExp, aSig); 1163 1164 if (compact < 0x3FB98000 || compact > 0x400B9B07) { 1165 /* |X| > 16480 LOG2/LOG10 
or |X| < 2^(-70) */ 1166 if (compact > 0x3FFF8000) { /* |X| > 16480 */ 1167 status->float_rounding_mode = user_rnd_mode; 1168 status->floatx80_rounding_precision = user_rnd_prec; 1169 1170 if (aSign) { 1171 return roundAndPackFloatx80(status->floatx80_rounding_precision, 1172 0, -0x1000, aSig, 0, status); 1173 } else { 1174 return roundAndPackFloatx80(status->floatx80_rounding_precision, 1175 0, 0x8000, aSig, 0, status); 1176 } 1177 } else { /* |X| < 2^(-70) */ 1178 status->float_rounding_mode = user_rnd_mode; 1179 status->floatx80_rounding_precision = user_rnd_prec; 1180 1181 a = floatx80_add(fp0, float32_to_floatx80( 1182 make_float32(0x3F800000), status), 1183 status); /* 1 + X */ 1184 1185 float_raise(float_flag_inexact, status); 1186 1187 return a; 1188 } 1189 } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */ 1190 fp1 = fp0; /* X */ 1191 fp1 = floatx80_mul(fp1, float64_to_floatx80( 1192 make_float64(0x406A934F0979A371), 1193 status), status); /* X*64*LOG10/LOG2 */ 1194 n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */ 1195 fp1 = int32_to_floatx80(n, status); 1196 1197 j = n & 0x3F; 1198 l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ 1199 if (n < 0 && j) { 1200 /* arithmetic right shift is division and 1201 * round towards minus infinity 1202 */ 1203 l--; 1204 } 1205 m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */ 1206 if (l < 0 && (l & 1)) { 1207 /* arithmetic right shift is division and 1208 * round towards minus infinity 1209 */ 1210 m--; 1211 } 1212 m1 = l - m; 1213 m1 += 0x3FFF; /* ADJFACT IS 2^(M') */ 1214 1215 adjfact = packFloatx80(0, m1, one_sig); 1216 fact1 = exp2_tbl[j]; 1217 fact1.high += m; 1218 fact2.high = exp2_tbl2[j] >> 16; 1219 fact2.high += m; 1220 fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); 1221 fact2.low <<= 48; 1222 1223 fp2 = fp1; /* N */ 1224 fp1 = floatx80_mul(fp1, float64_to_floatx80( 1225 make_float64(0x3F734413509F8000), status), 1226 status); /* N*(LOG2/64LOG10)_LEAD */ 1227 fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2)); 1228 fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */ 1229 fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */ 1230 fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */ 1231 fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); /* LOG10 */ 1232 fp0 = floatx80_mul(fp0, fp2, status); /* R */ 1233 1234 /* EXPR */ 1235 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 1236 fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2), 1237 status); /* A5 */ 1238 fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C), 1239 status); /* A4 */ 1240 fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */ 1241 fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */ 1242 fp2 = floatx80_add(fp2, float64_to_floatx80( 1243 make_float64(0x3FA5555555554CC1), status), 1244 status); /* A3+S*A5 */ 1245 fp3 = floatx80_add(fp3, float64_to_floatx80( 1246 make_float64(0x3FC5555555554A54), status), 1247 status); /* A2+S*A4 */ 1248 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */ 1249 fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */ 1250 fp2 = floatx80_add(fp2, float64_to_floatx80( 1251 make_float64(0x3FE0000000000000), status), 1252 status); /* A1+S*(A3+S*A5) */ 1253 fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */ 1254 1255 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */ 1256 fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */ 1257 fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */ 1258 1259 fp0 = floatx80_mul(fp0, fact1, 
status); 1260 fp0 = floatx80_add(fp0, fact2, status); 1261 fp0 = floatx80_add(fp0, fact1, status); 1262 1263 status->float_rounding_mode = user_rnd_mode; 1264 status->floatx80_rounding_precision = user_rnd_prec; 1265 1266 a = floatx80_mul(fp0, adjfact, status); 1267 1268 float_raise(float_flag_inexact, status); 1269 1270 return a; 1271 } 1272 } 1273 1274 /*---------------------------------------------------------------------------- 1275 | Tangent 1276 *----------------------------------------------------------------------------*/ 1277 1278 floatx80 floatx80_tan(floatx80 a, float_status *status) 1279 { 1280 flag aSign, xSign; 1281 int32_t aExp, xExp; 1282 uint64_t aSig, xSig; 1283 1284 int8_t user_rnd_mode, user_rnd_prec; 1285 1286 int32_t compact, l, n, j; 1287 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; 1288 float32 twoto63; 1289 flag endflag; 1290 1291 aSig = extractFloatx80Frac(a); 1292 aExp = extractFloatx80Exp(a); 1293 aSign = extractFloatx80Sign(a); 1294 1295 if (aExp == 0x7FFF) { 1296 if ((uint64_t) (aSig << 1)) { 1297 return propagateFloatx80NaNOneArg(a, status); 1298 } 1299 float_raise(float_flag_invalid, status); 1300 return floatx80_default_nan(status); 1301 } 1302 1303 if (aExp == 0 && aSig == 0) { 1304 return packFloatx80(aSign, 0, 0); 1305 } 1306 1307 user_rnd_mode = status->float_rounding_mode; 1308 user_rnd_prec = status->floatx80_rounding_precision; 1309 status->float_rounding_mode = float_round_nearest_even; 1310 status->floatx80_rounding_precision = 80; 1311 1312 compact = floatx80_make_compact(aExp, aSig); 1313 1314 fp0 = a; 1315 1316 if (compact < 0x3FD78000 || compact > 0x4004BC7E) { 1317 /* 2^(-40) > |X| > 15 PI */ 1318 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ 1319 /* REDUCEX */ 1320 fp1 = packFloatx80(0, 0, 0); 1321 if (compact == 0x7FFEFFFF) { 1322 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, 1323 LIT64(0xC90FDAA200000000)); 1324 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, 1325 LIT64(0x85A308D300000000)); 1326 fp0 = floatx80_add(fp0, twopi1, status); 1327 fp1 = fp0; 1328 fp0 = floatx80_add(fp0, twopi2, status); 1329 fp1 = floatx80_sub(fp1, fp0, status); 1330 fp1 = floatx80_add(fp1, twopi2, status); 1331 } 1332 loop: 1333 xSign = extractFloatx80Sign(fp0); 1334 xExp = extractFloatx80Exp(fp0); 1335 xExp -= 0x3FFF; 1336 if (xExp <= 28) { 1337 l = 0; 1338 endflag = 1; 1339 } else { 1340 l = xExp - 27; 1341 endflag = 0; 1342 } 1343 invtwopi = packFloatx80(0, 0x3FFE - l, 1344 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */ 1345 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); 1346 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); 1347 1348 /* SIGN(INARG)*2^63 IN SGL */ 1349 twoto63 = packFloat32(xSign, 0xBE, 0); 1350 1351 fp2 = floatx80_mul(fp0, invtwopi, status); 1352 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), 1353 status); /* THE FRACT PART OF FP2 IS ROUNDED */ 1354 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), 1355 status); /* FP2 is N */ 1356 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ 1357 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ 1358 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ 1359 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ 1360 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ 1361 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ 1362 fp3 = fp0; /* FP3 is A */ 1363 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ 1364 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ 1365 1366 if (endflag > 0) { 
1367 n = floatx80_to_int32(fp2, status); 1368 goto tancont; 1369 } 1370 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ 1371 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ 1372 goto loop; 1373 } else { 1374 status->float_rounding_mode = user_rnd_mode; 1375 status->floatx80_rounding_precision = user_rnd_prec; 1376 1377 a = floatx80_move(a, status); 1378 1379 float_raise(float_flag_inexact, status); 1380 1381 return a; 1382 } 1383 } else { 1384 fp1 = floatx80_mul(fp0, float64_to_floatx80( 1385 make_float64(0x3FE45F306DC9C883), status), 1386 status); /* X*2/PI */ 1387 1388 n = floatx80_to_int32(fp1, status); 1389 j = 32 + n; 1390 1391 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ 1392 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), 1393 status); /* FP0 IS R = (X-Y1)-Y2 */ 1394 1395 tancont: 1396 if (n & 1) { 1397 /* NODD */ 1398 fp1 = fp0; /* R */ 1399 fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 1400 fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688), 1401 status); /* Q4 */ 1402 fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04), 1403 status); /* P3 */ 1404 fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */ 1405 fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */ 1406 fp3 = floatx80_add(fp3, float64_to_floatx80( 1407 make_float64(0xBF346F59B39BA65F), status), 1408 status); /* Q3+SQ4 */ 1409 fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00)); 1410 fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */ 1411 fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */ 1412 fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */ 1413 fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1)); 1414 fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */ 1415 fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA)); 1416 fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */ 1417 fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */ 1418 fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */ 1419 fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE)); 1420 fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */ 1421 fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */ 1422 fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */ 1423 fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */ 1424 fp0 = floatx80_add(fp0, float32_to_floatx80( 1425 make_float32(0x3F800000), status), 1426 status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */ 1427 1428 xSign = extractFloatx80Sign(fp1); 1429 xExp = extractFloatx80Exp(fp1); 1430 xSig = extractFloatx80Frac(fp1); 1431 xSign ^= 1; 1432 fp1 = packFloatx80(xSign, xExp, xSig); 1433 1434 status->float_rounding_mode = user_rnd_mode; 1435 status->floatx80_rounding_precision = user_rnd_prec; 1436 1437 a = floatx80_div(fp0, fp1, status); 1438 1439 float_raise(float_flag_inexact, status); 1440 1441 return a; 1442 } else { 1443 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 1444 fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688), 1445 status); /* Q4 */ 1446 fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04), 1447 status); /* P3 */ 1448 fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */ 1449 fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */ 1450 fp3 = floatx80_add(fp3, float64_to_floatx80( 1451 make_float64(0xBF346F59B39BA65F), status), 1452 status); /* Q3+SQ4 */ 1453 fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00)); 1454 fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */ 1455 fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */ 1456 fp2 = floatx80_mul(fp2, fp1, 
status); /* S(P2+SP3) */ 1457 fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1)); 1458 fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */ 1459 fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA)); 1460 fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */ 1461 fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */ 1462 fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */ 1463 fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE)); 1464 fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */ 1465 fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */ 1466 fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */ 1467 fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */ 1468 fp1 = floatx80_add(fp1, float32_to_floatx80( 1469 make_float32(0x3F800000), status), 1470 status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */ 1471 1472 status->float_rounding_mode = user_rnd_mode; 1473 status->floatx80_rounding_precision = user_rnd_prec; 1474 1475 a = floatx80_div(fp0, fp1, status); 1476 1477 float_raise(float_flag_inexact, status); 1478 1479 return a; 1480 } 1481 } 1482 } 1483 1484 /*---------------------------------------------------------------------------- 1485 | Sine 1486 *----------------------------------------------------------------------------*/ 1487 1488 floatx80 floatx80_sin(floatx80 a, float_status *status) 1489 { 1490 flag aSign, xSign; 1491 int32_t aExp, xExp; 1492 uint64_t aSig, xSig; 1493 1494 int8_t user_rnd_mode, user_rnd_prec; 1495 1496 int32_t compact, l, n, j; 1497 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; 1498 float32 posneg1, twoto63; 1499 flag adjn, endflag; 1500 1501 aSig = extractFloatx80Frac(a); 1502 aExp = extractFloatx80Exp(a); 1503 aSign = extractFloatx80Sign(a); 1504 1505 if (aExp == 0x7FFF) { 1506 if ((uint64_t) (aSig << 1)) { 1507 return propagateFloatx80NaNOneArg(a, status); 1508 } 1509 float_raise(float_flag_invalid, status); 1510 return floatx80_default_nan(status); 1511 } 1512 1513 if (aExp == 0 && aSig == 0) { 1514 return packFloatx80(aSign, 0, 0); 1515 } 1516 1517 adjn = 0; 1518 1519 user_rnd_mode = status->float_rounding_mode; 1520 user_rnd_prec = status->floatx80_rounding_precision; 1521 status->float_rounding_mode = float_round_nearest_even; 1522 status->floatx80_rounding_precision = 80; 1523 1524 compact = floatx80_make_compact(aExp, aSig); 1525 1526 fp0 = a; 1527 1528 if (compact < 0x3FD78000 || compact > 0x4004BC7E) { 1529 /* 2^(-40) > |X| > 15 PI */ 1530 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ 1531 /* REDUCEX */ 1532 fp1 = packFloatx80(0, 0, 0); 1533 if (compact == 0x7FFEFFFF) { 1534 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, 1535 LIT64(0xC90FDAA200000000)); 1536 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, 1537 LIT64(0x85A308D300000000)); 1538 fp0 = floatx80_add(fp0, twopi1, status); 1539 fp1 = fp0; 1540 fp0 = floatx80_add(fp0, twopi2, status); 1541 fp1 = floatx80_sub(fp1, fp0, status); 1542 fp1 = floatx80_add(fp1, twopi2, status); 1543 } 1544 loop: 1545 xSign = extractFloatx80Sign(fp0); 1546 xExp = extractFloatx80Exp(fp0); 1547 xExp -= 0x3FFF; 1548 if (xExp <= 28) { 1549 l = 0; 1550 endflag = 1; 1551 } else { 1552 l = xExp - 27; 1553 endflag = 0; 1554 } 1555 invtwopi = packFloatx80(0, 0x3FFE - l, 1556 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */ 1557 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); 1558 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); 1559 1560 /* SIGN(INARG)*2^63 IN SGL */ 1561 twoto63 = packFloat32(xSign, 0xBE, 0); 1562 1563 fp2 = 
floatx80_mul(fp0, invtwopi, status); 1564 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), 1565 status); /* THE FRACT PART OF FP2 IS ROUNDED */ 1566 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), 1567 status); /* FP2 is N */ 1568 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ 1569 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ 1570 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ 1571 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ 1572 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ 1573 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ 1574 fp3 = fp0; /* FP3 is A */ 1575 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ 1576 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ 1577 1578 if (endflag > 0) { 1579 n = floatx80_to_int32(fp2, status); 1580 goto sincont; 1581 } 1582 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ 1583 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ 1584 goto loop; 1585 } else { 1586 /* SINSM */ 1587 fp0 = float32_to_floatx80(make_float32(0x3F800000), 1588 status); /* 1 */ 1589 1590 status->float_rounding_mode = user_rnd_mode; 1591 status->floatx80_rounding_precision = user_rnd_prec; 1592 1593 if (adjn) { 1594 /* COSTINY */ 1595 a = floatx80_sub(fp0, float32_to_floatx80( 1596 make_float32(0x00800000), status), status); 1597 } else { 1598 /* SINTINY */ 1599 a = floatx80_move(a, status); 1600 } 1601 float_raise(float_flag_inexact, status); 1602 1603 return a; 1604 } 1605 } else { 1606 fp1 = floatx80_mul(fp0, float64_to_floatx80( 1607 make_float64(0x3FE45F306DC9C883), status), 1608 status); /* X*2/PI */ 1609 1610 n = floatx80_to_int32(fp1, status); 1611 j = 32 + n; 1612 1613 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ 1614 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), 1615 status); /* FP0 IS R = (X-Y1)-Y2 */ 1616 1617 sincont: 1618 if ((n + adjn) & 1) { 1619 /* COSPOLY */ 1620 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ 1621 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ 1622 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), 1623 status); /* B8 */ 1624 fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), 1625 status); /* B7 */ 1626 1627 xSign = extractFloatx80Sign(fp0); /* X IS S */ 1628 xExp = extractFloatx80Exp(fp0); 1629 xSig = extractFloatx80Frac(fp0); 1630 1631 if (((n + adjn) >> 1) & 1) { 1632 xSign ^= 1; 1633 posneg1 = make_float32(0xBF800000); /* -1 */ 1634 } else { 1635 xSign ^= 0; 1636 posneg1 = make_float32(0x3F800000); /* 1 */ 1637 } /* X IS NOW R'= SGN*R */ 1638 1639 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ 1640 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ 1641 fp2 = floatx80_add(fp2, float64_to_floatx80( 1642 make_float64(0x3E21EED90612C972), status), 1643 status); /* B6+TB8 */ 1644 fp3 = floatx80_add(fp3, float64_to_floatx80( 1645 make_float64(0xBE927E4FB79D9FCF), status), 1646 status); /* B5+TB7 */ 1647 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ 1648 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ 1649 fp2 = floatx80_add(fp2, float64_to_floatx80( 1650 make_float64(0x3EFA01A01A01D423), status), 1651 status); /* B4+T(B6+TB8) */ 1652 fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); 1653 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ 1654 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ 1655 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ 1656 fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); 1657 fp2 = 
floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ 1658 fp1 = floatx80_add(fp1, float32_to_floatx80( 1659 make_float32(0xBF000000), status), 1660 status); /* B1+T(B3+T(B5+TB7)) */ 1661 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ 1662 fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+ 1663 * [S(B2+T(B4+T(B6+TB8)))] 1664 */ 1665 1666 x = packFloatx80(xSign, xExp, xSig); 1667 fp0 = floatx80_mul(fp0, x, status); 1668 1669 status->float_rounding_mode = user_rnd_mode; 1670 status->floatx80_rounding_precision = user_rnd_prec; 1671 1672 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); 1673 1674 float_raise(float_flag_inexact, status); 1675 1676 return a; 1677 } else { 1678 /* SINPOLY */ 1679 xSign = extractFloatx80Sign(fp0); /* X IS R */ 1680 xExp = extractFloatx80Exp(fp0); 1681 xSig = extractFloatx80Frac(fp0); 1682 1683 xSign ^= ((n + adjn) >> 1) & 1; /* X IS NOW R'= SGN*R */ 1684 1685 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ 1686 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ 1687 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), 1688 status); /* A7 */ 1689 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), 1690 status); /* A6 */ 1691 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ 1692 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ 1693 fp3 = floatx80_add(fp3, float64_to_floatx80( 1694 make_float64(0xBE5AE6452A118AE4), status), 1695 status); /* A5+T*A7 */ 1696 fp2 = floatx80_add(fp2, float64_to_floatx80( 1697 make_float64(0x3EC71DE3A5341531), status), 1698 status); /* A4+T*A6 */ 1699 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ 1700 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ 1701 fp3 = floatx80_add(fp3, float64_to_floatx80( 1702 make_float64(0xBF2A01A01A018B59), status), 1703 status); /* A3+T(A5+TA7) */ 1704 fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); 1705 fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ 1706 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ 1707 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ 1708 fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); 1709 fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ 1710 fp1 = floatx80_add(fp1, fp2, 1711 status); /* [A1+T(A3+T(A5+TA7))]+ 1712 * [S(A2+T(A4+TA6))] 1713 */ 1714 1715 x = packFloatx80(xSign, xExp, xSig); 1716 fp0 = floatx80_mul(fp0, x, status); /* R'*S */ 1717 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ 1718 1719 status->float_rounding_mode = user_rnd_mode; 1720 status->floatx80_rounding_precision = user_rnd_prec; 1721 1722 a = floatx80_add(fp0, x, status); 1723 1724 float_raise(float_flag_inexact, status); 1725 1726 return a; 1727 } 1728 } 1729 } 1730 1731 /*---------------------------------------------------------------------------- 1732 | Cosine 1733 *----------------------------------------------------------------------------*/ 1734 1735 floatx80 floatx80_cos(floatx80 a, float_status *status) 1736 { 1737 flag aSign, xSign; 1738 int32_t aExp, xExp; 1739 uint64_t aSig, xSig; 1740 1741 int8_t user_rnd_mode, user_rnd_prec; 1742 1743 int32_t compact, l, n, j; 1744 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; 1745 float32 posneg1, twoto63; 1746 flag adjn, endflag; 1747 1748 aSig = extractFloatx80Frac(a); 1749 aExp = extractFloatx80Exp(a); 1750 aSign = extractFloatx80Sign(a); 1751 1752 if (aExp == 0x7FFF) { 1753 if ((uint64_t) (aSig << 1)) { 1754 return propagateFloatx80NaNOneArg(a, status); 1755 } 1756 
float_raise(float_flag_invalid, status); 1757 return floatx80_default_nan(status); 1758 } 1759 1760 if (aExp == 0 && aSig == 0) { 1761 return packFloatx80(0, one_exp, one_sig); 1762 } 1763 1764 adjn = 1; 1765 1766 user_rnd_mode = status->float_rounding_mode; 1767 user_rnd_prec = status->floatx80_rounding_precision; 1768 status->float_rounding_mode = float_round_nearest_even; 1769 status->floatx80_rounding_precision = 80; 1770 1771 compact = floatx80_make_compact(aExp, aSig); 1772 1773 fp0 = a; 1774 1775 if (compact < 0x3FD78000 || compact > 0x4004BC7E) { 1776 /* 2^(-40) > |X| > 15 PI */ 1777 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ 1778 /* REDUCEX */ 1779 fp1 = packFloatx80(0, 0, 0); 1780 if (compact == 0x7FFEFFFF) { 1781 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, 1782 LIT64(0xC90FDAA200000000)); 1783 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, 1784 LIT64(0x85A308D300000000)); 1785 fp0 = floatx80_add(fp0, twopi1, status); 1786 fp1 = fp0; 1787 fp0 = floatx80_add(fp0, twopi2, status); 1788 fp1 = floatx80_sub(fp1, fp0, status); 1789 fp1 = floatx80_add(fp1, twopi2, status); 1790 } 1791 loop: 1792 xSign = extractFloatx80Sign(fp0); 1793 xExp = extractFloatx80Exp(fp0); 1794 xExp -= 0x3FFF; 1795 if (xExp <= 28) { 1796 l = 0; 1797 endflag = 1; 1798 } else { 1799 l = xExp - 27; 1800 endflag = 0; 1801 } 1802 invtwopi = packFloatx80(0, 0x3FFE - l, 1803 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */ 1804 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); 1805 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); 1806 1807 /* SIGN(INARG)*2^63 IN SGL */ 1808 twoto63 = packFloat32(xSign, 0xBE, 0); 1809 1810 fp2 = floatx80_mul(fp0, invtwopi, status); 1811 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), 1812 status); /* THE FRACT PART OF FP2 IS ROUNDED */ 1813 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), 1814 status); /* FP2 is N */ 1815 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ 1816 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ 1817 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ 1818 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ 1819 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ 1820 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ 1821 fp3 = fp0; /* FP3 is A */ 1822 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ 1823 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ 1824 1825 if (endflag > 0) { 1826 n = floatx80_to_int32(fp2, status); 1827 goto sincont; 1828 } 1829 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ 1830 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ 1831 goto loop; 1832 } else { 1833 /* SINSM */ 1834 fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */ 1835 1836 status->float_rounding_mode = user_rnd_mode; 1837 status->floatx80_rounding_precision = user_rnd_prec; 1838 1839 if (adjn) { 1840 /* COSTINY */ 1841 a = floatx80_sub(fp0, float32_to_floatx80( 1842 make_float32(0x00800000), status), 1843 status); 1844 } else { 1845 /* SINTINY */ 1846 a = floatx80_move(a, status); 1847 } 1848 float_raise(float_flag_inexact, status); 1849 1850 return a; 1851 } 1852 } else { 1853 fp1 = floatx80_mul(fp0, float64_to_floatx80( 1854 make_float64(0x3FE45F306DC9C883), status), 1855 status); /* X*2/PI */ 1856 1857 n = floatx80_to_int32(fp1, status); 1858 j = 32 + n; 1859 1860 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ 1861 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), 1862 status); /* FP0 IS R = (X-Y1)-Y2 */ 
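        /*
         * Editor's note (illustrative, not part of the ported FPSP code):
         * pi_tbl[32 + N] holds N*(pi/2) rounded to extended precision and
         * pi_tbl2[32 + N] a single-precision tail of the same product, so
         * R = (X - Y1) - Y2 recovers X - N*pi/2 with well over 64 bits of
         * effective precision.  In plain double arithmetic the same
         * head/tail ("Cody-Waite") reduction would look roughly like:
         *
         *     double pio2_hi = 1.5707963267341256;      // head of pi/2
         *     double pio2_lo = 6.077100506506192e-11;   // tail of pi/2
         *     int    n = (int)nearbyint(x * (2.0 / M_PI));
         *     double r = (x - n * pio2_hi) - n * pio2_lo;
         *
         * where x, n, r, pio2_hi and pio2_lo are illustrative names only.
         * The parity of (n + adjn) at sincont below then selects the sine
         * or cosine polynomial for the reduced argument.
         */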
1863 1864 sincont: 1865 if ((n + adjn) & 1) { 1866 /* COSPOLY */ 1867 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ 1868 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ 1869 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), 1870 status); /* B8 */ 1871 fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), 1872 status); /* B7 */ 1873 1874 xSign = extractFloatx80Sign(fp0); /* X IS S */ 1875 xExp = extractFloatx80Exp(fp0); 1876 xSig = extractFloatx80Frac(fp0); 1877 1878 if (((n + adjn) >> 1) & 1) { 1879 xSign ^= 1; 1880 posneg1 = make_float32(0xBF800000); /* -1 */ 1881 } else { 1882 xSign ^= 0; 1883 posneg1 = make_float32(0x3F800000); /* 1 */ 1884 } /* X IS NOW R'= SGN*R */ 1885 1886 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ 1887 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ 1888 fp2 = floatx80_add(fp2, float64_to_floatx80( 1889 make_float64(0x3E21EED90612C972), status), 1890 status); /* B6+TB8 */ 1891 fp3 = floatx80_add(fp3, float64_to_floatx80( 1892 make_float64(0xBE927E4FB79D9FCF), status), 1893 status); /* B5+TB7 */ 1894 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ 1895 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ 1896 fp2 = floatx80_add(fp2, float64_to_floatx80( 1897 make_float64(0x3EFA01A01A01D423), status), 1898 status); /* B4+T(B6+TB8) */ 1899 fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); 1900 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ 1901 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ 1902 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ 1903 fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); 1904 fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ 1905 fp1 = floatx80_add(fp1, float32_to_floatx80( 1906 make_float32(0xBF000000), status), 1907 status); /* B1+T(B3+T(B5+TB7)) */ 1908 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ 1909 fp0 = floatx80_add(fp0, fp1, status); 1910 /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */ 1911 1912 x = packFloatx80(xSign, xExp, xSig); 1913 fp0 = floatx80_mul(fp0, x, status); 1914 1915 status->float_rounding_mode = user_rnd_mode; 1916 status->floatx80_rounding_precision = user_rnd_prec; 1917 1918 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); 1919 1920 float_raise(float_flag_inexact, status); 1921 1922 return a; 1923 } else { 1924 /* SINPOLY */ 1925 xSign = extractFloatx80Sign(fp0); /* X IS R */ 1926 xExp = extractFloatx80Exp(fp0); 1927 xSig = extractFloatx80Frac(fp0); 1928 1929 xSign ^= ((n + adjn) >> 1) & 1; /* X IS NOW R'= SGN*R */ 1930 1931 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ 1932 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ 1933 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), 1934 status); /* A7 */ 1935 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), 1936 status); /* A6 */ 1937 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ 1938 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ 1939 fp3 = floatx80_add(fp3, float64_to_floatx80( 1940 make_float64(0xBE5AE6452A118AE4), status), 1941 status); /* A5+T*A7 */ 1942 fp2 = floatx80_add(fp2, float64_to_floatx80( 1943 make_float64(0x3EC71DE3A5341531), status), 1944 status); /* A4+T*A6 */ 1945 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ 1946 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ 1947 fp3 = floatx80_add(fp3, float64_to_floatx80( 1948 make_float64(0xBF2A01A01A018B59), status), 1949 status); /* A3+T(A5+TA7) */ 1950 fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); 1951 
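            /*
             * Editor's note: the packed constant above decodes to
             * (0x88888888888859AF / 2^63) * 2^(0x3FF8 - 0x3FFF)
             *   = (16/15) * 2^-7 (rounded) ~= 8.3333e-3 ~= 1/120,
             * i.e. the degree-5 sine coefficient A2.  The A1 constant a few
             * lines below, packFloatx80(1, 0x3FFC, ...AA99), decodes the
             * same way to approximately -1/6.
             */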
fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ 1952 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ 1953 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ 1954 fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); 1955 fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ 1956 fp1 = floatx80_add(fp1, fp2, status); 1957 /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */ 1958 1959 x = packFloatx80(xSign, xExp, xSig); 1960 fp0 = floatx80_mul(fp0, x, status); /* R'*S */ 1961 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ 1962 1963 status->float_rounding_mode = user_rnd_mode; 1964 status->floatx80_rounding_precision = user_rnd_prec; 1965 1966 a = floatx80_add(fp0, x, status); 1967 1968 float_raise(float_flag_inexact, status); 1969 1970 return a; 1971 } 1972 } 1973 } 1974 1975 /*---------------------------------------------------------------------------- 1976 | Arc tangent 1977 *----------------------------------------------------------------------------*/ 1978 1979 floatx80 floatx80_atan(floatx80 a, float_status *status) 1980 { 1981 flag aSign; 1982 int32_t aExp; 1983 uint64_t aSig; 1984 1985 int8_t user_rnd_mode, user_rnd_prec; 1986 1987 int32_t compact, tbl_index; 1988 floatx80 fp0, fp1, fp2, fp3, xsave; 1989 1990 aSig = extractFloatx80Frac(a); 1991 aExp = extractFloatx80Exp(a); 1992 aSign = extractFloatx80Sign(a); 1993 1994 if (aExp == 0x7FFF) { 1995 if ((uint64_t) (aSig << 1)) { 1996 return propagateFloatx80NaNOneArg(a, status); 1997 } 1998 a = packFloatx80(aSign, piby2_exp, pi_sig); 1999 float_raise(float_flag_inexact, status); 2000 return floatx80_move(a, status); 2001 } 2002 2003 if (aExp == 0 && aSig == 0) { 2004 return packFloatx80(aSign, 0, 0); 2005 } 2006 2007 compact = floatx80_make_compact(aExp, aSig); 2008 2009 user_rnd_mode = status->float_rounding_mode; 2010 user_rnd_prec = status->floatx80_rounding_precision; 2011 status->float_rounding_mode = float_round_nearest_even; 2012 status->floatx80_rounding_precision = 80; 2013 2014 if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { 2015 /* |X| >= 16 or |X| < 1/16 */ 2016 if (compact > 0x3FFF8000) { /* |X| >= 16 */ 2017 if (compact > 0x40638000) { /* |X| > 2^(100) */ 2018 fp0 = packFloatx80(aSign, piby2_exp, pi_sig); 2019 fp1 = packFloatx80(aSign, 0x0001, one_sig); 2020 2021 status->float_rounding_mode = user_rnd_mode; 2022 status->floatx80_rounding_precision = user_rnd_prec; 2023 2024 a = floatx80_sub(fp0, fp1, status); 2025 2026 float_raise(float_flag_inexact, status); 2027 2028 return a; 2029 } else { 2030 fp0 = a; 2031 fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */ 2032 fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */ 2033 xsave = fp1; 2034 fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */ 2035 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ 2036 fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A), 2037 status); /* C5 */ 2038 fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D), 2039 status); /* C4 */ 2040 fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */ 2041 fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */ 2042 fp3 = floatx80_add(fp3, float64_to_floatx80( 2043 make_float64(0xBFC24924827107B8), status), 2044 status); /* C3+Z*C5 */ 2045 fp2 = floatx80_add(fp2, float64_to_floatx80( 2046 make_float64(0x3FC999999996263E), status), 2047 status); /* C2+Z*C4 */ 2048 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */ 2049 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */ 2050 fp1 = floatx80_add(fp1, float64_to_floatx80( 2051 
make_float64(0xBFD5555555555536), status), 2052 status); /* C1+Z*(C3+Z*C5) */ 2053 fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */ 2054 /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */ 2055 fp1 = floatx80_add(fp1, fp2, status); 2056 /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? */ 2057 fp0 = floatx80_mul(fp0, fp1, status); 2058 fp0 = floatx80_add(fp0, xsave, status); 2059 fp1 = packFloatx80(aSign, piby2_exp, pi_sig); 2060 2061 status->float_rounding_mode = user_rnd_mode; 2062 status->floatx80_rounding_precision = user_rnd_prec; 2063 2064 a = floatx80_add(fp0, fp1, status); 2065 2066 float_raise(float_flag_inexact, status); 2067 2068 return a; 2069 } 2070 } else { /* |X| < 1/16 */ 2071 if (compact < 0x3FD78000) { /* |X| < 2^(-40) */ 2072 status->float_rounding_mode = user_rnd_mode; 2073 status->floatx80_rounding_precision = user_rnd_prec; 2074 2075 a = floatx80_move(a, status); 2076 2077 float_raise(float_flag_inexact, status); 2078 2079 return a; 2080 } else { 2081 fp0 = a; 2082 xsave = a; 2083 fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */ 2084 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */ 2085 fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989), 2086 status); /* B6 */ 2087 fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB), 2088 status); /* B5 */ 2089 fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */ 2090 fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */ 2091 fp2 = floatx80_add(fp2, float64_to_floatx80( 2092 make_float64(0x3FBC71C646940220), status), 2093 status); /* B4+Z*B6 */ 2094 fp3 = floatx80_add(fp3, float64_to_floatx80( 2095 make_float64(0xBFC24924921872F9), 2096 status), status); /* B3+Z*B5 */ 2097 fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */ 2098 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */ 2099 fp2 = floatx80_add(fp2, float64_to_floatx80( 2100 make_float64(0x3FC9999999998FA9), status), 2101 status); /* B2+Z*(B4+Z*B6) */ 2102 fp1 = floatx80_add(fp1, float64_to_floatx80( 2103 make_float64(0xBFD5555555555555), status), 2104 status); /* B1+Z*(B3+Z*B5) */ 2105 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */ 2106 fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */ 2107 /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */ 2108 fp1 = floatx80_add(fp1, fp2, status); 2109 /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */ 2110 fp0 = floatx80_mul(fp0, fp1, status); 2111 2112 status->float_rounding_mode = user_rnd_mode; 2113 status->floatx80_rounding_precision = user_rnd_prec; 2114 2115 a = floatx80_add(fp0, xsave, status); 2116 2117 float_raise(float_flag_inexact, status); 2118 2119 return a; 2120 } 2121 } 2122 } else { 2123 aSig &= LIT64(0xF800000000000000); 2124 aSig |= LIT64(0x0400000000000000); 2125 xsave = packFloatx80(aSign, aExp, aSig); /* F */ 2126 fp0 = a; 2127 fp1 = a; /* X */ 2128 fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */ 2129 fp1 = floatx80_mul(fp1, xsave, status); /* X*F */ 2130 fp0 = floatx80_sub(fp0, xsave, status); /* X-F */ 2131 fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */ 2132 fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */ 2133 2134 tbl_index = compact; 2135 2136 tbl_index &= 0x7FFF0000; 2137 tbl_index -= 0x3FFB0000; 2138 tbl_index >>= 1; 2139 tbl_index += compact & 0x00007800; 2140 tbl_index >>= 11; 2141 2142 fp3 = atan_tbl[tbl_index]; 2143 2144 fp3.high |= aSign ? 
0x8000 : 0; /* ATAN(F) */ 2145 2146 fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */ 2147 fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8), 2148 status); /* A3 */ 2149 fp2 = floatx80_add(fp2, fp1, status); /* A3+V */ 2150 fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */ 2151 fp1 = floatx80_mul(fp1, fp0, status); /* U*V */ 2152 fp2 = floatx80_add(fp2, float64_to_floatx80( 2153 make_float64(0x4002AC6934A26DB3), status), 2154 status); /* A2+V*(A3+V) */ 2155 fp1 = floatx80_mul(fp1, float64_to_floatx80( 2156 make_float64(0xBFC2476F4E1DA28E), status), 2157 status); /* A1+U*V */ 2158 fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */ 2159 fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */ 2160 2161 status->float_rounding_mode = user_rnd_mode; 2162 status->floatx80_rounding_precision = user_rnd_prec; 2163 2164 a = floatx80_add(fp0, fp3, status); /* ATAN(X) */ 2165 2166 float_raise(float_flag_inexact, status); 2167 2168 return a; 2169 } 2170 } 2171 2172 /*---------------------------------------------------------------------------- 2173 | Arc sine 2174 *----------------------------------------------------------------------------*/ 2175 2176 floatx80 floatx80_asin(floatx80 a, float_status *status) 2177 { 2178 flag aSign; 2179 int32_t aExp; 2180 uint64_t aSig; 2181 2182 int8_t user_rnd_mode, user_rnd_prec; 2183 2184 int32_t compact; 2185 floatx80 fp0, fp1, fp2, one; 2186 2187 aSig = extractFloatx80Frac(a); 2188 aExp = extractFloatx80Exp(a); 2189 aSign = extractFloatx80Sign(a); 2190 2191 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { 2192 return propagateFloatx80NaNOneArg(a, status); 2193 } 2194 2195 if (aExp == 0 && aSig == 0) { 2196 return packFloatx80(aSign, 0, 0); 2197 } 2198 2199 compact = floatx80_make_compact(aExp, aSig); 2200 2201 if (compact >= 0x3FFF8000) { /* |X| >= 1 */ 2202 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ 2203 float_raise(float_flag_inexact, status); 2204 a = packFloatx80(aSign, piby2_exp, pi_sig); 2205 return floatx80_move(a, status); 2206 } else { /* |X| > 1 */ 2207 float_raise(float_flag_invalid, status); 2208 return floatx80_default_nan(status); 2209 } 2210 2211 } /* |X| < 1 */ 2212 2213 user_rnd_mode = status->float_rounding_mode; 2214 user_rnd_prec = status->floatx80_rounding_precision; 2215 status->float_rounding_mode = float_round_nearest_even; 2216 status->floatx80_rounding_precision = 80; 2217 2218 one = packFloatx80(0, one_exp, one_sig); 2219 fp0 = a; 2220 2221 fp1 = floatx80_sub(one, fp0, status); /* 1 - X */ 2222 fp2 = floatx80_add(one, fp0, status); /* 1 + X */ 2223 fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */ 2224 fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */ 2225 fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */ 2226 2227 status->float_rounding_mode = user_rnd_mode; 2228 status->floatx80_rounding_precision = user_rnd_prec; 2229 2230 a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */ 2231 2232 float_raise(float_flag_inexact, status); 2233 2234 return a; 2235 } 2236 2237 /*---------------------------------------------------------------------------- 2238 | Arc cosine 2239 *----------------------------------------------------------------------------*/ 2240 2241 floatx80 floatx80_acos(floatx80 a, float_status *status) 2242 { 2243 flag aSign; 2244 int32_t aExp; 2245 uint64_t aSig; 2246 2247 int8_t user_rnd_mode, user_rnd_prec; 2248 2249 int32_t compact; 2250 floatx80 fp0, fp1, one; 2251 2252 aSig = extractFloatx80Frac(a); 2253 aExp = extractFloatx80Exp(a); 2254 aSign = 
extractFloatx80Sign(a); 2255 2256 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { 2257 return propagateFloatx80NaNOneArg(a, status); 2258 } 2259 if (aExp == 0 && aSig == 0) { 2260 float_raise(float_flag_inexact, status); 2261 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 2262 piby2_exp, pi_sig, 0, status); 2263 } 2264 2265 compact = floatx80_make_compact(aExp, aSig); 2266 2267 if (compact >= 0x3FFF8000) { /* |X| >= 1 */ 2268 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ 2269 if (aSign) { /* X == -1 */ 2270 a = packFloatx80(0, pi_exp, pi_sig); 2271 float_raise(float_flag_inexact, status); 2272 return floatx80_move(a, status); 2273 } else { /* X == +1 */ 2274 return packFloatx80(0, 0, 0); 2275 } 2276 } else { /* |X| > 1 */ 2277 float_raise(float_flag_invalid, status); 2278 return floatx80_default_nan(status); 2279 } 2280 } /* |X| < 1 */ 2281 2282 user_rnd_mode = status->float_rounding_mode; 2283 user_rnd_prec = status->floatx80_rounding_precision; 2284 status->float_rounding_mode = float_round_nearest_even; 2285 status->floatx80_rounding_precision = 80; 2286 2287 one = packFloatx80(0, one_exp, one_sig); 2288 fp0 = a; 2289 2290 fp1 = floatx80_add(one, fp0, status); /* 1 + X */ 2291 fp0 = floatx80_sub(one, fp0, status); /* 1 - X */ 2292 fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */ 2293 fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */ 2294 fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */ 2295 2296 status->float_rounding_mode = user_rnd_mode; 2297 status->floatx80_rounding_precision = user_rnd_prec; 2298 2299 a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */ 2300 2301 float_raise(float_flag_inexact, status); 2302 2303 return a; 2304 } 2305 2306 /*---------------------------------------------------------------------------- 2307 | Hyperbolic arc tangent 2308 *----------------------------------------------------------------------------*/ 2309 2310 floatx80 floatx80_atanh(floatx80 a, float_status *status) 2311 { 2312 flag aSign; 2313 int32_t aExp; 2314 uint64_t aSig; 2315 2316 int8_t user_rnd_mode, user_rnd_prec; 2317 2318 int32_t compact; 2319 floatx80 fp0, fp1, fp2, one; 2320 2321 aSig = extractFloatx80Frac(a); 2322 aExp = extractFloatx80Exp(a); 2323 aSign = extractFloatx80Sign(a); 2324 2325 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) { 2326 return propagateFloatx80NaNOneArg(a, status); 2327 } 2328 2329 if (aExp == 0 && aSig == 0) { 2330 return packFloatx80(aSign, 0, 0); 2331 } 2332 2333 compact = floatx80_make_compact(aExp, aSig); 2334 2335 if (compact >= 0x3FFF8000) { /* |X| >= 1 */ 2336 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */ 2337 float_raise(float_flag_divbyzero, status); 2338 return packFloatx80(aSign, floatx80_infinity.high, 2339 floatx80_infinity.low); 2340 } else { /* |X| > 1 */ 2341 float_raise(float_flag_invalid, status); 2342 return floatx80_default_nan(status); 2343 } 2344 } /* |X| < 1 */ 2345 2346 user_rnd_mode = status->float_rounding_mode; 2347 user_rnd_prec = status->floatx80_rounding_precision; 2348 status->float_rounding_mode = float_round_nearest_even; 2349 status->floatx80_rounding_precision = 80; 2350 2351 one = packFloatx80(0, one_exp, one_sig); 2352 fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */ 2353 fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */ 2354 fp1 = packFloatx80(1, aExp, aSig); /* -Y */ 2355 fp0 = floatx80_add(fp0, fp0, status); /* 2Y */ 2356 fp1 = floatx80_add(fp1, one, status); /* 1-Y */ 2357 fp0 = floatx80_div(fp0, fp1, status); /* Z = 
2Y/(1-Y) */ 2358 fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */ 2359 2360 status->float_rounding_mode = user_rnd_mode; 2361 status->floatx80_rounding_precision = user_rnd_prec; 2362 2363 a = floatx80_mul(fp0, fp2, 2364 status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */ 2365 2366 float_raise(float_flag_inexact, status); 2367 2368 return a; 2369 } 2370 2371 /*---------------------------------------------------------------------------- 2372 | e to x minus 1 2373 *----------------------------------------------------------------------------*/ 2374 2375 floatx80 floatx80_etoxm1(floatx80 a, float_status *status) 2376 { 2377 flag aSign; 2378 int32_t aExp; 2379 uint64_t aSig; 2380 2381 int8_t user_rnd_mode, user_rnd_prec; 2382 2383 int32_t compact, n, j, m, m1; 2384 floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc; 2385 2386 aSig = extractFloatx80Frac(a); 2387 aExp = extractFloatx80Exp(a); 2388 aSign = extractFloatx80Sign(a); 2389 2390 if (aExp == 0x7FFF) { 2391 if ((uint64_t) (aSig << 1)) { 2392 return propagateFloatx80NaNOneArg(a, status); 2393 } 2394 if (aSign) { 2395 return packFloatx80(aSign, one_exp, one_sig); 2396 } 2397 return packFloatx80(0, floatx80_infinity.high, 2398 floatx80_infinity.low); 2399 } 2400 2401 if (aExp == 0 && aSig == 0) { 2402 return packFloatx80(aSign, 0, 0); 2403 } 2404 2405 user_rnd_mode = status->float_rounding_mode; 2406 user_rnd_prec = status->floatx80_rounding_precision; 2407 status->float_rounding_mode = float_round_nearest_even; 2408 status->floatx80_rounding_precision = 80; 2409 2410 if (aExp >= 0x3FFD) { /* |X| >= 1/4 */ 2411 compact = floatx80_make_compact(aExp, aSig); 2412 2413 if (compact <= 0x4004C215) { /* |X| <= 70 log2 */ 2414 fp0 = a; 2415 fp1 = a; 2416 fp0 = floatx80_mul(fp0, float32_to_floatx80( 2417 make_float32(0x42B8AA3B), status), 2418 status); /* 64/log2 * X */ 2419 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */ 2420 fp0 = int32_to_floatx80(n, status); 2421 2422 j = n & 0x3F; /* J = N mod 64 */ 2423 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ 2424 if (n < 0 && j) { 2425 /* arithmetic right shift is division and 2426 * round towards minus infinity 2427 */ 2428 m--; 2429 } 2430 m1 = -m; 2431 /*m += 0x3FFF; // biased exponent of 2^(M) */ 2432 /*m1 += 0x3FFF; // biased exponent of -2^(-M) */ 2433 2434 fp2 = fp0; /* N */ 2435 fp0 = floatx80_mul(fp0, float32_to_floatx80( 2436 make_float32(0xBC317218), status), 2437 status); /* N * L1, L1 = lead(-log2/64) */ 2438 l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); 2439 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */ 2440 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */ 2441 fp0 = floatx80_add(fp0, fp2, status); /* R */ 2442 2443 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ 2444 fp2 = float32_to_floatx80(make_float32(0x3950097B), 2445 status); /* A6 */ 2446 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */ 2447 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A), 2448 status), fp1, status); /* fp3 is S*A5 */ 2449 fp2 = floatx80_add(fp2, float64_to_floatx80( 2450 make_float64(0x3F81111111174385), status), 2451 status); /* fp2 IS A4+S*A6 */ 2452 fp3 = floatx80_add(fp3, float64_to_floatx80( 2453 make_float64(0x3FA5555555554F5A), status), 2454 status); /* fp3 is A3+S*A5 */ 2455 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */ 2456 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */ 2457 fp2 = floatx80_add(fp2, float64_to_floatx80( 2458 make_float64(0x3FC5555555555555), status), 2459 status); /* 
fp2 IS A2+S*(A4+S*A6) */ 2460 fp3 = floatx80_add(fp3, float32_to_floatx80( 2461 make_float32(0x3F000000), status), 2462 status); /* fp3 IS A1+S*(A3+S*A5) */ 2463 fp2 = floatx80_mul(fp2, fp1, 2464 status); /* fp2 IS S*(A2+S*(A4+S*A6)) */ 2465 fp1 = floatx80_mul(fp1, fp3, 2466 status); /* fp1 IS S*(A1+S*(A3+S*A5)) */ 2467 fp2 = floatx80_mul(fp2, fp0, 2468 status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */ 2469 fp0 = floatx80_add(fp0, fp1, 2470 status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */ 2471 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */ 2472 2473 fp0 = floatx80_mul(fp0, exp_tbl[j], 2474 status); /* 2^(J/64)*(Exp(R)-1) */ 2475 2476 if (m >= 64) { 2477 fp1 = float32_to_floatx80(exp_tbl2[j], status); 2478 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ 2479 fp1 = floatx80_add(fp1, onebysc, status); 2480 fp0 = floatx80_add(fp0, fp1, status); 2481 fp0 = floatx80_add(fp0, exp_tbl[j], status); 2482 } else if (m < -3) { 2483 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], 2484 status), status); 2485 fp0 = floatx80_add(fp0, exp_tbl[j], status); 2486 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ 2487 fp0 = floatx80_add(fp0, onebysc, status); 2488 } else { /* -3 <= m <= 63 */ 2489 fp1 = exp_tbl[j]; 2490 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], 2491 status), status); 2492 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */ 2493 fp1 = floatx80_add(fp1, onebysc, status); 2494 fp0 = floatx80_add(fp0, fp1, status); 2495 } 2496 2497 sc = packFloatx80(0, m + 0x3FFF, one_sig); 2498 2499 status->float_rounding_mode = user_rnd_mode; 2500 status->floatx80_rounding_precision = user_rnd_prec; 2501 2502 a = floatx80_mul(fp0, sc, status); 2503 2504 float_raise(float_flag_inexact, status); 2505 2506 return a; 2507 } else { /* |X| > 70 log2 */ 2508 if (aSign) { 2509 fp0 = float32_to_floatx80(make_float32(0xBF800000), 2510 status); /* -1 */ 2511 2512 status->float_rounding_mode = user_rnd_mode; 2513 status->floatx80_rounding_precision = user_rnd_prec; 2514 2515 a = floatx80_add(fp0, float32_to_floatx80( 2516 make_float32(0x00800000), status), 2517 status); /* -1 + 2^(-126) */ 2518 2519 float_raise(float_flag_inexact, status); 2520 2521 return a; 2522 } else { 2523 status->float_rounding_mode = user_rnd_mode; 2524 status->floatx80_rounding_precision = user_rnd_prec; 2525 2526 return floatx80_etox(a, status); 2527 } 2528 } 2529 } else { /* |X| < 1/4 */ 2530 if (aExp >= 0x3FBE) { 2531 fp0 = a; 2532 fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */ 2533 fp1 = float32_to_floatx80(make_float32(0x2F30CAA8), 2534 status); /* B12 */ 2535 fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */ 2536 fp2 = float32_to_floatx80(make_float32(0x310F8290), 2537 status); /* B11 */ 2538 fp1 = floatx80_add(fp1, float32_to_floatx80( 2539 make_float32(0x32D73220), status), 2540 status); /* B10 */ 2541 fp2 = floatx80_mul(fp2, fp0, status); 2542 fp1 = floatx80_mul(fp1, fp0, status); 2543 fp2 = floatx80_add(fp2, float32_to_floatx80( 2544 make_float32(0x3493F281), status), 2545 status); /* B9 */ 2546 fp1 = floatx80_add(fp1, float64_to_floatx80( 2547 make_float64(0x3EC71DE3A5774682), status), 2548 status); /* B8 */ 2549 fp2 = floatx80_mul(fp2, fp0, status); 2550 fp1 = floatx80_mul(fp1, fp0, status); 2551 fp2 = floatx80_add(fp2, float64_to_floatx80( 2552 make_float64(0x3EFA01A019D7CB68), status), 2553 status); /* B7 */ 2554 fp1 = floatx80_add(fp1, float64_to_floatx80( 2555 make_float64(0x3F2A01A01A019DF3), status), 2556 status); /* B6 */ 2557 fp2 = floatx80_mul(fp2, fp0, 
status); 2558 fp1 = floatx80_mul(fp1, fp0, status); 2559 fp2 = floatx80_add(fp2, float64_to_floatx80( 2560 make_float64(0x3F56C16C16C170E2), status), 2561 status); /* B5 */ 2562 fp1 = floatx80_add(fp1, float64_to_floatx80( 2563 make_float64(0x3F81111111111111), status), 2564 status); /* B4 */ 2565 fp2 = floatx80_mul(fp2, fp0, status); 2566 fp1 = floatx80_mul(fp1, fp0, status); 2567 fp2 = floatx80_add(fp2, float64_to_floatx80( 2568 make_float64(0x3FA5555555555555), status), 2569 status); /* B3 */ 2570 fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB)); 2571 fp1 = floatx80_add(fp1, fp3, status); /* B2 */ 2572 fp2 = floatx80_mul(fp2, fp0, status); 2573 fp1 = floatx80_mul(fp1, fp0, status); 2574 2575 fp2 = floatx80_mul(fp2, fp0, status); 2576 fp1 = floatx80_mul(fp1, a, status); 2577 2578 fp0 = floatx80_mul(fp0, float32_to_floatx80( 2579 make_float32(0x3F000000), status), 2580 status); /* S*B1 */ 2581 fp1 = floatx80_add(fp1, fp2, status); /* Q */ 2582 fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */ 2583 2584 status->float_rounding_mode = user_rnd_mode; 2585 status->floatx80_rounding_precision = user_rnd_prec; 2586 2587 a = floatx80_add(fp0, a, status); 2588 2589 float_raise(float_flag_inexact, status); 2590 2591 return a; 2592 } else { /* |X| < 2^(-65) */ 2593 sc = packFloatx80(1, 1, one_sig); 2594 fp0 = a; 2595 2596 if (aExp < 0x0033) { /* |X| < 2^(-16382) */ 2597 fp0 = floatx80_mul(fp0, float64_to_floatx80( 2598 make_float64(0x48B0000000000000), status), 2599 status); 2600 fp0 = floatx80_add(fp0, sc, status); 2601 2602 status->float_rounding_mode = user_rnd_mode; 2603 status->floatx80_rounding_precision = user_rnd_prec; 2604 2605 a = floatx80_mul(fp0, float64_to_floatx80( 2606 make_float64(0x3730000000000000), status), 2607 status); 2608 } else { 2609 status->float_rounding_mode = user_rnd_mode; 2610 status->floatx80_rounding_precision = user_rnd_prec; 2611 2612 a = floatx80_add(fp0, sc, status); 2613 } 2614 2615 float_raise(float_flag_inexact, status); 2616 2617 return a; 2618 } 2619 } 2620 } 2621 2622 /*---------------------------------------------------------------------------- 2623 | Hyperbolic tangent 2624 *----------------------------------------------------------------------------*/ 2625 2626 floatx80 floatx80_tanh(floatx80 a, float_status *status) 2627 { 2628 flag aSign, vSign; 2629 int32_t aExp, vExp; 2630 uint64_t aSig, vSig; 2631 2632 int8_t user_rnd_mode, user_rnd_prec; 2633 2634 int32_t compact; 2635 floatx80 fp0, fp1; 2636 uint32_t sign; 2637 2638 aSig = extractFloatx80Frac(a); 2639 aExp = extractFloatx80Exp(a); 2640 aSign = extractFloatx80Sign(a); 2641 2642 if (aExp == 0x7FFF) { 2643 if ((uint64_t) (aSig << 1)) { 2644 return propagateFloatx80NaNOneArg(a, status); 2645 } 2646 return packFloatx80(aSign, one_exp, one_sig); 2647 } 2648 2649 if (aExp == 0 && aSig == 0) { 2650 return packFloatx80(aSign, 0, 0); 2651 } 2652 2653 user_rnd_mode = status->float_rounding_mode; 2654 user_rnd_prec = status->floatx80_rounding_precision; 2655 status->float_rounding_mode = float_round_nearest_even; 2656 status->floatx80_rounding_precision = 80; 2657 2658 compact = floatx80_make_compact(aExp, aSig); 2659 2660 if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) { 2661 /* TANHBORS */ 2662 if (compact < 0x3FFF8000) { 2663 /* TANHSM */ 2664 status->float_rounding_mode = user_rnd_mode; 2665 status->floatx80_rounding_precision = user_rnd_prec; 2666 2667 a = floatx80_move(a, status); 2668 2669 float_raise(float_flag_inexact, status); 2670 2671 return a; 2672 } else { 2673 if (compact > 
0x40048AA1) { 2674 /* TANHHUGE */ 2675 sign = 0x3F800000; 2676 sign |= aSign ? 0x80000000 : 0x00000000; 2677 fp0 = float32_to_floatx80(make_float32(sign), status); 2678 sign &= 0x80000000; 2679 sign ^= 0x80800000; /* -SIGN(X)*EPS */ 2680 2681 status->float_rounding_mode = user_rnd_mode; 2682 status->floatx80_rounding_precision = user_rnd_prec; 2683 2684 a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign), 2685 status), status); 2686 2687 float_raise(float_flag_inexact, status); 2688 2689 return a; 2690 } else { 2691 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ 2692 fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */ 2693 fp0 = floatx80_add(fp0, float32_to_floatx80( 2694 make_float32(0x3F800000), 2695 status), status); /* EXP(Y)+1 */ 2696 sign = aSign ? 0x80000000 : 0x00000000; 2697 fp1 = floatx80_div(float32_to_floatx80(make_float32( 2698 sign ^ 0xC0000000), status), fp0, 2699 status); /* -SIGN(X)*2 / [EXP(Y)+1] */ 2700 fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000), 2701 status); /* SIGN */ 2702 2703 status->float_rounding_mode = user_rnd_mode; 2704 status->floatx80_rounding_precision = user_rnd_prec; 2705 2706 a = floatx80_add(fp1, fp0, status); 2707 2708 float_raise(float_flag_inexact, status); 2709 2710 return a; 2711 } 2712 } 2713 } else { /* 2**(-40) < |X| < (5/2)LOG2 */ 2714 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */ 2715 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ 2716 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000), 2717 status), 2718 status); /* Z+2 */ 2719 2720 vSign = extractFloatx80Sign(fp1); 2721 vExp = extractFloatx80Exp(fp1); 2722 vSig = extractFloatx80Frac(fp1); 2723 2724 fp1 = packFloatx80(vSign ^ aSign, vExp, vSig); 2725 2726 status->float_rounding_mode = user_rnd_mode; 2727 status->floatx80_rounding_precision = user_rnd_prec; 2728 2729 a = floatx80_div(fp0, fp1, status); 2730 2731 float_raise(float_flag_inexact, status); 2732 2733 return a; 2734 } 2735 } 2736 2737 /*---------------------------------------------------------------------------- 2738 | Hyperbolic sine 2739 *----------------------------------------------------------------------------*/ 2740 2741 floatx80 floatx80_sinh(floatx80 a, float_status *status) 2742 { 2743 flag aSign; 2744 int32_t aExp; 2745 uint64_t aSig; 2746 2747 int8_t user_rnd_mode, user_rnd_prec; 2748 2749 int32_t compact; 2750 floatx80 fp0, fp1, fp2; 2751 float32 fact; 2752 2753 aSig = extractFloatx80Frac(a); 2754 aExp = extractFloatx80Exp(a); 2755 aSign = extractFloatx80Sign(a); 2756 2757 if (aExp == 0x7FFF) { 2758 if ((uint64_t) (aSig << 1)) { 2759 return propagateFloatx80NaNOneArg(a, status); 2760 } 2761 return packFloatx80(aSign, floatx80_infinity.high, 2762 floatx80_infinity.low); 2763 } 2764 2765 if (aExp == 0 && aSig == 0) { 2766 return packFloatx80(aSign, 0, 0); 2767 } 2768 2769 user_rnd_mode = status->float_rounding_mode; 2770 user_rnd_prec = status->floatx80_rounding_precision; 2771 status->float_rounding_mode = float_round_nearest_even; 2772 status->floatx80_rounding_precision = 80; 2773 2774 compact = floatx80_make_compact(aExp, aSig); 2775 2776 if (compact > 0x400CB167) { 2777 /* SINHBIG */ 2778 if (compact > 0x400CB2B3) { 2779 status->float_rounding_mode = user_rnd_mode; 2780 status->floatx80_rounding_precision = user_rnd_prec; 2781 2782 return roundAndPackFloatx80(status->floatx80_rounding_precision, 2783 aSign, 0x8000, aSig, 0, status); 2784 } else { 2785 fp0 = floatx80_abs(a); /* Y = |X| */ 2786 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2787 
make_float64(0x40C62D38D3D64634), status), 2788 status); /* (|X|-16381LOG2_LEAD) */ 2789 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2790 make_float64(0x3D6F90AEB1E75CC7), status), 2791 status); /* |X| - 16381 LOG2, ACCURATE */ 2792 fp0 = floatx80_etox(fp0, status); 2793 fp2 = packFloatx80(aSign, 0x7FFB, one_sig); 2794 2795 status->float_rounding_mode = user_rnd_mode; 2796 status->floatx80_rounding_precision = user_rnd_prec; 2797 2798 a = floatx80_mul(fp0, fp2, status); 2799 2800 float_raise(float_flag_inexact, status); 2801 2802 return a; 2803 } 2804 } else { /* |X| < 16380 LOG2 */ 2805 fp0 = floatx80_abs(a); /* Y = |X| */ 2806 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */ 2807 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 2808 status), status); /* 1+Z */ 2809 fp2 = fp0; 2810 fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */ 2811 fp0 = floatx80_add(fp0, fp2, status); 2812 2813 fact = packFloat32(aSign, 0x7E, 0); 2814 2815 status->float_rounding_mode = user_rnd_mode; 2816 status->floatx80_rounding_precision = user_rnd_prec; 2817 2818 a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status); 2819 2820 float_raise(float_flag_inexact, status); 2821 2822 return a; 2823 } 2824 } 2825 2826 /*---------------------------------------------------------------------------- 2827 | Hyperbolic cosine 2828 *----------------------------------------------------------------------------*/ 2829 2830 floatx80 floatx80_cosh(floatx80 a, float_status *status) 2831 { 2832 int32_t aExp; 2833 uint64_t aSig; 2834 2835 int8_t user_rnd_mode, user_rnd_prec; 2836 2837 int32_t compact; 2838 floatx80 fp0, fp1; 2839 2840 aSig = extractFloatx80Frac(a); 2841 aExp = extractFloatx80Exp(a); 2842 2843 if (aExp == 0x7FFF) { 2844 if ((uint64_t) (aSig << 1)) { 2845 return propagateFloatx80NaNOneArg(a, status); 2846 } 2847 return packFloatx80(0, floatx80_infinity.high, 2848 floatx80_infinity.low); 2849 } 2850 2851 if (aExp == 0 && aSig == 0) { 2852 return packFloatx80(0, one_exp, one_sig); 2853 } 2854 2855 user_rnd_mode = status->float_rounding_mode; 2856 user_rnd_prec = status->floatx80_rounding_precision; 2857 status->float_rounding_mode = float_round_nearest_even; 2858 status->floatx80_rounding_precision = 80; 2859 2860 compact = floatx80_make_compact(aExp, aSig); 2861 2862 if (compact > 0x400CB167) { 2863 if (compact > 0x400CB2B3) { 2864 status->float_rounding_mode = user_rnd_mode; 2865 status->floatx80_rounding_precision = user_rnd_prec; 2866 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 2867 0x8000, one_sig, 0, status); 2868 } else { 2869 fp0 = packFloatx80(0, aExp, aSig); 2870 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2871 make_float64(0x40C62D38D3D64634), status), 2872 status); 2873 fp0 = floatx80_sub(fp0, float64_to_floatx80( 2874 make_float64(0x3D6F90AEB1E75CC7), status), 2875 status); 2876 fp0 = floatx80_etox(fp0, status); 2877 fp1 = packFloatx80(0, 0x7FFB, one_sig); 2878 2879 status->float_rounding_mode = user_rnd_mode; 2880 status->floatx80_rounding_precision = user_rnd_prec; 2881 2882 a = floatx80_mul(fp0, fp1, status); 2883 2884 float_raise(float_flag_inexact, status); 2885 2886 return a; 2887 } 2888 } 2889 2890 fp0 = packFloatx80(0, aExp, aSig); /* |X| */ 2891 fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */ 2892 fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000), 2893 status), status); /* (1/2)*EXP(|X|) */ 2894 fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */ 2895 fp1 = floatx80_div(fp1, fp0, status); /* 
                                           1/(2*EXP(|X|)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp1, status);

    float_raise(float_flag_inexact, status);

    return a;
}
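
/*----------------------------------------------------------------------------
 | Editor's sketch (not part of the FPSP port): the hyperbolic helpers above
 | all reduce to scaled exponential identities,
 |     tanh(x) = sign(x) * z / (z + 2),          z = expm1(2*|x|)
 |     sinh(x) = sign(x) * (z + z/(1+z)) / 2,    z = expm1(|x|)
 |     cosh(x) = w + 0.25/w,                     w = exp(|x|)/2
 | The snippet below restates them in plain double arithmetic purely as a
 | cross-check; the variable names and the use of <math.h> are illustrative
 | and unrelated to the softfloat implementation, so the block is disabled.
 *----------------------------------------------------------------------------*/

#if 0   /* standalone illustration only: compile separately with -lm */
#include <math.h>
#include <stdio.h>

static double ref_tanh(double x)
{
    double z = expm1(2.0 * fabs(x));          /* e^(2|x|) - 1 */
    return copysign(z / (z + 2.0), x);
}

static double ref_sinh(double x)
{
    double z = expm1(fabs(x));                /* e^|x| - 1 */
    return copysign(0.5 * (z + z / (1.0 + z)), x);
}

static double ref_cosh(double x)
{
    double w = 0.5 * exp(fabs(x));            /* (1/2) e^|x| */
    return w + 0.25 / w;
}

int main(void)
{
    double x;
    for (x = -3.0; x <= 3.0; x += 0.75) {
        printf("%6.2f  tanh %+.17g  sinh %+.17g  cosh %+.17g\n",
               x, ref_tanh(x) - tanh(x), ref_sinh(x) - sinh(x),
               ref_cosh(x) - cosh(x));
    }
    return 0;
}
#endif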