/*
 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator,
 * derived from NetBSD M68040 FPSP functions,
 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic
 * Package. Those parts of the code (and some later contributions) are
 * provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file will be taken to be licensed under
 * the Softfloat-2a license unless specifically indicated otherwise.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "softfloat.h"
#include "fpu/softfloat-macros.h"
#include "softfloat_fpsp_tables.h"

#define pi_exp      0x4000
#define piby2_exp   0x3FFF
#define pi_sig      LIT64(0xc90fdaa22168c235)

static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
{
    if (floatx80_is_signaling_nan(a, status)) {
        float_raise(float_flag_invalid, status);
    }

    if (status->default_nan_mode) {
        return floatx80_default_nan(status);
    }

    return floatx80_maybe_silence_nan(a, status);
}

/*----------------------------------------------------------------------------
 | Returns the modulo remainder of the extended double-precision floating-point
 | value `a' with respect to the corresponding value `b'.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    bSig |= LIT64(0x8000000000000000);
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if (expDiff < 0) {
        return a;
    }
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    expDiff -= 64;
    while (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62; /* 62 quotient bits are retired per iteration */
    }
    expDiff += 64;
    if (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}
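/*
 * The remainder computed above carries the sign of the dividend (zSign is
 * taken from aSign) and corresponds to a quotient truncated towards zero;
 * the main loop retires 62 quotient bits per pass, mirroring the SoftFloat
 * remainder routines this code derives from.
 */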
/*----------------------------------------------------------------------------
 | Returns the mantissa of the extended double-precision floating-point
 | value `a'.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_getman(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
                                0x3FFF, aSig, 0, status);
}

/*----------------------------------------------------------------------------
 | Returns the exponent of the extended double-precision floating-point
 | value `a' as an extended double-precision value.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_getexp(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    return int32_to_floatx80(aExp - 0x3FFF, status);
}
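/*
 * Example (illustrative values, not from the FPSP sources): for a = 6.0,
 * i.e. 1.5 * 2^2, floatx80_getman() returns 1.5 (the significand repacked
 * with biased exponent 0x3FFF) and floatx80_getexp() returns 2.0 (the
 * unbiased exponent aExp - 0x3FFF converted to extended precision).
 */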
/*----------------------------------------------------------------------------
 | Scales extended double-precision floating-point value in operand `a' by
 | value `b'. The function truncates the value in the second operand 'b' to
 | an integral value and adds that value to the exponent of the operand 'a'.
 | The operation is performed according to the IEC/IEEE Standard for Binary
 | Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/

floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, bSign;
    int32_t aExp, bExp, shiftCount;
    uint64_t aSig, bSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);
    bSign = extractFloatx80Sign(b);

    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1) ||
            ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        if (bExp < 0x3FFF) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    if (bExp < 0x3FFF) {
        return a;
    }

    if (0x400F < bExp) {
        aExp = bSign ? -0x6001 : 0xE000;
        return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                    aSign, aExp, aSig, 0, status);
    }

    shiftCount = 0x403E - bExp;
    bSig >>= shiftCount;
    aExp = bSign ? (aExp - bSig) : (aExp + bSig);

    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                aSign, aExp, aSig, 0, status);
}

floatx80 floatx80_move(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return a;
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                             aSign, aExp, aSig, 0, status);
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
                                aExp, aSig, 0, status);
}

/*----------------------------------------------------------------------------
 | Algorithms for transcendental functions supported by MC68881 and MC68882
 | mathematical coprocessors. The functions are derived from FPSP library.
 *----------------------------------------------------------------------------*/

#define one_exp     0x3FFF
#define one_sig     LIT64(0x8000000000000000)

/*----------------------------------------------------------------------------
 | Function for compactifying extended double-precision floating point values.
 *----------------------------------------------------------------------------*/

static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
{
    return (aExp << 16) | (aSig >> 48);
}
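/*
 * For orientation: 1.0 has aExp = 0x3FFF and aSig = 0x8000000000000000, so
 * its compact form is 0x3FFF8000.  This is why range tests below such as
 * "compact >= 0x3FFF8000" correspond to "X >= 1".
 */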
/*----------------------------------------------------------------------------
 | Log base e of x plus 1
 *----------------------------------------------------------------------------*/

floatx80 floatx80_lognp1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig, fSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, j, k;
    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    if (aSign && aExp >= one_exp) {
        if (aExp == one_exp && aSig == one_sig) {
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) {
        /* <= min threshold */
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a; /* Z */
    fp1 = a;

    fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                       status), status); /* X = (1+Z) */

    aExp = extractFloatx80Exp(fp0);
    aSig = extractFloatx80Frac(fp0);

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FFE8000 || compact > 0x3FFFC000) {
        /* |X| < 1/2 or |X| > 3/2 */
        k = aExp - 0x3FFF;
        fp1 = int32_to_floatx80(k, status);

        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */

        f = packFloatx80(0, 0x3FFF, fSig); /* F */
        fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */

        fp0 = floatx80_sub(fp0, f, status); /* Y-F */

    lp1cont1:
        /* LP1CONT1 */
        fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
        logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
        klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
        fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */

        fp3 = fp2;
        fp1 = fp2;

        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3FC2499AB5E4040B), status),
                           status); /* V*A6 */
        fp2 = floatx80_mul(fp2, float64_to_floatx80(
                           make_float64(0xBFC555B5848CB7DB), status),
                           status); /* V*A5 */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FC99999987D8730), status),
                           status); /* A4+V*A6 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFCFFFFFFF6F7E97), status),
                           status); /* A3+V*A5 */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FD55555555555A4), status),
                           status); /* A2+V*(A4+V*A6) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFE0000000000008), status),
                           status); /* A1+V*(A3+V*A5) */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */

        fp1 = floatx80_add(fp1, log_tbl[j + 1],
                           status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, klog2, status);

        float_raise(float_flag_inexact, status);

        return a;
    } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
        /* |X| < 15/16 or |X| > 17/16 */
        /* LP1CARE */
        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        f = packFloatx80(0, 0x3FFF, fSig); /* F */
        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */

        if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */
            /* KISZERO */
            fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000),
                               status), f, status); /* 1-F */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */
            fp1 = packFloatx80(0, 0, 0); /* K = 0 */
        } else {
            /* KISNEG */
            fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000),
                               status), f, status); /* 2-F */
            fp1 = floatx80_add(fp1, fp1, status); /* 2Z */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */
            fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */
        }
        goto lp1cont1;
    } else {
        /* LP1ONE16 */
        fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */
        fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* FP0 IS 1+X */

        /* LP1CONT2 */
        fp1 = floatx80_div(fp1, fp0, status); /* U */
        saveu = fp1;
        fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
        fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */

        fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
                                  status); /* B5 */
        fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
                                  status); /* B4 */
        fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
        fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3F624924928BCCFF), status),
                           status); /* B3+W*B5 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3F899999999995EC), status),
                           status); /* B2+W*B4 */
        fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
        fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FB5555555555555), status),
                           status); /* B1+W*(B3+W*B5) */

        fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
        fp1 = floatx80_add(fp1, fp2,
                           status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
        fp0 = floatx80_mul(fp0, fp1,
                           status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, saveu, status);

        /*if (!floatx80_is_zero(a)) { */
        float_raise(float_flag_inexact, status);
        /*} */

        return a;
    }
}

/*----------------------------------------------------------------------------
 | Log base e
 *----------------------------------------------------------------------------*/

floatx80 floatx80_logn(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig, fSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, j, k, adjk;
    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign == 0) {
            return packFloatx80(0, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
    }

    adjk = 0;

    if (aExp == 0) {
        if (aSig == 0) { /* zero */
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(1, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
        if ((aSig & one_sig) == 0) { /* denormal */
            normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
            adjk = -100;
            aExp += 100;
            a = packFloatx80(aSign, aExp, aSig);
        }
    }

    if (aSign) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
        /* |X| < 15/16 or |X| > 17/16 */
        k = aExp - 0x3FFF;
        k += adjk;
        fp1 = int32_to_floatx80(k, status);

        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */

        f = packFloatx80(0, 0x3FFF, fSig); /* F */
        fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */

        fp0 = floatx80_sub(fp0, f, status); /* Y-F */

        /* LP1CONT1 */
        fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
        logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
        klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
        fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */

        fp3 = fp2;
        fp1 = fp2;

        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3FC2499AB5E4040B), status),
                           status); /* V*A6 */
        fp2 = floatx80_mul(fp2, float64_to_floatx80(
                           make_float64(0xBFC555B5848CB7DB), status),
                           status); /* V*A5 */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FC99999987D8730), status),
                           status); /* A4+V*A6 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFCFFFFFFF6F7E97), status),
                           status); /* A3+V*A5 */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FD55555555555A4), status),
                           status); /* A2+V*(A4+V*A6) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0xBFE0000000000008), status),
                           status); /* A1+V*(A3+V*A5) */
        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
        fp1 = floatx80_add(fp1, log_tbl[j + 1],
                           status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
        fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, klog2, status);

        float_raise(float_flag_inexact, status);

        return a;
    } else { /* |X-1| < 1/16 */
        fp0 = a;
        fp1 = a;
        fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* FP1 IS X-1 */
        fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* FP0 IS X+1 */
        fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */

        /* LP1CONT2 */
        fp1 = floatx80_div(fp1, fp0, status); /* U */
        saveu = fp1;
        fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
        fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */

        fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
                                  status); /* B5 */
        fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
                                  status); /* B4 */
        fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
        fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3F624924928BCCFF), status),
                           status); /* B3+W*B5 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3F899999999995EC), status),
                           status); /* B2+W*B4 */
        fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
        fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
        fp1 = floatx80_add(fp1, float64_to_floatx80(
                           make_float64(0x3FB5555555555555), status),
                           status); /* B1+W*(B3+W*B5) */

        fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
        fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
        fp0 = floatx80_mul(fp0, fp1,
                           status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, saveu, status);

        /*if (!floatx80_is_zero(a)) { */
        float_raise(float_flag_inexact, status);
        /*} */

        return a;
    }
}
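/*
 * The two functions below reuse floatx80_logn(): as the INV_L10 and INV_L2
 * constants in their bodies indicate, log10(X) is computed as
 * logn(X) * (1 / ln 10) and log2(X) as logn(X) * (1 / ln 2), with log2
 * taking a shortcut for exact powers of two.
 */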
/*----------------------------------------------------------------------------
 | Log base 10
 *----------------------------------------------------------------------------*/

floatx80 floatx80_log10(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign == 0) {
            return packFloatx80(0, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
    }

    if (aExp == 0 && aSig == 0) {
        float_raise(float_flag_divbyzero, status);
        return packFloatx80(1, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aSign) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = floatx80_logn(a, status);
    fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); /* INV_L10 */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */

    float_raise(float_flag_inexact, status);

    return a;
}

/*----------------------------------------------------------------------------
 | Log base 2
 *----------------------------------------------------------------------------*/

floatx80 floatx80_log2(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign == 0) {
            return packFloatx80(0, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
    }

    if (aExp == 0) {
        if (aSig == 0) {
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(1, floatx80_infinity.high,
                                floatx80_infinity.low);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    if (aSign) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (aSig == one_sig) { /* X is 2^k */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = int32_to_floatx80(aExp - 0x3FFF, status);
    } else {
        fp0 = floatx80_logn(a, status);
        fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); /* INV_L2 */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */
    }

    float_raise(float_flag_inexact, status);

    return a;
}

/*----------------------------------------------------------------------------
 | e to x
 *----------------------------------------------------------------------------*/

floatx80 floatx80_etox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, k, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    flag adjflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    adjflag = 0;

    if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact < 0x400CB167) { /* |X| < 16380 log2 */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            adjflag = 0;
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /* arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m += 0x3FFF; /* biased exponent of 2^(M) */

        expcont1:
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
                                      status); /* A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
                               status), fp1,
                               status); /* fp3 is S*A4 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
                               0x3FA5555555554431), status),
                               status); /* fp2 is A3+S*A5 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
                               0x3FC5555555554018), status),
                               status); /* fp3 is A2+S*A4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp2 is A1+S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            fp1 = exp_tbl[j];
            fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
                               status); /* accurate 2^(J/64) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */

            scale = packFloatx80(0, m, one_sig);
            if (adjflag) {
                adjscale = packFloatx80(0, m1, one_sig);
                fp0 = floatx80_mul(fp0, adjscale, status);
            }

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, scale, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| >= 16380 log2 */
            if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;
                if (aSign) {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, -0x1000, aSig, 0, status);
                } else {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, 0x8000, aSig, 0, status);
                }
                float_raise(float_flag_inexact, status);

                return a;
            } else {
                fp0 = a;
                fp1 = a;
                fp0 = floatx80_mul(fp0, float32_to_floatx80(
                                   make_float32(0x42B8AA3B), status),
                                   status); /* 64/log2 * X */
                adjflag = 1;
                n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
                fp0 = int32_to_floatx80(n, status);

                j = n & 0x3F; /* J = N mod 64 */
                /* NOTE: this is really arithmetic right shift by 6 */
                k = n / 64;
                if (n < 0 && j) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    k--;
                }
                /* NOTE: this is really arithmetic right shift by 1 */
                m1 = k / 2;
                if (k < 0 && (k & 1)) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    m1--;
                }
                m = k - m1;
                m1 += 0x3FFF; /* biased exponent of 2^(M1) */
                m += 0x3FFF; /* biased exponent of 2^(M) */

                goto expcont1;
            }
        }
    } else { /* |X| < 2^(-65) */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
                         status), status); /* 1 + X */

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*----------------------------------------------------------------------------
 | 2 to x
 *----------------------------------------------------------------------------*/

floatx80 floatx80_twotox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400D80C0) {
        /* |X| > 16480 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x42800000), status),
                           status); /* X * 64 */
        n = floatx80_to_int32(fp1, status);
        fp1 = int32_to_floatx80(n, status);
        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x3C800000), status),
                           status); /* (1/64)*N */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
        fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); /* LOG2 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
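/*
 * floatx80_tentox() below follows the same scheme as floatx80_twotox()
 * above: N = INT(X * 64 * log2(10)) is computed, the reduced argument R is
 * rebuilt from the lead/trail constants visible in the code, and the same
 * EXPR polynomial and exp2_tbl/exp2_tbl2 tables assemble the final
 * 2^(J/64) * 2^M scaling.
 */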
/*----------------------------------------------------------------------------
 | 10 to x
 *----------------------------------------------------------------------------*/

floatx80 floatx80_tentox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400B9B07) {
        /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x406A934F0979A371),
                           status), status); /* X*64*LOG10/LOG2 */
        n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
        fp1 = int32_to_floatx80(n, status);

        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /* arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp2 = fp1; /* N */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3F734413509F8000), status),
                           status); /* N*(LOG2/64LOG10)_LEAD */
        fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2));
        fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
        fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
        fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); /* LOG10 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*----------------------------------------------------------------------------
 | Tangent
 *----------------------------------------------------------------------------*/

floatx80 floatx80_tan(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
    float32 twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto tancont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    tancont:
        if (n & 1) {
            /* NODD */
            fp1 = fp0; /* R */
            fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
            fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            xSign = extractFloatx80Sign(fp1);
            xExp = extractFloatx80Exp(fp1);
            xSig = extractFloatx80Frac(fp1);
            xSign ^= 1;
            fp1 = packFloatx80(xSign, xExp, xSig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
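/*
 * floatx80_sin() and floatx80_cos() below share floatx80_tan()'s structure:
 * the same REDUCEX loop brings large arguments into range, and then either
 * SINPOLY or COSPOLY is evaluated, selected by the low bits of the quotient
 * N (cosine simply substitutes N + 1, i.e. cos(X) = sin(X + PI/2)).
 */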
/*----------------------------------------------------------------------------
 | Sine
 *----------------------------------------------------------------------------*/

floatx80 floatx80_sin(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* SINSM */
            fp0 = float32_to_floatx80(make_float32(0x3F800000),
                                      status); /* 1 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* SINTINY */
            a = floatx80_move(a, status);
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    sincont:
        if (n & 1) {
            /* COSPOLY */
            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
                                      status); /* B8 */
            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
                                      status); /* B7 */

            xSign = extractFloatx80Sign(fp0); /* X IS S */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            if ((n >> 1) & 1) {
                xSign ^= 1;
                posneg1 = make_float32(0xBF800000); /* -1 */
            } else {
                xSign ^= 0;
                posneg1 = make_float32(0x3F800000); /* 1 */
            } /* X IS NOW R'= SGN*R */

            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3E21EED90612C972), status),
                               status); /* B6+TB8 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE927E4FB79D9FCF), status),
                               status); /* B5+TB7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A01A01D423), status),
                               status); /* B4+T(B6+TB8) */
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0xBF000000), status),
                               status); /* B1+T(B3+T(B5+TB7)) */
            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
            fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
                                                   * [S(B2+T(B4+T(B6+TB8)))]
                                                   */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* SINPOLY */
            xSign = extractFloatx80Sign(fp0); /* X IS R */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */

            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
                                      status); /* A7 */
            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
                                      status); /* A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE5AE6452A118AE4), status),
                               status); /* A5+T*A7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5341531), status),
                               status); /* A4+T*A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF2A01A01A018B59), status),
                               status); /* A3+T(A5+TA7) */
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
            fp1 = floatx80_add(fp1, fp2,
                               status); /* [A1+T(A3+T(A5+TA7))]+
                                         * [S(A2+T(A4+TA6))]
                                         */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*----------------------------------------------------------------------------
 | Cosine
 *----------------------------------------------------------------------------*/

floatx80 floatx80_cos(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
/*----------------------------------------------------------------------------
| Cosine
*----------------------------------------------------------------------------*/

floatx80 floatx80_cos(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* |X| < 2^(-40) or |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* SINSM */
            fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* COSTINY */
            a = floatx80_sub(fp0, float32_to_floatx80(
                             make_float32(0x00800000), status),
                             status);
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    sincont:
        if ((n + 1) & 1) {
            /* COSPOLY */
            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
                                      status); /* B8 */
            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
                                      status); /* B7 */

            xSign = extractFloatx80Sign(fp0); /* X IS S */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            if (((n + 1) >> 1) & 1) {
                xSign ^= 1;
                posneg1 = make_float32(0xBF800000); /* -1 */
            } else {
                xSign ^= 0;
                posneg1 = make_float32(0x3F800000); /* 1 */
            } /* X IS NOW R'= SGN*R */

            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3E21EED90612C972), status),
                               status); /* B6+TB8 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE927E4FB79D9FCF), status),
                               status); /* B5+TB7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A01A01D423), status),
                               status); /* B4+T(B6+TB8) */
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0xBF000000), status),
                               status); /* B1+T(B3+T(B5+TB7)) */
            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
            fp0 = floatx80_add(fp0, fp1, status);
            /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* SINPOLY */
            xSign = extractFloatx80Sign(fp0); /* X IS R */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */

            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
                                      status); /* A7 */
            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
                                      status); /* A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE5AE6452A118AE4), status),
                               status); /* A5+T*A7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5341531), status),
                               status); /* A4+T*A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF2A01A01A018B59), status),
                               status); /* A3+T(A5+TA7) */
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
            fp1 = floatx80_add(fp1, fp2, status);
            /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
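/*
 * Editorial note: floatx80_cos above reuses the sine reduction and the same
 * two polynomials but dispatches on N + 1 instead of N, i.e. the usual
 * cos(X) = sin(X + pi/2) trick.  Rough double-precision sketch, illustrative
 * only (cos_ref is a hypothetical name, assumes <math.h>):
 *
 *   double cos_ref(double x)
 *   {
 *       int n = (int)nearbyint(x * M_2_PI) + 1;  // shift by one quadrant
 *       double r = x - (n - 1) * M_PI_2;
 *       double p = (n & 1) ? cos(r) : sin(r);
 *       return ((n >> 1) & 1) ? -p : p;
 *   }
 */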
/*----------------------------------------------------------------------------
| Arc tangent
*----------------------------------------------------------------------------*/

floatx80 floatx80_atan(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, tbl_index;
    floatx80 fp0, fp1, fp2, fp3, xsave;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        a = packFloatx80(aSign, piby2_exp, pi_sig);
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
        /* |X| >= 16 or |X| < 1/16 */
        if (compact > 0x3FFF8000) { /* |X| >= 16 */
            if (compact > 0x40638000) { /* |X| > 2^(100) */
                fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
                fp1 = packFloatx80(aSign, 0x0001, one_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_sub(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                fp0 = a;
                fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
                fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
                xsave = fp1;
                fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
                                          status); /* C5 */
                fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
                                          status); /* C4 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924827107B8), status),
                                   status); /* C3+Z*C5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC999999996263E), status),
                                   status); /* C2+Z*C4 */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555536), status),
                                   status); /* C1+Z*(C3+Z*C5) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
                /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)]) */
                fp0 = floatx80_mul(fp0, fp1, status);
                fp0 = floatx80_add(fp0, xsave, status);
                fp1 = packFloatx80(aSign, piby2_exp, pi_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        } else { /* |X| < 1/16 */
            if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_move(a, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                fp0 = a;
                xsave = a;
                fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
                                          status); /* B6 */
                fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
                                          status); /* B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FBC71C646940220), status),
                                   status); /* B4+Z*B6 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924921872F9), status),
                                   status); /* B3+Z*B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC9999999998FA9), status),
                                   status); /* B2+Z*(B4+Z*B6) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555555), status),
                                   status); /* B1+Z*(B3+Z*B5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
                /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
                fp0 = floatx80_mul(fp0, fp1, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, xsave, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else {
        aSig &= LIT64(0xF800000000000000);
        aSig |= LIT64(0x0400000000000000);
        xsave = packFloatx80(aSign, aExp, aSig); /* F */
        fp0 = a;
        fp1 = a; /* X */
        fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
        fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
        fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
        fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
        fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */

        tbl_index = compact;

        tbl_index &= 0x7FFF0000;
        tbl_index -= 0x3FFB0000;
        tbl_index >>= 1;
        tbl_index += compact & 0x00007800;
        tbl_index >>= 11;

        fp3 = atan_tbl[tbl_index];

        fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */

        fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
        fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
                                  status); /* A3 */
        fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
        fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x4002AC6934A26DB3), status),
                           status); /* A2+V*(A3+V) */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0xBFC2476F4E1DA28E), status),
                           status); /* A1*U*V */
        fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
        fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, fp3, status); /* ATAN(X) */

        float_raise(float_flag_inexact, status);

        return a;
    }
}
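/*
 * Editorial note: for 1/16 <= |X| < 16 the table branch above splits X into a
 * nearby point F (the significand truncated to a few leading bits) and a
 * correction U = (X - F)/(1 + X*F), then uses
 * atan(X) = atan(F) + atan(U), with atan(F) read from atan_tbl[] and atan(U)
 * from a short polynomial.  A hedged double-precision sketch of the identity
 * only (atan_split is a hypothetical name, assumes <math.h>):
 *
 *   double atan_split(double x, double f)
 *   {
 *       double u = (x - f) / (1.0 + x * f);  // f is assumed close to x
 *       return atan(f) + atan(u);            // equals atan(x)
 *   }
 */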
/*----------------------------------------------------------------------------
| Arc sine
*----------------------------------------------------------------------------*/

floatx80 floatx80_asin(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            float_raise(float_flag_inexact, status);
            a = packFloatx80(aSign, piby2_exp, pi_sig);
            return floatx80_move(a, status);
        } else { /* |X| > 1 */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp2 = floatx80_add(one, fp0, status); /* 1 + X */
    fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */
    fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */
    fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */

    float_raise(float_flag_inexact, status);

    return a;
}
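/*
 * Editorial note: the computation above relies on the identity
 * asin(X) = atan(X / sqrt((1 - X) * (1 + X))) for |X| < 1, with |X| == 1 and
 * |X| > 1 handled explicitly beforehand.  Double-precision sketch
 * (asin_ref is a hypothetical name, assumes <math.h>):
 *
 *   double asin_ref(double x)
 *   {
 *       return atan(x / sqrt((1.0 - x) * (1.0 + x)));
 *   }
 */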
/*----------------------------------------------------------------------------
| Arc cosine
*----------------------------------------------------------------------------*/

floatx80 floatx80_acos(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }
    if (aExp == 0 && aSig == 0) {
        float_raise(float_flag_inexact, status);
        return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                    piby2_exp, pi_sig, 0, status);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            if (aSign) { /* X == -1 */
                a = packFloatx80(0, pi_exp, pi_sig);
                float_raise(float_flag_inexact, status);
                return floatx80_move(a, status);
            } else { /* X == +1 */
                return packFloatx80(0, 0, 0);
            }
        } else { /* |X| > 1 */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_add(one, fp0, status); /* 1 + X */
    fp0 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */
    fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */
    fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */

    float_raise(float_flag_inexact, status);

    return a;
}
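/*
 * Editorial note: the branch above uses the half-angle form
 * acos(X) = 2 * atan(sqrt((1 - X) / (1 + X))) for |X| < 1; X == +1, X == -1
 * and |X| > 1 are special-cased first.  Double-precision sketch (acos_ref is
 * a hypothetical name, assumes <math.h>):
 *
 *   double acos_ref(double x)
 *   {
 *       return 2.0 * atan(sqrt((1.0 - x) / (1.0 + x)));
 *   }
 */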
/*----------------------------------------------------------------------------
| Hyperbolic arc tangent
*----------------------------------------------------------------------------*/

floatx80 floatx80_atanh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, floatx80_infinity.high,
                                floatx80_infinity.low);
        } else { /* |X| > 1 */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
    fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
    fp1 = packFloatx80(1, aExp, aSig); /* -Y */
    fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
    fp1 = floatx80_add(fp1, one, status); /* 1-Y */
    fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
    fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_mul(fp0, fp2,
                     status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */

    float_raise(float_flag_inexact, status);

    return a;
}
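/*
 * Editorial note: with Y = |X| the code above forms Z = 2Y/(1 - Y) and then
 * atanh(X) = sign(X) * (1/2) * log1p(Z), which avoids cancellation for small
 * |X|.  Double-precision sketch (atanh_ref is a hypothetical name, assumes
 * <math.h>):
 *
 *   double atanh_ref(double x)
 *   {
 *       double y = fabs(x);
 *       return copysign(0.5 * log1p(2.0 * y / (1.0 - y)), x);
 *   }
 */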
/*----------------------------------------------------------------------------
| e to x minus 1
*----------------------------------------------------------------------------*/

floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(aSign, one_exp, one_sig);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /* arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m1 = -m;
            /*m += 0x3FFF; // biased exponent of 2^(M) */
            /*m1 += 0x3FFF; // biased exponent of -2^(-M) */

            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3950097B),
                                      status); /* A6 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
                               status), fp1, status); /* fp3 is S*A5 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F81111111174385), status),
                               status); /* fp2 IS A4+S*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0x3FA5555555554F5A), status),
                               status); /* fp3 is A3+S*A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FC5555555555555), status),
                               status); /* fp2 IS A2+S*(A4+S*A6) */
            fp3 = floatx80_add(fp3, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp3 IS A1+S*(A3+S*A5) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
            fp1 = floatx80_mul(fp1, fp3,
                               status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
            fp2 = floatx80_mul(fp2, fp0,
                               status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            fp0 = floatx80_mul(fp0, exp_tbl[j],
                               status); /* 2^(J/64)*(Exp(R)-1) */

            if (m >= 64) {
                fp1 = float32_to_floatx80(exp_tbl2[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
            } else if (m < -3) {
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp0 = floatx80_add(fp0, onebysc, status);
            } else { /* -3 <= m <= 63 */
                fp1 = exp_tbl[j];
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
            }

            sc = packFloatx80(0, m + 0x3FFF, one_sig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, sc, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| > 70 log2 */
            if (aSign) {
                fp0 = float32_to_floatx80(make_float32(0xBF800000),
                                          status); /* -1 */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(
                                 make_float32(0x00800000), status),
                                 status); /* -1 + 2^(-126) */

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                return floatx80_etox(a, status);
            }
        }
    } else { /* |X| < 1/4 */
        if (aExp >= 0x3FBE) {
            fp0 = a;
            fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
            fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
                                      status); /* B12 */
            fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
            fp2 = float32_to_floatx80(make_float32(0x310F8290),
                                      status); /* B11 */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x32D73220), status),
                               status); /* B10 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3493F281), status),
                               status); /* B9 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5774682), status),
                               status); /* B8 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A019D7CB68), status),
                               status); /* B7 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F2A01A01A019DF3), status),
                               status); /* B6 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F56C16C16C170E2), status),
                               status); /* B5 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F81111111111111), status),
                               status); /* B4 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FA5555555555555), status),
                               status); /* B3 */
            fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB));
            fp1 = floatx80_add(fp1, fp3, status); /* B2 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);

            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, a, status);

            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* S*B1 */
            fp1 = floatx80_add(fp1, fp2, status); /* Q */
            fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| < 2^(-65) */
            sc = packFloatx80(1, 1, one_sig);
            fp0 = a;

            if (aExp < 0x0033) { /* |X| < 2^(-16382) */
                fp0 = floatx80_mul(fp0, float64_to_floatx80(
                                   make_float64(0x48B0000000000000), status),
                                   status);
                fp0 = floatx80_add(fp0, sc, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_mul(fp0, float64_to_floatx80(
                                 make_float64(0x3730000000000000), status),
                                 status);
            } else {
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, sc, status);
            }

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
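/*
 * Editorial note: for 1/4 <= |X| <= 70 log2 the code above writes
 * X = R + N * log2/64 with N = round(X * 64/log2), J = N mod 64 and
 * M = N div 64, evaluates exp(R) - 1 by polynomial, and reassembles
 * expm1(X) = 2^M * (2^(J/64)*(exp(R) - 1) + (2^(J/64) - 2^(-M))), ordering
 * the additions by the size of M to limit cancellation; exp_tbl[] appears to
 * hold 2^(J/64) with exp_tbl2[] its correction.  A rough double-precision
 * sketch of the reduction only (expm1_ref is a hypothetical name, assumes
 * <math.h>):
 *
 *   double expm1_ref(double x)
 *   {
 *       int n = (int)nearbyint(x * 64.0 / M_LN2);
 *       int j = n & 0x3F, m = (n - j) / 64;
 *       double r = x - n * (M_LN2 / 64.0);
 *       double t = exp2(j / 64.0);
 *       return ldexp(t * expm1(r) + (t - ldexp(1.0, -m)), m);
 *   }
 */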
/*----------------------------------------------------------------------------
| Hyperbolic tangent
*----------------------------------------------------------------------------*/

floatx80 floatx80_tanh(floatx80 a, float_status *status)
{
    flag aSign, vSign;
    int32_t aExp, vExp;
    uint64_t aSig, vSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;
    uint32_t sign;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return packFloatx80(aSign, one_exp, one_sig);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
        /* TANHBORS */
        if (compact < 0x3FFF8000) {
            /* TANHSM */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            if (compact > 0x40048AA1) {
                /* TANHHUGE */
                sign = 0x3F800000;
                sign |= aSign ? 0x80000000 : 0x00000000;
                fp0 = float32_to_floatx80(make_float32(sign), status);
                sign &= 0x80000000;
                sign ^= 0x80800000; /* -SIGN(X)*EPS */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
                                 status), status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
                fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
                fp0 = floatx80_add(fp0, float32_to_floatx80(
                                   make_float32(0x3F800000),
                                   status), status); /* EXP(Y)+1 */
                sign = aSign ? 0x80000000 : 0x00000000;
                fp1 = floatx80_div(float32_to_floatx80(make_float32(
                                   sign ^ 0xC0000000), status), fp0,
                                   status); /* -SIGN(X)*2 / [EXP(Y)+1] */
                fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
                                          status); /* SIGN */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp1, fp0, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else { /* 2**(-40) < |X| < (5/2)LOG2 */
        fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
                           status),
                           status); /* Z+2 */

        vSign = extractFloatx80Sign(fp1);
        vExp = extractFloatx80Exp(fp1);
        vSig = extractFloatx80Frac(fp1);

        fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_div(fp0, fp1, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
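/*
 * Editorial note: in the central range the code above uses Z = expm1(2|X|)
 * and tanh(X) = sign(X) * Z / (Z + 2), the expm1-based form of
 * (exp(2y) - 1)/(exp(2y) + 1).  Double-precision sketch (tanh_ref is a
 * hypothetical name, assumes <math.h>):
 *
 *   double tanh_ref(double x)
 *   {
 *       double z = expm1(2.0 * fabs(x));
 *       return copysign(z / (z + 2.0), x);
 *   }
 */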
/*----------------------------------------------------------------------------
| Hyperbolic sine
*----------------------------------------------------------------------------*/

floatx80 floatx80_sinh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2;
    float32 fact;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        /* SINHBIG */
        if (compact > 0x400CB2B3) {
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                        aSign, 0x8000, aSig, 0, status);
        } else {
            fp0 = floatx80_abs(a); /* Y = |X| */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status); /* (|X|-16381LOG2_LEAD) */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status); /* |X| - 16381 LOG2, ACCURATE */
            fp0 = floatx80_etox(fp0, status);
            fp2 = packFloatx80(aSign, 0x7FFB, one_sig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp2, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* |X| < 16380 LOG2 */
        fp0 = floatx80_abs(a); /* Y = |X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* 1+Z */
        fp2 = fp0;
        fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
        fp0 = floatx80_add(fp0, fp2, status);

        fact = packFloat32(aSign, 0x7E, 0);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
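/*
 * Editorial note: for moderate arguments the code above evaluates
 * Z = expm1(|X|) and sinh(X) = sign(X) * (1/2) * (Z + Z/(1 + Z)), since
 * sinh(y) = (exp(y) - exp(-y))/2 and exp(-y) = 1/(1 + Z).  Double-precision
 * sketch (sinh_ref is a hypothetical name, assumes <math.h>):
 *
 *   double sinh_ref(double x)
 *   {
 *       double z = expm1(fabs(x));
 *       return copysign(0.5 * (z + z / (1.0 + z)), x);
 *   }
 */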
/*----------------------------------------------------------------------------
| Hyperbolic cosine
*----------------------------------------------------------------------------*/

floatx80 floatx80_cosh(floatx80 a, float_status *status)
{
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        if (compact > 0x400CB2B3) {
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;
            return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                        0x8000, one_sig, 0, status);
        } else {
            fp0 = packFloatx80(0, aExp, aSig);
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status);
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status);
            fp0 = floatx80_etox(fp0, status);
            fp1 = packFloatx80(0, 0x7FFB, one_sig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }

    fp0 = packFloatx80(0, aExp, aSig); /* |X| */
    fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
    fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
                       status), status); /* (1/2)*EXP(|X|) */
    fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
    fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp1, status);

    float_raise(float_flag_inexact, status);

    return a;
}
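/*
 * Editorial note: outside the overflow paths the code above uses
 * cosh(X) = (1/2)*exp(|X|) + 1/(2*exp(|X|)); the second term is formed as
 * (1/4) divided by the first.  Double-precision sketch (cosh_ref is a
 * hypothetical name, assumes <math.h>):
 *
 *   double cosh_ref(double x)
 *   {
 *       double t = 0.5 * exp(fabs(x));
 *       return t + 0.25 / t;
 *   }
 */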