1 /* 2 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator, 3 * derived from NetBSD M68040 FPSP functions, 4 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic 5 * Package. Those parts of the code (and some later contributions) are 6 * provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file will be taken to be licensed under 14 * the Softfloat-2a license unless specifically indicated otherwise. 15 */ 16 17 /* 18 * Portions of this work are licensed under the terms of the GNU GPL, 19 * version 2 or later. See the COPYING file in the top-level directory. 20 */ 21 22 #include "qemu/osdep.h" 23 #include "softfloat.h" 24 #include "fpu/softfloat-macros.h" 25 #include "softfloat_fpsp_tables.h" 26 27 #define pi_exp 0x4000 28 #define piby2_exp 0x3FFF 29 #define pi_sig LIT64(0xc90fdaa22168c235) 30 31 static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) 32 { 33 if (floatx80_is_signaling_nan(a, status)) { 34 float_raise(float_flag_invalid, status); 35 a = floatx80_silence_nan(a, status); 36 } 37 38 if (status->default_nan_mode) { 39 return floatx80_default_nan(status); 40 } 41 42 return a; 43 } 44 45 /* 46 * Returns the modulo remainder of the extended double-precision floating-point 47 * value `a' with respect to the corresponding value `b'. 
 */

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    /* Any NaN operand propagates; mod(inf, x) is an invalid operation. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    /* mod(x, inf) = x; mod(x, NaN) propagates the NaN. */
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    /* mod(x, 0) is invalid; normalize a denormal divisor. */
    if (bExp == 0) {
        if (bSig == 0) {
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    /* mod(0, y) = 0 (with a's sign); normalize a denormal dividend. */
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    bSig |= LIT64(0x8000000000000000);  /* make the integer bit explicit */
    zSign = aSign;                      /* result carries the dividend's sign */
    expDiff = aExp - bExp;
    aSig1 = 0;
    /* |a| < |b|: the remainder is a itself. */
    if (expDiff < 0) {
        return a;
    }
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    /*
     * Long division over the 128-bit remainder <aSig0, aSig1>, peeling off
     * up to 62 quotient bits per iteration (the estimate may be 2 too high,
     * hence the guard subtraction of 2 below).
     */
    expDiff -= 64;
    while (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62;
    }
    expDiff += 64;
    /* Final partial step for the remaining (< 64) quotient bits. */
    if (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        /* Correct the conservative quotient estimate. */
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}

/*
 * Returns the mantissa of the extended double-precision floating-point
 * value `a'.
 */

floatx80 floatx80_getman(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* NaN propagates; getman(inf) is invalid. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    /* getman(+/-0) = +/-0; normalize denormals first. */
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    /* Repack the significand with a bias exponent: result is in [1, 2). */
    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
                                0x3FFF, aSig, 0, status);
}

/*
 * Returns the exponent of the extended double-precision floating-point
 * value `a' as an extended double-precision value.
165 */ 166 167 floatx80 floatx80_getexp(floatx80 a, float_status *status) 168 { 169 flag aSign; 170 int32_t aExp; 171 uint64_t aSig; 172 173 aSig = extractFloatx80Frac(a); 174 aExp = extractFloatx80Exp(a); 175 aSign = extractFloatx80Sign(a); 176 177 if (aExp == 0x7FFF) { 178 if ((uint64_t) (aSig << 1)) { 179 return propagateFloatx80NaNOneArg(a , status); 180 } 181 float_raise(float_flag_invalid , status); 182 return floatx80_default_nan(status); 183 } 184 185 if (aExp == 0) { 186 if (aSig == 0) { 187 return packFloatx80(aSign, 0, 0); 188 } 189 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 190 } 191 192 return int32_to_floatx80(aExp - 0x3FFF, status); 193 } 194 195 /* 196 * Scales extended double-precision floating-point value in operand `a' by 197 * value `b'. The function truncates the value in the second operand 'b' to 198 * an integral value and adds that value to the exponent of the operand 'a'. 199 * The operation performed according to the IEC/IEEE Standard for Binary 200 * Floating-Point Arithmetic. 
201 */ 202 203 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) 204 { 205 flag aSign, bSign; 206 int32_t aExp, bExp, shiftCount; 207 uint64_t aSig, bSig; 208 209 aSig = extractFloatx80Frac(a); 210 aExp = extractFloatx80Exp(a); 211 aSign = extractFloatx80Sign(a); 212 bSig = extractFloatx80Frac(b); 213 bExp = extractFloatx80Exp(b); 214 bSign = extractFloatx80Sign(b); 215 216 if (bExp == 0x7FFF) { 217 if ((uint64_t) (bSig << 1) || 218 ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) { 219 return propagateFloatx80NaN(a, b, status); 220 } 221 float_raise(float_flag_invalid , status); 222 return floatx80_default_nan(status); 223 } 224 if (aExp == 0x7FFF) { 225 if ((uint64_t) (aSig << 1)) { 226 return propagateFloatx80NaN(a, b, status); 227 } 228 return packFloatx80(aSign, floatx80_infinity.high, 229 floatx80_infinity.low); 230 } 231 if (aExp == 0) { 232 if (aSig == 0) { 233 return packFloatx80(aSign, 0, 0); 234 } 235 if (bExp < 0x3FFF) { 236 return a; 237 } 238 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 239 } 240 241 if (bExp < 0x3FFF) { 242 return a; 243 } 244 245 if (0x400F < bExp) { 246 aExp = bSign ? -0x6001 : 0xE000; 247 return roundAndPackFloatx80(status->floatx80_rounding_precision, 248 aSign, aExp, aSig, 0, status); 249 } 250 251 shiftCount = 0x403E - bExp; 252 bSig >>= shiftCount; 253 aExp = bSign ? 
(aExp - bSig) : (aExp + bSig); 254 255 return roundAndPackFloatx80(status->floatx80_rounding_precision, 256 aSign, aExp, aSig, 0, status); 257 } 258 259 floatx80 floatx80_move(floatx80 a, float_status *status) 260 { 261 flag aSign; 262 int32_t aExp; 263 uint64_t aSig; 264 265 aSig = extractFloatx80Frac(a); 266 aExp = extractFloatx80Exp(a); 267 aSign = extractFloatx80Sign(a); 268 269 if (aExp == 0x7FFF) { 270 if ((uint64_t)(aSig << 1)) { 271 return propagateFloatx80NaNOneArg(a, status); 272 } 273 return a; 274 } 275 if (aExp == 0) { 276 if (aSig == 0) { 277 return a; 278 } 279 normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, 280 aSign, aExp, aSig, 0, status); 281 } 282 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 283 aExp, aSig, 0, status); 284 } 285 286 /* 287 * Algorithms for transcendental functions supported by MC68881 and MC68882 288 * mathematical coprocessors. The functions are derived from FPSP library. 289 */ 290 291 #define one_exp 0x3FFF 292 #define one_sig LIT64(0x8000000000000000) 293 294 /* 295 * Function for compactifying extended double-precision floating point values. 
296 */ 297 298 static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) 299 { 300 return (aExp << 16) | (aSig >> 48); 301 } 302 303 /* 304 * Log base e of x plus 1 305 */ 306 307 floatx80 floatx80_lognp1(floatx80 a, float_status *status) 308 { 309 flag aSign; 310 int32_t aExp; 311 uint64_t aSig, fSig; 312 313 int8_t user_rnd_mode, user_rnd_prec; 314 315 int32_t compact, j, k; 316 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; 317 318 aSig = extractFloatx80Frac(a); 319 aExp = extractFloatx80Exp(a); 320 aSign = extractFloatx80Sign(a); 321 322 if (aExp == 0x7FFF) { 323 if ((uint64_t) (aSig << 1)) { 324 propagateFloatx80NaNOneArg(a, status); 325 } 326 if (aSign) { 327 float_raise(float_flag_invalid, status); 328 return floatx80_default_nan(status); 329 } 330 return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low); 331 } 332 333 if (aExp == 0 && aSig == 0) { 334 return packFloatx80(aSign, 0, 0); 335 } 336 337 if (aSign && aExp >= one_exp) { 338 if (aExp == one_exp && aSig == one_sig) { 339 float_raise(float_flag_divbyzero, status); 340 return packFloatx80(aSign, floatx80_infinity.high, 341 floatx80_infinity.low); 342 } 343 float_raise(float_flag_invalid, status); 344 return floatx80_default_nan(status); 345 } 346 347 if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { 348 /* <= min threshold */ 349 float_raise(float_flag_inexact, status); 350 return floatx80_move(a, status); 351 } 352 353 user_rnd_mode = status->float_rounding_mode; 354 user_rnd_prec = status->floatx80_rounding_precision; 355 status->float_rounding_mode = float_round_nearest_even; 356 status->floatx80_rounding_precision = 80; 357 358 compact = floatx80_make_compact(aExp, aSig); 359 360 fp0 = a; /* Z */ 361 fp1 = a; 362 363 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 364 status), status); /* X = (1+Z) */ 365 366 aExp = extractFloatx80Exp(fp0); 367 aSig = extractFloatx80Frac(fp0); 368 369 compact = floatx80_make_compact(aExp, aSig); 370 371 
if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { 372 /* |X| < 1/2 or |X| > 3/2 */ 373 k = aExp - 0x3FFF; 374 fp1 = int32_to_floatx80(k, status); 375 376 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); 377 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ 378 379 f = packFloatx80(0, 0x3FFF, fSig); /* F */ 380 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ 381 382 fp0 = floatx80_sub(fp0, f, status); /* Y-F */ 383 384 lp1cont1: 385 /* LP1CONT1 */ 386 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ 387 logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); 388 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ 389 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ 390 391 fp3 = fp2; 392 fp1 = fp2; 393 394 fp1 = floatx80_mul(fp1, float64_to_floatx80( 395 make_float64(0x3FC2499AB5E4040B), status), 396 status); /* V*A6 */ 397 fp2 = floatx80_mul(fp2, float64_to_floatx80( 398 make_float64(0xBFC555B5848CB7DB), status), 399 status); /* V*A5 */ 400 fp1 = floatx80_add(fp1, float64_to_floatx80( 401 make_float64(0x3FC99999987D8730), status), 402 status); /* A4+V*A6 */ 403 fp2 = floatx80_add(fp2, float64_to_floatx80( 404 make_float64(0xBFCFFFFFFF6F7E97), status), 405 status); /* A3+V*A5 */ 406 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ 407 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ 408 fp1 = floatx80_add(fp1, float64_to_floatx80( 409 make_float64(0x3FD55555555555A4), status), 410 status); /* A2+V*(A4+V*A6) */ 411 fp2 = floatx80_add(fp2, float64_to_floatx80( 412 make_float64(0xBFE0000000000008), status), 413 status); /* A1+V*(A3+V*A5) */ 414 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ 415 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ 416 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ 417 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ 418 419 fp1 = floatx80_add(fp1, log_tbl[j + 1], 420 status); /* 
LOG(F)+U*V*(A2+V*(A4+V*A6)) */ 421 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ 422 423 status->float_rounding_mode = user_rnd_mode; 424 status->floatx80_rounding_precision = user_rnd_prec; 425 426 a = floatx80_add(fp0, klog2, status); 427 428 float_raise(float_flag_inexact, status); 429 430 return a; 431 } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { 432 /* |X| < 1/16 or |X| > -1/16 */ 433 /* LP1CARE */ 434 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); 435 f = packFloatx80(0, 0x3FFF, fSig); /* F */ 436 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ 437 438 if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */ 439 /* KISZERO */ 440 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000), 441 status), f, status); /* 1-F */ 442 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */ 443 fp1 = packFloatx80(0, 0, 0); /* K = 0 */ 444 } else { 445 /* KISNEG */ 446 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000), 447 status), f, status); /* 2-F */ 448 fp1 = floatx80_add(fp1, fp1, status); /* 2Z */ 449 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */ 450 fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */ 451 } 452 goto lp1cont1; 453 } else { 454 /* LP1ONE16 */ 455 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */ 456 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 457 status), status); /* FP0 IS 1+X */ 458 459 /* LP1CONT2 */ 460 fp1 = floatx80_div(fp1, fp0, status); /* U */ 461 saveu = fp1; 462 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ 463 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ 464 465 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), 466 status); /* B5 */ 467 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), 468 status); /* B4 */ 469 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ 470 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ 471 fp3 = floatx80_add(fp3, float64_to_floatx80( 
472 make_float64(0x3F624924928BCCFF), status), 473 status); /* B3+W*B5 */ 474 fp2 = floatx80_add(fp2, float64_to_floatx80( 475 make_float64(0x3F899999999995EC), status), 476 status); /* B2+W*B4 */ 477 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ 478 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ 479 fp1 = floatx80_add(fp1, float64_to_floatx80( 480 make_float64(0x3FB5555555555555), status), 481 status); /* B1+W*(B3+W*B5) */ 482 483 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ 484 fp1 = floatx80_add(fp1, fp2, 485 status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ 486 fp0 = floatx80_mul(fp0, fp1, 487 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ 488 489 status->float_rounding_mode = user_rnd_mode; 490 status->floatx80_rounding_precision = user_rnd_prec; 491 492 a = floatx80_add(fp0, saveu, status); 493 494 /*if (!floatx80_is_zero(a)) { */ 495 float_raise(float_flag_inexact, status); 496 /*} */ 497 498 return a; 499 } 500 } 501 502 /* 503 * Log base e 504 */ 505 506 floatx80 floatx80_logn(floatx80 a, float_status *status) 507 { 508 flag aSign; 509 int32_t aExp; 510 uint64_t aSig, fSig; 511 512 int8_t user_rnd_mode, user_rnd_prec; 513 514 int32_t compact, j, k, adjk; 515 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; 516 517 aSig = extractFloatx80Frac(a); 518 aExp = extractFloatx80Exp(a); 519 aSign = extractFloatx80Sign(a); 520 521 if (aExp == 0x7FFF) { 522 if ((uint64_t) (aSig << 1)) { 523 propagateFloatx80NaNOneArg(a, status); 524 } 525 if (aSign == 0) { 526 return packFloatx80(0, floatx80_infinity.high, 527 floatx80_infinity.low); 528 } 529 } 530 531 adjk = 0; 532 533 if (aExp == 0) { 534 if (aSig == 0) { /* zero */ 535 float_raise(float_flag_divbyzero, status); 536 return packFloatx80(1, floatx80_infinity.high, 537 floatx80_infinity.low); 538 } 539 if ((aSig & one_sig) == 0) { /* denormal */ 540 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 541 adjk = -100; 542 aExp += 100; 543 a = packFloatx80(aSign, aExp, aSig); 544 } 545 
} 546 547 if (aSign) { 548 float_raise(float_flag_invalid, status); 549 return floatx80_default_nan(status); 550 } 551 552 user_rnd_mode = status->float_rounding_mode; 553 user_rnd_prec = status->floatx80_rounding_precision; 554 status->float_rounding_mode = float_round_nearest_even; 555 status->floatx80_rounding_precision = 80; 556 557 compact = floatx80_make_compact(aExp, aSig); 558 559 if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { 560 /* |X| < 15/16 or |X| > 17/16 */ 561 k = aExp - 0x3FFF; 562 k += adjk; 563 fp1 = int32_to_floatx80(k, status); 564 565 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); 566 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ 567 568 f = packFloatx80(0, 0x3FFF, fSig); /* F */ 569 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ 570 571 fp0 = floatx80_sub(fp0, f, status); /* Y-F */ 572 573 /* LP1CONT1 */ 574 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ 575 logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); 576 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ 577 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ 578 579 fp3 = fp2; 580 fp1 = fp2; 581 582 fp1 = floatx80_mul(fp1, float64_to_floatx80( 583 make_float64(0x3FC2499AB5E4040B), status), 584 status); /* V*A6 */ 585 fp2 = floatx80_mul(fp2, float64_to_floatx80( 586 make_float64(0xBFC555B5848CB7DB), status), 587 status); /* V*A5 */ 588 fp1 = floatx80_add(fp1, float64_to_floatx80( 589 make_float64(0x3FC99999987D8730), status), 590 status); /* A4+V*A6 */ 591 fp2 = floatx80_add(fp2, float64_to_floatx80( 592 make_float64(0xBFCFFFFFFF6F7E97), status), 593 status); /* A3+V*A5 */ 594 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ 595 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ 596 fp1 = floatx80_add(fp1, float64_to_floatx80( 597 make_float64(0x3FD55555555555A4), status), 598 status); /* A2+V*(A4+V*A6) */ 599 fp2 = floatx80_add(fp2, float64_to_floatx80( 600 make_float64(0xBFE0000000000008), 
status), 601 status); /* A1+V*(A3+V*A5) */ 602 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ 603 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ 604 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ 605 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ 606 607 fp1 = floatx80_add(fp1, log_tbl[j + 1], 608 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ 609 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ 610 611 status->float_rounding_mode = user_rnd_mode; 612 status->floatx80_rounding_precision = user_rnd_prec; 613 614 a = floatx80_add(fp0, klog2, status); 615 616 float_raise(float_flag_inexact, status); 617 618 return a; 619 } else { /* |X-1| >= 1/16 */ 620 fp0 = a; 621 fp1 = a; 622 fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000), 623 status), status); /* FP1 IS X-1 */ 624 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), 625 status), status); /* FP0 IS X+1 */ 626 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */ 627 628 /* LP1CONT2 */ 629 fp1 = floatx80_div(fp1, fp0, status); /* U */ 630 saveu = fp1; 631 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ 632 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ 633 634 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), 635 status); /* B5 */ 636 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), 637 status); /* B4 */ 638 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ 639 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ 640 fp3 = floatx80_add(fp3, float64_to_floatx80( 641 make_float64(0x3F624924928BCCFF), status), 642 status); /* B3+W*B5 */ 643 fp2 = floatx80_add(fp2, float64_to_floatx80( 644 make_float64(0x3F899999999995EC), status), 645 status); /* B2+W*B4 */ 646 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ 647 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ 648 fp1 = floatx80_add(fp1, float64_to_floatx80( 649 
make_float64(0x3FB5555555555555), status), 650 status); /* B1+W*(B3+W*B5) */ 651 652 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ 653 fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ 654 fp0 = floatx80_mul(fp0, fp1, 655 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ 656 657 status->float_rounding_mode = user_rnd_mode; 658 status->floatx80_rounding_precision = user_rnd_prec; 659 660 a = floatx80_add(fp0, saveu, status); 661 662 /*if (!floatx80_is_zero(a)) { */ 663 float_raise(float_flag_inexact, status); 664 /*} */ 665 666 return a; 667 } 668 } 669 670 /* 671 * Log base 10 672 */ 673 674 floatx80 floatx80_log10(floatx80 a, float_status *status) 675 { 676 flag aSign; 677 int32_t aExp; 678 uint64_t aSig; 679 680 int8_t user_rnd_mode, user_rnd_prec; 681 682 floatx80 fp0, fp1; 683 684 aSig = extractFloatx80Frac(a); 685 aExp = extractFloatx80Exp(a); 686 aSign = extractFloatx80Sign(a); 687 688 if (aExp == 0x7FFF) { 689 if ((uint64_t) (aSig << 1)) { 690 propagateFloatx80NaNOneArg(a, status); 691 } 692 if (aSign == 0) { 693 return packFloatx80(0, floatx80_infinity.high, 694 floatx80_infinity.low); 695 } 696 } 697 698 if (aExp == 0 && aSig == 0) { 699 float_raise(float_flag_divbyzero, status); 700 return packFloatx80(1, floatx80_infinity.high, 701 floatx80_infinity.low); 702 } 703 704 if (aSign) { 705 float_raise(float_flag_invalid, status); 706 return floatx80_default_nan(status); 707 } 708 709 user_rnd_mode = status->float_rounding_mode; 710 user_rnd_prec = status->floatx80_rounding_precision; 711 status->float_rounding_mode = float_round_nearest_even; 712 status->floatx80_rounding_precision = 80; 713 714 fp0 = floatx80_logn(a, status); 715 fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); /* INV_L10 */ 716 717 status->float_rounding_mode = user_rnd_mode; 718 status->floatx80_rounding_precision = user_rnd_prec; 719 720 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */ 721 722 float_raise(float_flag_inexact, 
status); 723 724 return a; 725 } 726 727 /* 728 * Log base 2 729 */ 730 731 floatx80 floatx80_log2(floatx80 a, float_status *status) 732 { 733 flag aSign; 734 int32_t aExp; 735 uint64_t aSig; 736 737 int8_t user_rnd_mode, user_rnd_prec; 738 739 floatx80 fp0, fp1; 740 741 aSig = extractFloatx80Frac(a); 742 aExp = extractFloatx80Exp(a); 743 aSign = extractFloatx80Sign(a); 744 745 if (aExp == 0x7FFF) { 746 if ((uint64_t) (aSig << 1)) { 747 propagateFloatx80NaNOneArg(a, status); 748 } 749 if (aSign == 0) { 750 return packFloatx80(0, floatx80_infinity.high, 751 floatx80_infinity.low); 752 } 753 } 754 755 if (aExp == 0) { 756 if (aSig == 0) { 757 float_raise(float_flag_divbyzero, status); 758 return packFloatx80(1, floatx80_infinity.high, 759 floatx80_infinity.low); 760 } 761 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 762 } 763 764 if (aSign) { 765 float_raise(float_flag_invalid, status); 766 return floatx80_default_nan(status); 767 } 768 769 user_rnd_mode = status->float_rounding_mode; 770 user_rnd_prec = status->floatx80_rounding_precision; 771 status->float_rounding_mode = float_round_nearest_even; 772 status->floatx80_rounding_precision = 80; 773 774 if (aSig == one_sig) { /* X is 2^k */ 775 status->float_rounding_mode = user_rnd_mode; 776 status->floatx80_rounding_precision = user_rnd_prec; 777 778 a = int32_to_floatx80(aExp - 0x3FFF, status); 779 } else { 780 fp0 = floatx80_logn(a, status); 781 fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); /* INV_L2 */ 782 783 status->float_rounding_mode = user_rnd_mode; 784 status->floatx80_rounding_precision = user_rnd_prec; 785 786 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */ 787 } 788 789 float_raise(float_flag_inexact, status); 790 791 return a; 792 } 793 794 /* 795 * e to x 796 */ 797 798 floatx80 floatx80_etox(floatx80 a, float_status *status) 799 { 800 flag aSign; 801 int32_t aExp; 802 uint64_t aSig; 803 804 int8_t user_rnd_mode, user_rnd_prec; 805 806 int32_t compact, n, j, k, m, m1; 807 
    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    flag adjflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* exp(NaN) propagates; exp(-inf) = +0; exp(+inf) = +inf. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    /* exp(+/-0) = 1 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    /* Compute internally at full precision, round-to-nearest. */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    adjflag = 0;

    if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact < 0x400CB167) { /* |X| < 16380 log2 */
            /*
             * Argument reduction: X = (M*64 + J)*log2/64 + R, |R| small,
             * so exp(X) = 2^M * 2^(J/64) * exp(R).
             */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            adjflag = 0;
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /*
                 * arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m += 0x3FFF; /* biased exponent of 2^(M) */

        expcont1:
            /* R = X - N*(log2/64), computed with a two-piece log2/64. */
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            /* Degree-5 polynomial approximation of exp(R) - 1. */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
                                      status); /* A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
                               status), fp1,
                               status); /* fp3 is S*A4 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
                               0x3FA5555555554431), status),
                               status); /* fp2 is A3+S*A5 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
                               0x3FC5555555554018), status),
                               status); /* fp3 is A2+S*A4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp2 is A1+S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            /* Combine with the tabulated 2^(J/64) (value + correction). */
            fp1 = exp_tbl[j];
            fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
                               status); /* accurate 2^(J/64) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */

            /* Scale by 2^M (and 2^M1 when the two-step path was taken). */
            scale = packFloatx80(0, m, one_sig);
            if (adjflag) {
                adjscale = packFloatx80(0, m1, one_sig);
                fp0 = floatx80_mul(fp0, adjscale, status);
            }

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, scale, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| >= 16380 log2 */
            if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
                /* Guaranteed underflow/overflow: saturated exponent. */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;
                if (aSign) {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, -0x1000, aSig, 0, status);
                } else {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, 0x8000, aSig, 0, status);
                }
                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /*
                 * Borderline range: split the scale factor in two
                 * (M and M1) to avoid intermediate overflow, then reuse
                 * the main path via expcont1.
                 */
                fp0 = a;
                fp1 = a;
                fp0 = floatx80_mul(fp0, float32_to_floatx80(
                                   make_float32(0x42B8AA3B), status),
                                   status); /* 64/log2 * X */
                adjflag = 1;
                n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
                fp0 = int32_to_floatx80(n, status);

                j = n & 0x3F; /* J = N mod 64 */
                /* NOTE: this is really arithmetic right shift by 6 */
                k = n / 64;
                if (n < 0 && j) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    k--;
                }
                /* NOTE: this is really arithmetic right shift by 1 */
                m1 = k / 2;
                if (k < 0 && (k & 1)) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    m1--;
                }
                m = k - m1;
                m1 += 0x3FFF; /* biased exponent of 2^(M1) */
                m += 0x3FFF; /* biased exponent of 2^(M) */

                goto expcont1;
            }
        }
    } else { /* |X| < 2^(-65) */
        /* Tiny |X|: exp(X) ~= 1 + X, rounded in the caller's mode. */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
                         status), status); /* 1 + X */

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*
 * 2 to x
 */

floatx80 floatx80_twotox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* 2^NaN propagates; 2^(-inf) = +0; 2^(+inf) = +inf. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if
        (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    /* 2^(+/-0) = 1 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    /* Compute internally at full precision, round-to-nearest. */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400D80C0) {
        /* |X| > 16480 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            /* Guaranteed underflow/overflow: saturated exponent. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            /* Tiny |X|: 2^X ~= 1 + X, rounded in the caller's mode. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 */
        /*
         * Argument reduction: X = N/64 + R with N = int(64*X), so
         * 2^X = 2^(M'+M) * 2^(J/64) * exp(R*log2).
         */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x42800000), status),
                           status); /* X * 64 */
        n = floatx80_to_int32(fp1, status);
        fp1 = int32_to_floatx80(n, status);
        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        /*
         * fact1/fact2 are the tabulated 2^(J/64) (value + low-order
         * correction), pre-scaled by 2^M via direct exponent adjustment.
         */
        adjfact = packFloatx80(0, m1, one_sig);
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp1 = floatx80_mul(fp1, float32_to_floatx80(
                           make_float32(0x3C800000), status),
                           status); /* (1/64)*N */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
        fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); /* LOG2 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR: degree-5 polynomial approximation of exp(R) - 1. */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        /* (exp(R)-1)*fact1 + fact2 + fact1 = exp(R) * 2^(M+J/64) */
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*
 * 10 to x
 */

floatx80 floatx80_tentox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* 10^NaN propagates; 10^(-inf) = +0; 10^(+inf) = +inf. */
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    /* 10^(+/-0) = 1 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    /* Compute internally at full precision, round-to-nearest. */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400B9B07) {
        /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
        if (compact > 0x3FFF8000) { /* |X| > 16480 */
            /* Guaranteed underflow/overflow: saturated exponent. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                            0, 0x8000, aSig, 0, status);
            }
        } else { /* |X| < 2^(-70) */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;
            /* Tiny argument: result rounds to 1 + X. */
            a = floatx80_add(fp0, float32_to_floatx80(
                             make_float32(0x3F800000), status),
                             status); /* 1 + X */

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
        fp1 = fp0; /* X */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x406A934F0979A371),
                           status), status); /* X*64*LOG10/LOG2 */
        n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
        fp1 = int32_to_floatx80(n, status);

        j = n & 0x3F;
        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
        if (n < 0 && j) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            l--;
        }
        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
        if (l < 0 && (l & 1)) {
            /*
             * arithmetic right shift is division and
             * round towards minus infinity
             */
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */

        adjfact = packFloatx80(0, m1, one_sig);
        /* 2^(J/64) from the table, scaled by 2^M via direct exponent add. */
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j] >> 16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        fp2 = fp1; /* N */
        /* Subtract N*(log2/(64 log10)) in two pieces for extra accuracy. */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0x3F734413509F8000), status),
                           status); /* N*(LOG2/64LOG10)_LEAD */
        fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2));
        fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
        fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
        fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
        fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); /* LOG10 */
        fp0 = floatx80_mul(fp0, fp2, status); /* R */

        /* EXPR: same exp(R)-1 polynomial as floatx80_twotox */
        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
                                  status); /* A5 */
        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
                                  status); /* A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FA5555555554CC1), status),
                           status); /* A3+S*A5 */
        fp3 = floatx80_add(fp3, float64_to_floatx80(
                           make_float64(0x3FC5555555554A54), status),
                           status); /* A2+S*A4 */
        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x3FE0000000000000), status),
                           status); /* A1+S*(A3+S*A5) */
        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */

        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */

        /* (exp(R)-1)*fact + fact, with fact = fact1 + fact2 (hi/lo split) */
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*
 * Tangent
 *
 * FPSP tan kernel: the argument is reduced to R in [-pi/4, pi/4] with
 * N = round(X*2/pi) (table-based for moderate |X|, iterative REDUCEX loop
 * for |X| >= 15 pi), then tan(R) is evaluated as a ratio of two
 * polynomials; for odd N the cotangent identity -P/Q is used instead.
 */

floatx80 floatx80_tan(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
    float32 twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; tan(+/-inf) is an invalid operation. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid,
                    status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        /* tan(+/-0) = +/-0, exact. */
        return packFloatx80(aSign, 0, 0);
    }

    /* Work internally in round-to-nearest, full extended precision. */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX: iterative argument reduction in (fp0, fp1) =
             * (head, tail) working precision, peeling 2*pi multiples
             * until the exponent is small enough. */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /* Largest finite magnitude: pre-subtract 2*pi once. */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                /* Reduce at most 2^L multiples of 2*pi per pass. */
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            /* Add/subtract 2^63 to round fp2 to an integer N. */
            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto tancont;
            }
            /* Carry the rounding error into the next pass. */
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* |X| < 2^(-40): tan(X) rounds to X. */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* Moderate |X|: table-based reduction, pi_tbl holds N*pi/2
         * (head) and pi_tbl2 the tail, indexed by N+32. */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    tancont:
        if (n & 1) {
            /* NODD: odd N, use tan(X) = -cot(R) = -P(R)/Q(R). */
            fp1 = fp0; /* R */
            fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
            fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            /* Negate the numerator by flipping its sign bit. */
            xSign = extractFloatx80Sign(fp1);
            xExp = extractFloatx80Exp(fp1);
            xSig = extractFloatx80Frac(fp1);
            xSign ^= 1;
            fp1 = packFloatx80(xSign, xExp, xSig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* Even N: tan(X) = tan(R) = P(R)/Q(R). */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
                                      status); /* Q4 */
            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
                                      status); /* P3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF346F59B39BA65F), status),
                               status); /* Q3+SQ4 */
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
            fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x3F800000), status),
                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*
 * Sine
 *
 * FPSP sin kernel: reduce X to R in [-pi/4, pi/4] with N = round(X*2/pi)
 * (REDUCEX loop for |X| >= 15 pi), then evaluate a sine polynomial for
 * even N and a cosine polynomial for odd N; the quadrant bits of N select
 * the final sign.
 */

floatx80 floatx80_sin(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; sin(+/-inf) is an invalid operation. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        /* sin(+/-0) = +/-0, exact. */
        return packFloatx80(aSign, 0, 0);
    }

    /* Work internally in round-to-nearest, full extended precision. */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec =
                    status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX: iterative argument reduction in (fp0, fp1) =
             * (head, tail) working precision; same loop as floatx80_tan. */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /* Largest finite magnitude: pre-subtract 2*pi once. */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                /* Reduce at most 2^L multiples of 2*pi per pass. */
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            /* Add/subtract 2^63 to round fp2 to an integer N. */
            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            /* Carry the rounding error into the next pass. */
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* SINSM */
            fp0 = float32_to_floatx80(make_float32(0x3F800000),
                                      status); /* 1 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* SINTINY: |X| < 2^(-40), sin(X) rounds to X. */
            a = floatx80_move(a, status);
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* Moderate |X|: table-based reduction, pi_tbl holds N*pi/2
         * (head) and pi_tbl2 the tail, indexed by N+32. */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    sincont:
        if (n & 1) {
            /* COSPOLY: odd N, sin(X) = +/-cos(R) via cosine polynomial. */
            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
                                      status); /* B8 */
            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
                                      status); /* B7 */

            xSign = extractFloatx80Sign(fp0); /* X IS S */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            /* Bit 1 of N selects the quadrant sign. */
            if ((n >> 1) & 1) {
                xSign ^= 1;
                posneg1 = make_float32(0xBF800000); /* -1 */
            } else {
                xSign ^= 0;
                posneg1 = make_float32(0x3F800000); /* 1 */
            } /* X IS NOW R'= SGN*R */

            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3E21EED90612C972), status),
                               status); /* B6+TB8 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE927E4FB79D9FCF), status),
                               status); /* B5+TB7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A01A01D423), status),
                               status); /* B4+T(B6+TB8) */
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0xBF000000), status),
                               status); /* B1+T(B3+T(B5+TB7)) */
            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
            fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
                                                   * [S(B2+T(B4+T(B6+TB8)))]
                                                   */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status),
                             status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* SINPOLY: even N, sin(X) = +/-sin(R) via sine polynomial. */
            xSign = extractFloatx80Sign(fp0); /* X IS R */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */

            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
                                      status); /* A7 */
            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
                                      status); /* A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE5AE6452A118AE4), status),
                               status); /* A5+T*A7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5341531), status),
                               status); /* A4+T*A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF2A01A01A018B59), status),
                               status); /* A3+T(A5+TA7) */
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
            fp1 = floatx80_add(fp1, fp2,
                               status); /* [A1+T(A3+T(A5+TA7))]+
                                         * [S(A2+T(A4+TA6))]
                                         */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*
 * Cosine
 *
 * Same kernel as floatx80_sin, but the quadrant index is shifted by one
 * (N+1 in place of N), so cos(X) reuses the shared sin/cos polynomials.
 */

floatx80 floatx80_cos(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign =
            extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; cos(+/-inf) is an invalid operation. */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        /* cos(+/-0) = +1, exact. */
        return packFloatx80(0, one_exp, one_sig);
    }

    /* Work internally in round-to-nearest, full extended precision. */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
        /* 2^(-40) > |X| > 15 PI */
        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
            /* REDUCEX: iterative argument reduction in (fp0, fp1) =
             * (head, tail) working precision; same loop as floatx80_sin. */
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /* Largest finite magnitude: pre-subtract 2*pi once. */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
                                      LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
                                      LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                l = 0;
                endflag = 1;
            } else {
                /* Reduce at most 2^L multiples of 2*pi per pass. */
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l,
                                    LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));

            /* SIGN(INARG)*2^63 IN SGL */
            twoto63 = packFloat32(xSign, 0xBE, 0);

            /* Add/subtract 2^63 to round fp2 to an integer N. */
            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
                               status); /* FP2 is N */
            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
            fp3 = fp0; /* FP3 is A */
            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            /* Carry the rounding error into the next pass. */
            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
            goto loop;
        } else {
            /* SINSM */
            fp0 = float32_to_floatx80(make_float32(0x3F800000),
                                      status); /* 1 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            /* COSTINY: |X| < 2^(-40), cos(X) rounds to 1 - tiny
             * (forces inexact/rounding direction to be honoured). */
            a = floatx80_sub(fp0, float32_to_floatx80(
                             make_float32(0x00800000), status),
                             status);
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* Moderate |X|: table-based reduction, pi_tbl holds N*pi/2
         * (head) and pi_tbl2 the tail, indexed by N+32. */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(
                           make_float64(0x3FE45F306DC9C883), status),
                           status); /* X*2/PI */

        n = floatx80_to_int32(fp1, status);
        j = 32 + n;

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
                           status); /* FP0 IS R = (X-Y1)-Y2 */

    sincont:
        /* cos(X) = sin(X + pi/2): use the quadrant index N+1 below. */
        if ((n + 1) & 1) {
            /* COSPOLY */
            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
                                      status); /* B8 */
            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
                                      status); /* B7 */

            xSign = extractFloatx80Sign(fp0); /* X IS S */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            /* Bit 1 of N+1 selects the quadrant sign. */
            if (((n + 1) >> 1) & 1) {
                xSign ^= 1;
                posneg1 = make_float32(0xBF800000); /* -1 */
            } else {
                xSign ^= 0;
                posneg1 = make_float32(0x3F800000); /* 1 */
            } /* X IS NOW R'= SGN*R */

            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3E21EED90612C972), status),
                               status); /* B6+TB8 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE927E4FB79D9FCF), status),
                               status); /* B5+TB7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A01A01D423), status),
                               status); /* B4+T(B6+TB8) */
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0xBF000000), status),
                               status); /* B1+T(B3+T(B5+TB7)) */
            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
            fp0 = floatx80_add(fp0, fp1, status);
            /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status),
                             status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            /* SINPOLY */
            xSign = extractFloatx80Sign(fp0); /* X IS R */
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */

            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
                                      status); /* A7 */
            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
                                      status); /* A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBE5AE6452A118AE4), status),
                               status); /* A5+T*A7 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5341531), status),
                               status); /* A4+T*A6 */
            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0xBF2A01A01A018B59), status),
                               status); /* A3+T(A5+TA7) */
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
            fp1 = floatx80_add(fp1, fp2, status);
            /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*
 * Arc
tangent
 */

floatx80 floatx80_atan(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, tbl_index;
    floatx80 fp0, fp1, fp2, fp3, xsave;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* atan(+/-inf) = +/-pi/2, inexact */
        a = packFloatx80(aSign, piby2_exp, pi_sig);
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        /* atan(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    /*
     * Run the FPSP kernel in round-to-nearest, extended precision;
     * the caller's rounding state is restored before the final op.
     */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
        /* |X| >= 16 or |X| < 1/16 */
        if (compact > 0x3FFF8000) { /* |X| >= 16 */
            if (compact > 0x40638000) { /* |X| > 2^(100) */
                /* atan(X) ~ pi/2 - tiny; the 0x0001-exponent term
                 * forces the inexact result to round correctly. */
                fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
                fp1 = packFloatx80(aSign, 0x0001, one_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_sub(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* 16 <= |X| <= 2^100: atan(X) = pi/2 + atan(-1/X),
                 * with atan(-1/X) evaluated by an odd polynomial in X'. */
                fp0 = a;
                fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
                fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
                xsave = fp1;
                fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
                                          status); /* C5 */
                fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
                                          status); /* C4 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924827107B8), status),
                                   status); /* C3+Z*C5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC999999996263E), status),
                                   status); /* C2+Z*C4 */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555536), status),
                                   status); /* C1+Z*(C3+Z*C5) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
                /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)]) = atan(X') - X' */
                fp0 = floatx80_mul(fp0, fp1, status);
                fp0 = floatx80_add(fp0, xsave, status);
                fp1 = packFloatx80(aSign, piby2_exp, pi_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        } else { /* |X| < 1/16 */
            if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
                /* atan(X) ~ X to working precision; just round X */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_move(a, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* 2^(-40) <= |X| < 1/16: direct odd polynomial in X */
                fp0 = a;
                xsave = a;
                fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
                                          status); /* B6 */
                fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
                                          status); /* B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FBC71C646940220), status),
                                   status); /* B4+Z*B6 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924921872F9),
                                   status), status); /* B3+Z*B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC9999999998FA9), status),
                                   status); /* B2+Z*(B4+Z*B6) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555555), status),
                                   status); /* B1+Z*(B3+Z*B5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
                /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
                fp0 = floatx80_mul(fp0, fp1, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, xsave, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else {
        /*
         * 1/16 <= |X| < 16: table-driven reduction.  F is X with the
         * significand truncated to 5 bits plus a rounding bit, so that
         * U = (X-F)/(1+X*F) is small and atan(X) = atan(F) + atan(U).
         */
        aSig &= LIT64(0xF800000000000000);
        aSig |= LIT64(0x0400000000000000);
        xsave = packFloatx80(aSign, aExp, aSig); /* F */
        fp0 = a;
        fp1 = a; /* X */
        fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
        fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
        fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
        fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
        fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */

        /* Build the atan_tbl index from the exponent and the top
         * truncated fraction bits of the compact representation. */
        tbl_index = compact;

        tbl_index &= 0x7FFF0000;
        tbl_index -= 0x3FFB0000;
        tbl_index >>= 1;
        tbl_index += compact & 0x00007800;
        tbl_index >>= 11;

        fp3 = atan_tbl[tbl_index];

        fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F), given the sign of X */

        fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
        fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
                                  status); /* A3 */
        fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
        fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x4002AC6934A26DB3), status),
                           status); /* A2+V*(A3+V) */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0xBFC2476F4E1DA28E), status),
                           status); /* A1*U*V */
        fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
        fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, fp3, status); /* ATAN(X) = ATAN(F)+ATAN(U) */

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*
 * Arc sine
 */

floatx80 floatx80_asin(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        /* asin(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            /* asin(+/-1) = +/-pi/2, inexact */
            float_raise(float_flag_inexact, status);
            a = packFloatx80(aSign, piby2_exp, pi_sig);
            return floatx80_move(a, status);
        } else { /* |X| > 1 */
            /* outside the domain of asin: invalid operation */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }

    } /* |X| < 1 */

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    /* asin(X) = atan(X / sqrt((1+X)*(1-X))) */
    fp1 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp2 = floatx80_add(one, fp0, status); /* 1 + X */
    fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */
    fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */
    fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */

    float_raise(float_flag_inexact, status);

    return a;
}

/*
 * Arc cosine
 */

floatx80 floatx80_acos(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }
    if (aExp == 0 && aSig == 0) {
        /* acos(+/-0) = pi/2, rounded at the caller's precision */
        float_raise(float_flag_inexact, status);
        return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                    piby2_exp, pi_sig, 0, status);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            if (aSign) { /* X == -1 */
                /* acos(-1) = pi, inexact */
                a = packFloatx80(0, pi_exp, pi_sig);
                float_raise(float_flag_inexact, status);
                return floatx80_move(a, status);
            } else { /* X == +1 */
                /* acos(+1) = +0, exact */
                return packFloatx80(0, 0, 0);
            }
        } else { /*
 |X| > 1 */
            /* outside the domain of acos: invalid operation */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    /* acos(X) = 2 * atan(sqrt((1-X)/(1+X))) */
    fp1 = floatx80_add(one, fp0, status); /* 1 + X */
    fp0 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */
    fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */
    fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */

    float_raise(float_flag_inexact, status);

    return a;
}

/*
 * Hyperbolic arc tangent
 */

floatx80 floatx80_atanh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        /* atanh(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            /* atanh(+/-1) = +/-inf, divide-by-zero */
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, floatx80_infinity.high,
                                floatx80_infinity.low);
        } else { /* |X| > 1 */
            /* outside the domain of atanh: invalid operation */
            float_raise(float_flag_invalid, status);
            return
floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    /* atanh(X) = sign(X) * (1/2) * log1p(2|X| / (1 - |X|)) */
    one = packFloatx80(0, one_exp, one_sig);
    fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
    fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
    fp1 = packFloatx80(1, aExp, aSig); /* -Y */
    fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
    fp1 = floatx80_add(fp1, one, status); /* 1-Y */
    fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
    fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_mul(fp0, fp2,
                     status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */

    float_raise(float_flag_inexact, status);

    return a;
}

/*
 * e to x minus 1
 */

floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            /* expm1(-inf) = -1, exact */
            return packFloatx80(aSign, one_exp, one_sig);
        }
        /* expm1(+inf) = +inf */
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* expm1(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
            /*
             * Argument reduction: X = (M*64 + J)*(log2/64) + R, so
             * exp(X) = 2^M * 2^(J/64) * exp(R) with |R| small.
             */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /*
                 * arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m1 = -m;
            /*m += 0x3FFF; // biased exponent of 2^(M) */
            /*m1 += 0x3FFF; // biased exponent of -2^(-M) */

            /* R = X + N*L1 + N*L2, where L1+L2 = -log2/64 split into a
             * short leading part and an accurate tail for cancellation. */
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            /* polynomial approximation of exp(R) - 1 */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3950097B),
                                      status); /* A6 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
                               status), fp1, status); /* fp3 is S*A5 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F81111111174385), status),
                               status); /* fp2 IS A4+S*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0x3FA5555555554F5A), status),
                               status); /* fp3 is A3+S*A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FC5555555555555), status),
                               status); /* fp2 IS A2+S*(A4+S*A6) */
            fp3 = floatx80_add(fp3, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp3 IS A1+S*(A3+S*A5) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
            fp1 = floatx80_mul(fp1, fp3,
                               status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
            fp2 = floatx80_mul(fp2, fp0,
                               status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            fp0 = floatx80_mul(fp0, exp_tbl[j],
                               status); /* 2^(J/64)*(Exp(R)-1) */

            /*
             * Reconstruct 2^M * (2^(J/64)*exp(R) - 2^(-M)); the three
             * cases order the additions to avoid cancellation error.
             * exp_tbl2[j] is the accurate tail of exp_tbl[j].
             */
            if (m >= 64) {
                fp1 = float32_to_floatx80(exp_tbl2[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
            } else if (m < -3) {
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp0 = floatx80_add(fp0, onebysc, status);
            } else { /* -3 <= m <= 63 */
                fp1 = exp_tbl[j];
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
            }

            sc = packFloatx80(0, m + 0x3FFF, one_sig); /* 2^M */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, sc, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| > 70 log2 */
            if (aSign) {
                /* expm1(X) ~ -1 for large negative X; add 2^(-126)
                 * so the result rounds as an inexact value */
                fp0 = float32_to_floatx80(make_float32(0xBF800000),
                                          status); /* -1 */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(
                                 make_float32(0x00800000), status),
                                 status); /* -1 + 2^(-126) */

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* for large positive X the -1 is negligible: use exp(X) */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                return floatx80_etox(a, status);
            }
        }
    } else { /* |X| < 1/4 */
        if (aExp >= 0x3FBE) {
            /* 2^(-65) <= |X| < 1/4: direct polynomial, two-strand
             * (even/odd) evaluation in fp2/fp1 */
            fp0 = a;
            fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
            fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
                                      status); /* B12 */
            fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
            fp2 = float32_to_floatx80(make_float32(0x310F8290),
                                      status); /* B11 */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x32D73220), status),
                               status); /* B10 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3493F281), status),
                               status); /* B9 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5774682), status),
                               status); /* B8 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A019D7CB68), status),
                               status); /* B7 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F2A01A01A019DF3), status),
                               status); /* B6 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F56C16C16C170E2), status),
                               status); /* B5 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F81111111111111), status),
                               status); /* B4 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FA5555555555555), status),
                               status); /* B3 */
            fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB));
            fp1 = floatx80_add(fp1, fp3, status); /* B2 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);

            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, a, status);

            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* S*B1 */
            fp1 = floatx80_add(fp1, fp2, status); /* Q */
            fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, a, status); /* EXPM1(X) = X + S*B1 + Q */

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| < 2^(-65) */
            /* expm1(X) ~ X; sc = -2^1 wait: sc is -2 used only as a
             * sticky term -- NOTE(review): sc = packFloatx80(1, 1, one_sig)
             * encodes a tiny negative denormal-exponent value, presumably
             * to force inexact/underflow behaviour -- confirm vs FPSP */
            sc = packFloatx80(1, 1, one_sig);
            fp0 = a;

            if (aExp < 0x0033) { /* |X| < 2^(-16382) */
                /* scale up by 2^140, add, then scale back by 2^(-140)
                 * to avoid intermediate underflow */
                fp0 = floatx80_mul(fp0, float64_to_floatx80(
                                   make_float64(0x48B0000000000000), status),
                                   status);
                fp0 = floatx80_add(fp0, sc, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_mul(fp0, float64_to_floatx80(
                                 make_float64(0x3730000000000000), status),
                                 status);
            } else {
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, sc, status);
            }

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}

/*
 * Hyperbolic tangent
 */

floatx80 floatx80_tanh(floatx80 a, float_status *status)
{
    flag aSign, vSign;
    int32_t aExp, vExp;
    uint64_t aSig, vSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;
    uint32_t
sign;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* tanh(+/-inf) = +/-1, exact */
        return packFloatx80(aSign, one_exp, one_sig);
    }

    if (aExp == 0 && aSig == 0) {
        /* tanh(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
        /* TANHBORS: |X| < 2^(-40) or |X| > (5/2)log2 */
        if (compact < 0x3FFF8000) {
            /* TANHSM: tanh(X) ~ X to working precision */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            if (compact > 0x40048AA1) {
                /* TANHHUGE: tanh(X) ~ sign(X)*(1 - eps); build
                 * single-precision bit patterns directly */
                sign = 0x3F800000; /* +1.0f */
                sign |= aSign ? 0x80000000 : 0x00000000;
                fp0 = float32_to_floatx80(make_float32(sign), status);
                sign &= 0x80000000;
                sign ^= 0x80800000; /* -SIGN(X)*EPS */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
                                 status), status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* (5/2)log2 < |X| <= 50 log2:
                 * tanh(X) = sign(X) - sign(X)*2/(exp(2|X|)+1) */
                fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
                fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
                fp0 = floatx80_add(fp0, float32_to_floatx80(
                                   make_float32(0x3F800000),
                                   status), status); /* EXP(Y)+1 */
                sign = aSign ? 0x80000000 : 0x00000000;
                fp1 = floatx80_div(float32_to_floatx80(make_float32(
                                   sign ^ 0xC0000000), status), fp0,
                                   status); /* -SIGN(X)*2 / [EXP(Y)+1] */
                fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
                                          status); /* SIGN */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp1, fp0, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else { /* 2**(-40) < |X| < (5/2)LOG2 */
        /* tanh(X) = Z / (Z+2) with sign folded in, Z = expm1(2|X|) */
        fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
                           status),
                           status); /* Z+2 */

        /* transfer the sign of X onto the denominator */
        vSign = extractFloatx80Sign(fp1);
        vExp = extractFloatx80Exp(fp1);
        vSig = extractFloatx80Frac(fp1);

        fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_div(fp0, fp1, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*
 * Hyperbolic sine
 */

floatx80 floatx80_sinh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2;
    float32 fact;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* sinh(+/-inf) = +/-inf */
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* sinh(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec =
status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        /* SINHBIG: |X| > 16380 log2 */
        if (compact > 0x400CB2B3) {
            /* result overflows: deliver a rounded/overflowed value at
             * the caller's precision */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                        aSign, 0x8000, aSig, 0, status);
        } else {
            /* sinh(X) ~ sign(X) * exp(|X| - 16381 log2) * 2^16380 */
            fp0 = floatx80_abs(a); /* Y = |X| */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status); /* (|X|-16381LOG2_LEAD) */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status); /* |X| - 16381 LOG2, ACCURATE */
            fp0 = floatx80_etox(fp0, status);
            fp2 = packFloatx80(aSign, 0x7FFB, one_sig); /* sign(X)*2^16380 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp2, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* |X| < 16380 LOG2 */
        /* sinh(X) = sign(X) * (1/2) * (Z + Z/(1+Z)), Z = expm1(|X|) */
        fp0 = floatx80_abs(a); /* Y = |X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* 1+Z */
        fp2 = fp0;
        fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
        fp0 = floatx80_add(fp0, fp2, status);

        fact = packFloat32(aSign, 0x7E, 0); /* sign(X) * 0.5f */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}

/*
 * Hyperbolic cosine
 */

floatx80 floatx80_cosh(floatx80 a, float_status *status)
{
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* cosh(+/-inf) = +inf */
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* cosh(+/-0) = +1, exact */
        return packFloatx80(0, one_exp, one_sig);
    }

    /* kernel runs round-to-nearest extended; caller's state restored below */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        /* |X| > 16380 log2 */
        if (compact > 0x400CB2B3) {
            /* result overflows: deliver a rounded/overflowed value at
             * the caller's precision */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;
            return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                        0x8000, one_sig, 0, status);
        } else {
            /* cosh(X) ~ exp(|X| - 16381 log2) * 2^16380 */
            fp0 = packFloatx80(0, aExp, aSig);
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status); /* |X| - 16381 log2, lead */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status); /* |X| - 16381 log2, accurate */
            fp0 = floatx80_etox(fp0, status);
            fp1 = packFloatx80(0, 0x7FFB, one_sig); /* 2^16380 */

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }

    /* moderate |X|: cosh(X) = (1/2)exp(|X|) + 1/(2 exp(|X|)) */
    fp0 = packFloatx80(0, aExp, aSig); /* |X| */
    fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
    fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
                       status), status); /* (1/2)*EXP(|X|) */
    fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
    fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp1, status);

    float_raise(float_flag_inexact, status);

    return a;
}