1/* 2 * QEMU float support 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file after December 1st 2014 will be 14 * taken to be licensed under the Softfloat-2a license unless specifically 15 * indicated otherwise. 16 */ 17 18static void partsN(return_nan)(FloatPartsN *a, float_status *s) 19{ 20 switch (a->cls) { 21 case float_class_snan: 22 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 23 if (s->default_nan_mode) { 24 parts_default_nan(a, s); 25 } else { 26 parts_silence_nan(a, s); 27 } 28 break; 29 case float_class_qnan: 30 if (s->default_nan_mode) { 31 parts_default_nan(a, s); 32 } 33 break; 34 default: 35 g_assert_not_reached(); 36 } 37} 38 39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, 40 float_status *s) 41{ 42 if (is_snan(a->cls) || is_snan(b->cls)) { 43 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 44 } 45 46 if (s->default_nan_mode) { 47 parts_default_nan(a, s); 48 } else { 49 int cmp = frac_cmp(a, b); 50 if (cmp == 0) { 51 cmp = a->sign < b->sign; 52 } 53 54 if (pickNaN(a->cls, b->cls, cmp > 0, s)) { 55 a = b; 56 } 57 if (is_snan(a->cls)) { 58 parts_silence_nan(a, s); 59 } 60 } 61 return a; 62} 63 64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, 65 FloatPartsN *c, float_status *s, 66 int ab_mask, int abc_mask) 67{ 68 int which; 69 70 if (unlikely(abc_mask & float_cmask_snan)) { 71 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 72 } 73 74 which = pickNaNMulAdd(a->cls, b->cls, c->cls, 75 ab_mask == float_cmask_infzero, s); 76 77 if (s->default_nan_mode || which == 3) { 78 /* 79 * Note that this check is after pickNaNMulAdd so that function 80 * has an opportunity to set the Invalid flag for infzero. 81 */ 82 parts_default_nan(a, s); 83 return a; 84 } 85 86 switch (which) { 87 case 0: 88 break; 89 case 1: 90 a = b; 91 break; 92 case 2: 93 a = c; 94 break; 95 default: 96 g_assert_not_reached(); 97 } 98 if (is_snan(a->cls)) { 99 parts_silence_nan(a, s); 100 } 101 return a; 102} 103 104/* 105 * Canonicalize the FloatParts structure. Determine the class, 106 * unbias the exponent, and normalize the fraction. 107 */ 108static void partsN(canonicalize)(FloatPartsN *p, float_status *status, 109 const FloatFmt *fmt) 110{ 111 if (unlikely(p->exp == 0)) { 112 if (likely(frac_eqz(p))) { 113 p->cls = float_class_zero; 114 } else if (status->flush_inputs_to_zero) { 115 float_raise(float_flag_input_denormal, status); 116 p->cls = float_class_zero; 117 frac_clear(p); 118 } else { 119 int shift = frac_normalize(p); 120 p->cls = float_class_normal; 121 p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; 122 } 123 } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { 124 p->cls = float_class_normal; 125 p->exp -= fmt->exp_bias; 126 frac_shl(p, fmt->frac_shift); 127 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 128 } else if (likely(frac_eqz(p))) { 129 p->cls = float_class_inf; 130 } else { 131 frac_shl(p, fmt->frac_shift); 132 p->cls = (parts_is_snan_frac(p->frac_hi, status) 133 ? float_class_snan : float_class_qnan); 134 } 135} 136 137/* 138 * Round and uncanonicalize a floating-point number by parts. There 139 * are FRAC_SHIFT bits that may require rounding at the bottom of the 140 * fraction; these bits will be removed. The exponent will be biased 141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. 142 */ 143static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, 144 const FloatFmt *fmt) 145{ 146 const int exp_max = fmt->exp_max; 147 const int frac_shift = fmt->frac_shift; 148 const uint64_t round_mask = fmt->round_mask; 149 const uint64_t frac_lsb = round_mask + 1; 150 const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); 151 const uint64_t roundeven_mask = round_mask | frac_lsb; 152 uint64_t inc; 153 bool overflow_norm = false; 154 int exp, flags = 0; 155 156 switch (s->float_rounding_mode) { 157 case float_round_nearest_even: 158 if (N > 64 && frac_lsb == 0) { 159 inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 160 ? frac_lsbm1 : 0); 161 } else { 162 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 163 ? frac_lsbm1 : 0); 164 } 165 break; 166 case float_round_ties_away: 167 inc = frac_lsbm1; 168 break; 169 case float_round_to_zero: 170 overflow_norm = true; 171 inc = 0; 172 break; 173 case float_round_up: 174 inc = p->sign ? 0 : round_mask; 175 overflow_norm = p->sign; 176 break; 177 case float_round_down: 178 inc = p->sign ? round_mask : 0; 179 overflow_norm = !p->sign; 180 break; 181 case float_round_to_odd: 182 overflow_norm = true; 183 /* fall through */ 184 case float_round_to_odd_inf: 185 if (N > 64 && frac_lsb == 0) { 186 inc = p->frac_hi & 1 ? 0 : round_mask; 187 } else { 188 inc = p->frac_lo & frac_lsb ? 0 : round_mask; 189 } 190 break; 191 default: 192 g_assert_not_reached(); 193 } 194 195 exp = p->exp + fmt->exp_bias; 196 if (likely(exp > 0)) { 197 if (p->frac_lo & round_mask) { 198 flags |= float_flag_inexact; 199 if (frac_addi(p, p, inc)) { 200 frac_shr(p, 1); 201 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 202 exp++; 203 } 204 p->frac_lo &= ~round_mask; 205 } 206 207 if (fmt->arm_althp) { 208 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ 209 if (unlikely(exp > exp_max)) { 210 /* Overflow. Return the maximum normal. */ 211 flags = float_flag_invalid; 212 exp = exp_max; 213 frac_allones(p); 214 p->frac_lo &= ~round_mask; 215 } 216 } else if (unlikely(exp >= exp_max)) { 217 flags |= float_flag_overflow | float_flag_inexact; 218 if (overflow_norm) { 219 exp = exp_max - 1; 220 frac_allones(p); 221 p->frac_lo &= ~round_mask; 222 } else { 223 p->cls = float_class_inf; 224 exp = exp_max; 225 frac_clear(p); 226 } 227 } 228 frac_shr(p, frac_shift); 229 } else if (s->flush_to_zero) { 230 flags |= float_flag_output_denormal; 231 p->cls = float_class_zero; 232 exp = 0; 233 frac_clear(p); 234 } else { 235 bool is_tiny = s->tininess_before_rounding || exp < 0; 236 237 if (!is_tiny) { 238 FloatPartsN discard; 239 is_tiny = !frac_addi(&discard, p, inc); 240 } 241 242 frac_shrjam(p, 1 - exp); 243 244 if (p->frac_lo & round_mask) { 245 /* Need to recompute round-to-even/round-to-odd. */ 246 switch (s->float_rounding_mode) { 247 case float_round_nearest_even: 248 if (N > 64 && frac_lsb == 0) { 249 inc = ((p->frac_hi & 1) || 250 (p->frac_lo & round_mask) != frac_lsbm1 251 ? frac_lsbm1 : 0); 252 } else { 253 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 254 ? frac_lsbm1 : 0); 255 } 256 break; 257 case float_round_to_odd: 258 case float_round_to_odd_inf: 259 if (N > 64 && frac_lsb == 0) { 260 inc = p->frac_hi & 1 ? 0 : round_mask; 261 } else { 262 inc = p->frac_lo & frac_lsb ? 0 : round_mask; 263 } 264 break; 265 default: 266 break; 267 } 268 flags |= float_flag_inexact; 269 frac_addi(p, p, inc); 270 p->frac_lo &= ~round_mask; 271 } 272 273 exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; 274 frac_shr(p, frac_shift); 275 276 if (is_tiny && (flags & float_flag_inexact)) { 277 flags |= float_flag_underflow; 278 } 279 if (exp == 0 && frac_eqz(p)) { 280 p->cls = float_class_zero; 281 } 282 } 283 p->exp = exp; 284 float_raise(flags, s); 285} 286 287static void partsN(uncanon)(FloatPartsN *p, float_status *s, 288 const FloatFmt *fmt) 289{ 290 if (likely(p->cls == float_class_normal)) { 291 parts_uncanon_normal(p, s, fmt); 292 } else { 293 switch (p->cls) { 294 case float_class_zero: 295 p->exp = 0; 296 frac_clear(p); 297 return; 298 case float_class_inf: 299 g_assert(!fmt->arm_althp); 300 p->exp = fmt->exp_max; 301 frac_clear(p); 302 return; 303 case float_class_qnan: 304 case float_class_snan: 305 g_assert(!fmt->arm_althp); 306 p->exp = fmt->exp_max; 307 frac_shr(p, fmt->frac_shift); 308 return; 309 default: 310 break; 311 } 312 g_assert_not_reached(); 313 } 314} 315 316/* 317 * Returns the result of adding or subtracting the values of the 318 * floating-point values `a' and `b'. The operation is performed 319 * according to the IEC/IEEE Standard for Binary Floating-Point 320 * Arithmetic. 321 */ 322static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, 323 float_status *s, bool subtract) 324{ 325 bool b_sign = b->sign ^ subtract; 326 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 327 328 if (a->sign != b_sign) { 329 /* Subtraction */ 330 if (likely(ab_mask == float_cmask_normal)) { 331 if (parts_sub_normal(a, b)) { 332 return a; 333 } 334 /* Subtract was exact, fall through to set sign. */ 335 ab_mask = float_cmask_zero; 336 } 337 338 if (ab_mask == float_cmask_zero) { 339 a->sign = s->float_rounding_mode == float_round_down; 340 return a; 341 } 342 343 if (unlikely(ab_mask & float_cmask_anynan)) { 344 goto p_nan; 345 } 346 347 if (ab_mask & float_cmask_inf) { 348 if (a->cls != float_class_inf) { 349 /* N - Inf */ 350 goto return_b; 351 } 352 if (b->cls != float_class_inf) { 353 /* Inf - N */ 354 return a; 355 } 356 /* Inf - Inf */ 357 float_raise(float_flag_invalid | float_flag_invalid_isi, s); 358 parts_default_nan(a, s); 359 return a; 360 } 361 } else { 362 /* Addition */ 363 if (likely(ab_mask == float_cmask_normal)) { 364 parts_add_normal(a, b); 365 return a; 366 } 367 368 if (ab_mask == float_cmask_zero) { 369 return a; 370 } 371 372 if (unlikely(ab_mask & float_cmask_anynan)) { 373 goto p_nan; 374 } 375 376 if (ab_mask & float_cmask_inf) { 377 a->cls = float_class_inf; 378 return a; 379 } 380 } 381 382 if (b->cls == float_class_zero) { 383 g_assert(a->cls == float_class_normal); 384 return a; 385 } 386 387 g_assert(a->cls == float_class_zero); 388 g_assert(b->cls == float_class_normal); 389 return_b: 390 b->sign = b_sign; 391 return b; 392 393 p_nan: 394 return parts_pick_nan(a, b, s); 395} 396 397/* 398 * Returns the result of multiplying the floating-point values `a' and 399 * `b'. The operation is performed according to the IEC/IEEE Standard 400 * for Binary Floating-Point Arithmetic. 401 */ 402static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, 403 float_status *s) 404{ 405 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 406 bool sign = a->sign ^ b->sign; 407 408 if (likely(ab_mask == float_cmask_normal)) { 409 FloatPartsW tmp; 410 411 frac_mulw(&tmp, a, b); 412 frac_truncjam(a, &tmp); 413 414 a->exp += b->exp + 1; 415 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 416 frac_add(a, a, a); 417 a->exp -= 1; 418 } 419 420 a->sign = sign; 421 return a; 422 } 423 424 /* Inf * Zero == NaN */ 425 if (unlikely(ab_mask == float_cmask_infzero)) { 426 float_raise(float_flag_invalid | float_flag_invalid_imz, s); 427 parts_default_nan(a, s); 428 return a; 429 } 430 431 if (unlikely(ab_mask & float_cmask_anynan)) { 432 return parts_pick_nan(a, b, s); 433 } 434 435 /* Multiply by 0 or Inf */ 436 if (ab_mask & float_cmask_inf) { 437 a->cls = float_class_inf; 438 a->sign = sign; 439 return a; 440 } 441 442 g_assert(ab_mask & float_cmask_zero); 443 a->cls = float_class_zero; 444 a->sign = sign; 445 return a; 446} 447 448/* 449 * Returns the result of multiplying the floating-point values `a' and 450 * `b' then adding 'c', with no intermediate rounding step after the 451 * multiplication. The operation is performed according to the 452 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. 453 * The flags argument allows the caller to select negation of the 454 * addend, the intermediate product, or the final result. (The 455 * difference between this and having the caller do a separate 456 * negation is that negating externally will flip the sign bit on NaNs.) 457 * 458 * Requires A and C extracted into a double-sized structure to provide the 459 * extra space for the widening multiply. 460 */ 461static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, 462 FloatPartsN *c, int flags, float_status *s) 463{ 464 int ab_mask, abc_mask; 465 FloatPartsW p_widen, c_widen; 466 467 ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 468 abc_mask = float_cmask(c->cls) | ab_mask; 469 470 /* 471 * It is implementation-defined whether the cases of (0,inf,qnan) 472 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN 473 * they return if they do), so we have to hand this information 474 * off to the target-specific pick-a-NaN routine. 475 */ 476 if (unlikely(abc_mask & float_cmask_anynan)) { 477 return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); 478 } 479 480 if (flags & float_muladd_negate_c) { 481 c->sign ^= 1; 482 } 483 484 /* Compute the sign of the product into A. */ 485 a->sign ^= b->sign; 486 if (flags & float_muladd_negate_product) { 487 a->sign ^= 1; 488 } 489 490 if (unlikely(ab_mask != float_cmask_normal)) { 491 if (unlikely(ab_mask == float_cmask_infzero)) { 492 float_raise(float_flag_invalid | float_flag_invalid_imz, s); 493 goto d_nan; 494 } 495 496 if (ab_mask & float_cmask_inf) { 497 if (c->cls == float_class_inf && a->sign != c->sign) { 498 float_raise(float_flag_invalid | float_flag_invalid_isi, s); 499 goto d_nan; 500 } 501 goto return_inf; 502 } 503 504 g_assert(ab_mask & float_cmask_zero); 505 if (c->cls == float_class_normal) { 506 *a = *c; 507 goto return_normal; 508 } 509 if (c->cls == float_class_zero) { 510 if (a->sign != c->sign) { 511 goto return_sub_zero; 512 } 513 goto return_zero; 514 } 515 g_assert(c->cls == float_class_inf); 516 } 517 518 if (unlikely(c->cls == float_class_inf)) { 519 a->sign = c->sign; 520 goto return_inf; 521 } 522 523 /* Perform the multiplication step. */ 524 p_widen.sign = a->sign; 525 p_widen.exp = a->exp + b->exp + 1; 526 frac_mulw(&p_widen, a, b); 527 if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 528 frac_add(&p_widen, &p_widen, &p_widen); 529 p_widen.exp -= 1; 530 } 531 532 /* Perform the addition step. */ 533 if (c->cls != float_class_zero) { 534 /* Zero-extend C to less significant bits. */ 535 frac_widen(&c_widen, c); 536 c_widen.exp = c->exp; 537 538 if (a->sign == c->sign) { 539 parts_add_normal(&p_widen, &c_widen); 540 } else if (!parts_sub_normal(&p_widen, &c_widen)) { 541 goto return_sub_zero; 542 } 543 } 544 545 /* Narrow with sticky bit, for proper rounding later. */ 546 frac_truncjam(a, &p_widen); 547 a->sign = p_widen.sign; 548 a->exp = p_widen.exp; 549 550 return_normal: 551 if (flags & float_muladd_halve_result) { 552 a->exp -= 1; 553 } 554 finish_sign: 555 if (flags & float_muladd_negate_result) { 556 a->sign ^= 1; 557 } 558 return a; 559 560 return_sub_zero: 561 a->sign = s->float_rounding_mode == float_round_down; 562 return_zero: 563 a->cls = float_class_zero; 564 goto finish_sign; 565 566 return_inf: 567 a->cls = float_class_inf; 568 goto finish_sign; 569 570 d_nan: 571 parts_default_nan(a, s); 572 return a; 573} 574 575/* 576 * Returns the result of dividing the floating-point value `a' by the 577 * corresponding value `b'. The operation is performed according to 578 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 579 */ 580static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, 581 float_status *s) 582{ 583 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 584 bool sign = a->sign ^ b->sign; 585 586 if (likely(ab_mask == float_cmask_normal)) { 587 a->sign = sign; 588 a->exp -= b->exp + frac_div(a, b); 589 return a; 590 } 591 592 /* 0/0 or Inf/Inf => NaN */ 593 if (unlikely(ab_mask == float_cmask_zero)) { 594 float_raise(float_flag_invalid | float_flag_invalid_zdz, s); 595 goto d_nan; 596 } 597 if (unlikely(ab_mask == float_cmask_inf)) { 598 float_raise(float_flag_invalid | float_flag_invalid_idi, s); 599 goto d_nan; 600 } 601 602 /* All the NaN cases */ 603 if (unlikely(ab_mask & float_cmask_anynan)) { 604 return parts_pick_nan(a, b, s); 605 } 606 607 a->sign = sign; 608 609 /* Inf / X */ 610 if (a->cls == float_class_inf) { 611 return a; 612 } 613 614 /* 0 / X */ 615 if (a->cls == float_class_zero) { 616 return a; 617 } 618 619 /* X / Inf */ 620 if (b->cls == float_class_inf) { 621 a->cls = float_class_zero; 622 return a; 623 } 624 625 /* X / 0 => Inf */ 626 g_assert(b->cls == float_class_zero); 627 float_raise(float_flag_divbyzero, s); 628 a->cls = float_class_inf; 629 return a; 630 631 d_nan: 632 parts_default_nan(a, s); 633 return a; 634} 635 636/* 637 * Floating point remainder, per IEC/IEEE, or modulus. 638 */ 639static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, 640 uint64_t *mod_quot, float_status *s) 641{ 642 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 643 644 if (likely(ab_mask == float_cmask_normal)) { 645 frac_modrem(a, b, mod_quot); 646 return a; 647 } 648 649 if (mod_quot) { 650 *mod_quot = 0; 651 } 652 653 /* All the NaN cases */ 654 if (unlikely(ab_mask & float_cmask_anynan)) { 655 return parts_pick_nan(a, b, s); 656 } 657 658 /* Inf % N; N % 0 */ 659 if (a->cls == float_class_inf || b->cls == float_class_zero) { 660 float_raise(float_flag_invalid, s); 661 parts_default_nan(a, s); 662 return a; 663 } 664 665 /* N % Inf; 0 % N */ 666 g_assert(b->cls == float_class_inf || a->cls == float_class_zero); 667 return a; 668} 669 670/* 671 * Square Root 672 * 673 * The base algorithm is lifted from 674 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c 675 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c 676 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c 677 * and is thus MIT licenced. 678 */ 679static void partsN(sqrt)(FloatPartsN *a, float_status *status, 680 const FloatFmt *fmt) 681{ 682 const uint32_t three32 = 3u << 30; 683 const uint64_t three64 = 3ull << 62; 684 uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ 685 uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ 686 uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ 687 uint64_t d0h, d0l, d1h, d1l, d2h, d2l; 688 uint64_t discard; 689 bool exp_odd; 690 size_t index; 691 692 if (unlikely(a->cls != float_class_normal)) { 693 switch (a->cls) { 694 case float_class_snan: 695 case float_class_qnan: 696 parts_return_nan(a, status); 697 return; 698 case float_class_zero: 699 return; 700 case float_class_inf: 701 if (unlikely(a->sign)) { 702 goto d_nan; 703 } 704 return; 705 default: 706 g_assert_not_reached(); 707 } 708 } 709 710 if (unlikely(a->sign)) { 711 goto d_nan; 712 } 713 714 /* 715 * Argument reduction. 716 * x = 4^e frac; with integer e, and frac in [1, 4) 717 * m = frac fixed point at bit 62, since we're in base 4. 718 * If base-2 exponent is odd, exchange that for multiply by 2, 719 * which results in no shift. 720 */ 721 exp_odd = a->exp & 1; 722 index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); 723 if (!exp_odd) { 724 frac_shr(a, 1); 725 } 726 727 /* 728 * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). 729 * 730 * Initial estimate: 731 * 7-bit lookup table (1-bit exponent and 6-bit significand). 732 * 733 * The relative error (e = r0*sqrt(m)-1) of a linear estimate 734 * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; 735 * a table lookup is faster and needs one less iteration. 736 * The 7-bit table gives |e| < 0x1.fdp-9. 737 * 738 * A Newton-Raphson iteration for r is 739 * s = m*r 740 * d = s*r 741 * u = 3 - d 742 * r = r*u/2 743 * 744 * Fixed point representations: 745 * m, s, d, u, three are all 2.30; r is 0.32 746 */ 747 m64 = a->frac_hi; 748 m32 = m64 >> 32; 749 750 r32 = rsqrt_tab[index] << 16; 751 /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ 752 753 s32 = ((uint64_t)m32 * r32) >> 32; 754 d32 = ((uint64_t)s32 * r32) >> 32; 755 u32 = three32 - d32; 756 757 if (N == 64) { 758 /* float64 or smaller */ 759 760 r32 = ((uint64_t)r32 * u32) >> 31; 761 /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ 762 763 s32 = ((uint64_t)m32 * r32) >> 32; 764 d32 = ((uint64_t)s32 * r32) >> 32; 765 u32 = three32 - d32; 766 767 if (fmt->frac_size <= 23) { 768 /* float32 or smaller */ 769 770 s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ 771 s32 = (s32 - 1) >> 6; /* 9.23 */ 772 /* s < sqrt(m) < s + 0x1.08p-23 */ 773 774 /* compute nearest rounded result to 2.23 bits */ 775 uint32_t d0 = (m32 << 16) - s32 * s32; 776 uint32_t d1 = s32 - d0; 777 uint32_t d2 = d1 + s32 + 1; 778 s32 += d1 >> 31; 779 a->frac_hi = (uint64_t)s32 << (64 - 25); 780 781 /* increment or decrement for inexact */ 782 if (d2 != 0) { 783 a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); 784 } 785 goto done; 786 } 787 788 /* float64 */ 789 790 r64 = (uint64_t)r32 * u32 * 2; 791 /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ 792 mul64To128(m64, r64, &s64, &discard); 793 mul64To128(s64, r64, &d64, &discard); 794 u64 = three64 - d64; 795 796 mul64To128(s64, u64, &s64, &discard); /* 3.61 */ 797 s64 = (s64 - 2) >> 9; /* 12.52 */ 798 799 /* Compute nearest rounded result */ 800 uint64_t d0 = (m64 << 42) - s64 * s64; 801 uint64_t d1 = s64 - d0; 802 uint64_t d2 = d1 + s64 + 1; 803 s64 += d1 >> 63; 804 a->frac_hi = s64 << (64 - 54); 805 806 /* increment or decrement for inexact */ 807 if (d2 != 0) { 808 a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); 809 } 810 goto done; 811 } 812 813 r64 = (uint64_t)r32 * u32 * 2; 814 /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ 815 816 mul64To128(m64, r64, &s64, &discard); 817 mul64To128(s64, r64, &d64, &discard); 818 u64 = three64 - d64; 819 mul64To128(u64, r64, &r64, &discard); 820 r64 <<= 1; 821 /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ 822 823 mul64To128(m64, r64, &s64, &discard); 824 mul64To128(s64, r64, &d64, &discard); 825 u64 = three64 - d64; 826 mul64To128(u64, r64, &rh, &rl); 827 add128(rh, rl, rh, rl, &rh, &rl); 828 /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ 829 830 mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); 831 mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); 832 sub128(three64, 0, dh, dl, &uh, &ul); 833 mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ 834 /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ 835 836 sub128(sh, sl, 0, 4, &sh, &sl); 837 shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ 838 /* s < sqrt(m) < s + 1ulp */ 839 840 /* Compute nearest rounded result */ 841 mul64To128(sl, sl, &d0h, &d0l); 842 d0h += 2 * sh * sl; 843 sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); 844 sub128(sh, sl, d0h, d0l, &d1h, &d1l); 845 add128(sh, sl, 0, 1, &d2h, &d2l); 846 add128(d2h, d2l, d1h, d1l, &d2h, &d2l); 847 add128(sh, sl, 0, d1h >> 63, &sh, &sl); 848 shift128Left(sh, sl, 128 - 114, &sh, &sl); 849 850 /* increment or decrement for inexact */ 851 if (d2h | d2l) { 852 if ((int64_t)(d1h ^ d2h) < 0) { 853 sub128(sh, sl, 0, 1, &sh, &sl); 854 } else { 855 add128(sh, sl, 0, 1, &sh, &sl); 856 } 857 } 858 a->frac_lo = sl; 859 a->frac_hi = sh; 860 861 done: 862 /* Convert back from base 4 to base 2. */ 863 a->exp >>= 1; 864 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 865 frac_add(a, a, a); 866 } else { 867 a->exp += 1; 868 } 869 return; 870 871 d_nan: 872 float_raise(float_flag_invalid | float_flag_invalid_sqrt, status); 873 parts_default_nan(a, status); 874} 875 876/* 877 * Rounds the floating-point value `a' to an integer, and returns the 878 * result as a floating-point value. The operation is performed 879 * according to the IEC/IEEE Standard for Binary Floating-Point 880 * Arithmetic. 881 * 882 * parts_round_to_int_normal is an internal helper function for 883 * normal numbers only, returning true for inexact but not directly 884 * raising float_flag_inexact. 885 */ 886static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, 887 int scale, int frac_size) 888{ 889 uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; 890 int shift_adj; 891 892 scale = MIN(MAX(scale, -0x10000), 0x10000); 893 a->exp += scale; 894 895 if (a->exp < 0) { 896 bool one; 897 898 /* All fractional */ 899 switch (rmode) { 900 case float_round_nearest_even: 901 one = false; 902 if (a->exp == -1) { 903 FloatPartsN tmp; 904 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ 905 frac_add(&tmp, a, a); 906 /* Anything remaining means frac > 0.5. */ 907 one = !frac_eqz(&tmp); 908 } 909 break; 910 case float_round_ties_away: 911 one = a->exp == -1; 912 break; 913 case float_round_to_zero: 914 one = false; 915 break; 916 case float_round_up: 917 one = !a->sign; 918 break; 919 case float_round_down: 920 one = a->sign; 921 break; 922 case float_round_to_odd: 923 one = true; 924 break; 925 default: 926 g_assert_not_reached(); 927 } 928 929 frac_clear(a); 930 a->exp = 0; 931 if (one) { 932 a->frac_hi = DECOMPOSED_IMPLICIT_BIT; 933 } else { 934 a->cls = float_class_zero; 935 } 936 return true; 937 } 938 939 if (a->exp >= frac_size) { 940 /* All integral */ 941 return false; 942 } 943 944 if (N > 64 && a->exp < N - 64) { 945 /* 946 * Rounding is not in the low word -- shift lsb to bit 2, 947 * which leaves room for sticky and rounding bit. 948 */ 949 shift_adj = (N - 1) - (a->exp + 2); 950 frac_shrjam(a, shift_adj); 951 frac_lsb = 1 << 2; 952 } else { 953 shift_adj = 0; 954 frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); 955 } 956 957 frac_lsbm1 = frac_lsb >> 1; 958 rnd_mask = frac_lsb - 1; 959 rnd_even_mask = rnd_mask | frac_lsb; 960 961 if (!(a->frac_lo & rnd_mask)) { 962 /* Fractional bits already clear, undo the shift above. */ 963 frac_shl(a, shift_adj); 964 return false; 965 } 966 967 switch (rmode) { 968 case float_round_nearest_even: 969 inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); 970 break; 971 case float_round_ties_away: 972 inc = frac_lsbm1; 973 break; 974 case float_round_to_zero: 975 inc = 0; 976 break; 977 case float_round_up: 978 inc = a->sign ? 0 : rnd_mask; 979 break; 980 case float_round_down: 981 inc = a->sign ? rnd_mask : 0; 982 break; 983 case float_round_to_odd: 984 inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; 985 break; 986 default: 987 g_assert_not_reached(); 988 } 989 990 if (shift_adj == 0) { 991 if (frac_addi(a, a, inc)) { 992 frac_shr(a, 1); 993 a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 994 a->exp++; 995 } 996 a->frac_lo &= ~rnd_mask; 997 } else { 998 frac_addi(a, a, inc); 999 a->frac_lo &= ~rnd_mask; 1000 /* Be careful shifting back, not to overflow */ 1001 frac_shl(a, shift_adj - 1); 1002 if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { 1003 a->exp++; 1004 } else { 1005 frac_add(a, a, a); 1006 } 1007 } 1008 return true; 1009} 1010 1011static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, 1012 int scale, float_status *s, 1013 const FloatFmt *fmt) 1014{ 1015 switch (a->cls) { 1016 case float_class_qnan: 1017 case float_class_snan: 1018 parts_return_nan(a, s); 1019 break; 1020 case float_class_zero: 1021 case float_class_inf: 1022 break; 1023 case float_class_normal: 1024 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { 1025 float_raise(float_flag_inexact, s); 1026 } 1027 break; 1028 default: 1029 g_assert_not_reached(); 1030 } 1031} 1032 1033/* 1034 * Returns the result of converting the floating-point value `a' to 1035 * the two's complement integer format. The conversion is performed 1036 * according to the IEC/IEEE Standard for Binary Floating-Point 1037 * Arithmetic---which means in particular that the conversion is 1038 * rounded according to the current rounding mode. If `a' is a NaN, 1039 * the largest positive integer is returned. Otherwise, if the 1040 * conversion overflows, the largest integer with the same sign as `a' 1041 * is returned. 1042 */ 1043static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, 1044 int scale, int64_t min, int64_t max, 1045 float_status *s) 1046{ 1047 int flags = 0; 1048 uint64_t r; 1049 1050 switch (p->cls) { 1051 case float_class_snan: 1052 flags |= float_flag_invalid_snan; 1053 /* fall through */ 1054 case float_class_qnan: 1055 flags |= float_flag_invalid; 1056 r = max; 1057 break; 1058 1059 case float_class_inf: 1060 flags = float_flag_invalid | float_flag_invalid_cvti; 1061 r = p->sign ? min : max; 1062 break; 1063 1064 case float_class_zero: 1065 return 0; 1066 1067 case float_class_normal: 1068 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1069 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1070 flags = float_flag_inexact; 1071 } 1072 1073 if (p->exp <= DECOMPOSED_BINARY_POINT) { 1074 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1075 } else { 1076 r = UINT64_MAX; 1077 } 1078 if (p->sign) { 1079 if (r <= -(uint64_t)min) { 1080 r = -r; 1081 } else { 1082 flags = float_flag_invalid | float_flag_invalid_cvti; 1083 r = min; 1084 } 1085 } else if (r > max) { 1086 flags = float_flag_invalid | float_flag_invalid_cvti; 1087 r = max; 1088 } 1089 break; 1090 1091 default: 1092 g_assert_not_reached(); 1093 } 1094 1095 float_raise(flags, s); 1096 return r; 1097} 1098 1099/* 1100 * Returns the result of converting the floating-point value `a' to 1101 * the unsigned integer format. The conversion is performed according 1102 * to the IEC/IEEE Standard for Binary Floating-Point 1103 * Arithmetic---which means in particular that the conversion is 1104 * rounded according to the current rounding mode. If `a' is a NaN, 1105 * the largest unsigned integer is returned. Otherwise, if the 1106 * conversion overflows, the largest unsigned integer is returned. If 1107 * the 'a' is negative, the result is rounded and zero is returned; 1108 * values that do not round to zero will raise the inexact exception 1109 * flag. 1110 */ 1111static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, 1112 int scale, uint64_t max, float_status *s) 1113{ 1114 int flags = 0; 1115 uint64_t r; 1116 1117 switch (p->cls) { 1118 case float_class_snan: 1119 flags |= float_flag_invalid_snan; 1120 /* fall through */ 1121 case float_class_qnan: 1122 flags |= float_flag_invalid; 1123 r = max; 1124 break; 1125 1126 case float_class_inf: 1127 flags = float_flag_invalid | float_flag_invalid_cvti; 1128 r = p->sign ? 0 : max; 1129 break; 1130 1131 case float_class_zero: 1132 return 0; 1133 1134 case float_class_normal: 1135 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1136 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1137 flags = float_flag_inexact; 1138 if (p->cls == float_class_zero) { 1139 r = 0; 1140 break; 1141 } 1142 } 1143 1144 if (p->sign) { 1145 flags = float_flag_invalid | float_flag_invalid_cvti; 1146 r = 0; 1147 } else if (p->exp > DECOMPOSED_BINARY_POINT) { 1148 flags = float_flag_invalid | float_flag_invalid_cvti; 1149 r = max; 1150 } else { 1151 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1152 if (r > max) { 1153 flags = float_flag_invalid | float_flag_invalid_cvti; 1154 r = max; 1155 } 1156 } 1157 break; 1158 1159 default: 1160 g_assert_not_reached(); 1161 } 1162 1163 float_raise(flags, s); 1164 return r; 1165} 1166 1167/* 1168 * Integer to float conversions 1169 * 1170 * Returns the result of converting the two's complement integer `a' 1171 * to the floating-point format. The conversion is performed according 1172 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1173 */ 1174static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, 1175 int scale, float_status *s) 1176{ 1177 uint64_t f = a; 1178 int shift; 1179 1180 memset(p, 0, sizeof(*p)); 1181 1182 if (a == 0) { 1183 p->cls = float_class_zero; 1184 return; 1185 } 1186 1187 p->cls = float_class_normal; 1188 if (a < 0) { 1189 f = -f; 1190 p->sign = true; 1191 } 1192 shift = clz64(f); 1193 scale = MIN(MAX(scale, -0x10000), 0x10000); 1194 1195 p->exp = DECOMPOSED_BINARY_POINT - shift + scale; 1196 p->frac_hi = f << shift; 1197} 1198 1199/* 1200 * Unsigned Integer to float conversions 1201 * 1202 * Returns the result of converting the unsigned integer `a' to the 1203 * floating-point format. The conversion is performed according to the 1204 * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1205 */ 1206static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, 1207 int scale, float_status *status) 1208{ 1209 memset(p, 0, sizeof(*p)); 1210 1211 if (a == 0) { 1212 p->cls = float_class_zero; 1213 } else { 1214 int shift = clz64(a); 1215 scale = MIN(MAX(scale, -0x10000), 0x10000); 1216 p->cls = float_class_normal; 1217 p->exp = DECOMPOSED_BINARY_POINT - shift + scale; 1218 p->frac_hi = a << shift; 1219 } 1220} 1221 1222/* 1223 * Float min/max. 1224 */ 1225static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, 1226 float_status *s, int flags) 1227{ 1228 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1229 int a_exp, b_exp, cmp; 1230 1231 if (unlikely(ab_mask & float_cmask_anynan)) { 1232 /* 1233 * For minNum/maxNum (IEEE 754-2008) 1234 * or minimumNumber/maximumNumber (IEEE 754-2019), 1235 * if one operand is a QNaN, and the other 1236 * operand is numerical, then return numerical argument. 1237 */ 1238 if ((flags & (minmax_isnum | minmax_isnumber)) 1239 && !(ab_mask & float_cmask_snan) 1240 && (ab_mask & ~float_cmask_qnan)) { 1241 return is_nan(a->cls) ? b : a; 1242 } 1243 1244 /* 1245 * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag 1246 * are removed and replaced with minimum, minimumNumber, maximum 1247 * and maximumNumber. 1248 * minimumNumber/maximumNumber behavior for SNaN is changed to: 1249 * If both operands are NaNs, a QNaN is returned. 1250 * If either operand is a SNaN, 1251 * an invalid operation exception is signaled, 1252 * but unless both operands are NaNs, 1253 * the SNaN is otherwise ignored and not converted to a QNaN. 1254 */ 1255 if ((flags & minmax_isnumber) 1256 && (ab_mask & float_cmask_snan) 1257 && (ab_mask & ~float_cmask_anynan)) { 1258 float_raise(float_flag_invalid, s); 1259 return is_nan(a->cls) ? b : a; 1260 } 1261 1262 return parts_pick_nan(a, b, s); 1263 } 1264 1265 a_exp = a->exp; 1266 b_exp = b->exp; 1267 1268 if (unlikely(ab_mask != float_cmask_normal)) { 1269 switch (a->cls) { 1270 case float_class_normal: 1271 break; 1272 case float_class_inf: 1273 a_exp = INT16_MAX; 1274 break; 1275 case float_class_zero: 1276 a_exp = INT16_MIN; 1277 break; 1278 default: 1279 g_assert_not_reached(); 1280 break; 1281 } 1282 switch (b->cls) { 1283 case float_class_normal: 1284 break; 1285 case float_class_inf: 1286 b_exp = INT16_MAX; 1287 break; 1288 case float_class_zero: 1289 b_exp = INT16_MIN; 1290 break; 1291 default: 1292 g_assert_not_reached(); 1293 break; 1294 } 1295 } 1296 1297 /* Compare magnitudes. */ 1298 cmp = a_exp - b_exp; 1299 if (cmp == 0) { 1300 cmp = frac_cmp(a, b); 1301 } 1302 1303 /* 1304 * Take the sign into account. 1305 * For ismag, only do this if the magnitudes are equal. 1306 */ 1307 if (!(flags & minmax_ismag) || cmp == 0) { 1308 if (a->sign != b->sign) { 1309 /* For differing signs, the negative operand is less. */ 1310 cmp = a->sign ? -1 : 1; 1311 } else if (a->sign) { 1312 /* For two negative operands, invert the magnitude comparison. */ 1313 cmp = -cmp; 1314 } 1315 } 1316 1317 if (flags & minmax_ismin) { 1318 cmp = -cmp; 1319 } 1320 return cmp < 0 ? b : a; 1321} 1322 1323/* 1324 * Floating point compare 1325 */ 1326static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, 1327 float_status *s, bool is_quiet) 1328{ 1329 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1330 int cmp; 1331 1332 if (likely(ab_mask == float_cmask_normal)) { 1333 if (a->sign != b->sign) { 1334 goto a_sign; 1335 } 1336 if (a->exp != b->exp) { 1337 cmp = a->exp < b->exp ? -1 : 1; 1338 } else { 1339 cmp = frac_cmp(a, b); 1340 } 1341 if (a->sign) { 1342 cmp = -cmp; 1343 } 1344 return cmp; 1345 } 1346 1347 if (unlikely(ab_mask & float_cmask_anynan)) { 1348 if (ab_mask & float_cmask_snan) { 1349 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 1350 } else if (!is_quiet) { 1351 float_raise(float_flag_invalid, s); 1352 } 1353 return float_relation_unordered; 1354 } 1355 1356 if (ab_mask & float_cmask_zero) { 1357 if (ab_mask == float_cmask_zero) { 1358 return float_relation_equal; 1359 } else if (a->cls == float_class_zero) { 1360 goto b_sign; 1361 } else { 1362 goto a_sign; 1363 } 1364 } 1365 1366 if (ab_mask == float_cmask_inf) { 1367 if (a->sign == b->sign) { 1368 return float_relation_equal; 1369 } 1370 } else if (b->cls == float_class_inf) { 1371 goto b_sign; 1372 } else { 1373 g_assert(a->cls == float_class_inf); 1374 } 1375 1376 a_sign: 1377 return a->sign ? float_relation_less : float_relation_greater; 1378 b_sign: 1379 return b->sign ? float_relation_greater : float_relation_less; 1380} 1381 1382/* 1383 * Multiply A by 2 raised to the power N. 1384 */ 1385static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) 1386{ 1387 switch (a->cls) { 1388 case float_class_snan: 1389 case float_class_qnan: 1390 parts_return_nan(a, s); 1391 break; 1392 case float_class_zero: 1393 case float_class_inf: 1394 break; 1395 case float_class_normal: 1396 a->exp += MIN(MAX(n, -0x10000), 0x10000); 1397 break; 1398 default: 1399 g_assert_not_reached(); 1400 } 1401} 1402 1403/* 1404 * Return log2(A) 1405 */ 1406static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) 1407{ 1408 uint64_t a0, a1, r, t, ign; 1409 FloatPartsN f; 1410 int i, n, a_exp, f_exp; 1411 1412 if (unlikely(a->cls != float_class_normal)) { 1413 switch (a->cls) { 1414 case float_class_snan: 1415 case float_class_qnan: 1416 parts_return_nan(a, s); 1417 return; 1418 case float_class_zero: 1419 /* log2(0) = -inf */ 1420 a->cls = float_class_inf; 1421 a->sign = 1; 1422 return; 1423 case float_class_inf: 1424 if (unlikely(a->sign)) { 1425 goto d_nan; 1426 } 1427 return; 1428 default: 1429 break; 1430 } 1431 g_assert_not_reached(); 1432 } 1433 if (unlikely(a->sign)) { 1434 goto d_nan; 1435 } 1436 1437 /* TODO: This algorithm looses bits too quickly for float128. */ 1438 g_assert(N == 64); 1439 1440 a_exp = a->exp; 1441 f_exp = -1; 1442 1443 r = 0; 1444 t = DECOMPOSED_IMPLICIT_BIT; 1445 a0 = a->frac_hi; 1446 a1 = 0; 1447 1448 n = fmt->frac_size + 2; 1449 if (unlikely(a_exp == -1)) { 1450 /* 1451 * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). 1452 * When the value is very close to 1.0, there are lots of 1's in 1453 * the msb parts of the fraction. At the end, when we subtract 1454 * this value from -1.0, we can see a catastrophic loss of precision, 1455 * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the 1456 * bits of y in the final result. To minimize this, compute as many 1457 * digits as we can. 1458 * ??? This case needs another algorithm to avoid this. 1459 */ 1460 n = fmt->frac_size * 2 + 2; 1461 /* Don't compute a value overlapping the sticky bit */ 1462 n = MIN(n, 62); 1463 } 1464 1465 for (i = 0; i < n; i++) { 1466 if (a1) { 1467 mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); 1468 } else if (a0 & 0xffffffffull) { 1469 mul64To128(a0, a0, &a0, &a1); 1470 } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { 1471 a0 >>= 32; 1472 a0 *= a0; 1473 } else { 1474 goto exact; 1475 } 1476 1477 if (a0 & DECOMPOSED_IMPLICIT_BIT) { 1478 if (unlikely(a_exp == 0 && r == 0)) { 1479 /* 1480 * When a_exp == 0, we're computing the log2 of a value 1481 * [1.0,2.0). When the value is very close to 1.0, there 1482 * are lots of 0's in the msb parts of the fraction. 1483 * We need to compute more digits to produce a correct 1484 * result -- restart at the top of the fraction. 1485 * ??? This is likely to lose precision quickly, as for 1486 * float128; we may need another method. 1487 */ 1488 f_exp -= i; 1489 t = r = DECOMPOSED_IMPLICIT_BIT; 1490 i = 0; 1491 } else { 1492 r |= t; 1493 } 1494 } else { 1495 add128(a0, a1, a0, a1, &a0, &a1); 1496 } 1497 t >>= 1; 1498 } 1499 1500 /* Set sticky for inexact. */ 1501 r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); 1502 1503 exact: 1504 parts_sint_to_float(a, a_exp, 0, s); 1505 if (r == 0) { 1506 return; 1507 } 1508 1509 memset(&f, 0, sizeof(f)); 1510 f.cls = float_class_normal; 1511 f.frac_hi = r; 1512 f.exp = f_exp - frac_normalize(&f); 1513 1514 if (a_exp < 0) { 1515 parts_sub_normal(a, &f); 1516 } else if (a_exp > 0) { 1517 parts_add_normal(a, &f); 1518 } else { 1519 *a = f; 1520 } 1521 return; 1522 1523 d_nan: 1524 float_raise(float_flag_invalid, s); 1525 parts_default_nan(a, s); 1526} 1527