1/* 2 * QEMU float support 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file after December 1st 2014 will be 14 * taken to be licensed under the Softfloat-2a license unless specifically 15 * indicated otherwise. 16 */ 17 18static void partsN(return_nan)(FloatPartsN *a, float_status *s) 19{ 20 switch (a->cls) { 21 case float_class_snan: 22 float_raise(float_flag_invalid, s); 23 if (s->default_nan_mode) { 24 parts_default_nan(a, s); 25 } else { 26 parts_silence_nan(a, s); 27 } 28 break; 29 case float_class_qnan: 30 if (s->default_nan_mode) { 31 parts_default_nan(a, s); 32 } 33 break; 34 default: 35 g_assert_not_reached(); 36 } 37} 38 39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, 40 float_status *s) 41{ 42 if (is_snan(a->cls) || is_snan(b->cls)) { 43 float_raise(float_flag_invalid, s); 44 } 45 46 if (s->default_nan_mode) { 47 parts_default_nan(a, s); 48 } else { 49 int cmp = frac_cmp(a, b); 50 if (cmp == 0) { 51 cmp = a->sign < b->sign; 52 } 53 54 if (pickNaN(a->cls, b->cls, cmp > 0, s)) { 55 a = b; 56 } 57 if (is_snan(a->cls)) { 58 parts_silence_nan(a, s); 59 } 60 } 61 return a; 62} 63 64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, 65 FloatPartsN *c, float_status *s, 66 int ab_mask, int abc_mask) 67{ 68 int which; 69 70 if (unlikely(abc_mask & float_cmask_snan)) { 71 float_raise(float_flag_invalid, s); 72 } 73 74 which = pickNaNMulAdd(a->cls, b->cls, c->cls, 75 ab_mask == float_cmask_infzero, s); 76 77 if (s->default_nan_mode || which == 3) { 78 /* 79 * Note that this check is after pickNaNMulAdd so that function 80 * has an opportunity to set the Invalid flag for infzero. 81 */ 82 parts_default_nan(a, s); 83 return a; 84 } 85 86 switch (which) { 87 case 0: 88 break; 89 case 1: 90 a = b; 91 break; 92 case 2: 93 a = c; 94 break; 95 default: 96 g_assert_not_reached(); 97 } 98 if (is_snan(a->cls)) { 99 parts_silence_nan(a, s); 100 } 101 return a; 102} 103 104/* 105 * Canonicalize the FloatParts structure. Determine the class, 106 * unbias the exponent, and normalize the fraction. 107 */ 108static void partsN(canonicalize)(FloatPartsN *p, float_status *status, 109 const FloatFmt *fmt) 110{ 111 if (unlikely(p->exp == 0)) { 112 if (likely(frac_eqz(p))) { 113 p->cls = float_class_zero; 114 } else if (status->flush_inputs_to_zero) { 115 float_raise(float_flag_input_denormal, status); 116 p->cls = float_class_zero; 117 frac_clear(p); 118 } else { 119 int shift = frac_normalize(p); 120 p->cls = float_class_normal; 121 p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; 122 } 123 } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { 124 p->cls = float_class_normal; 125 p->exp -= fmt->exp_bias; 126 frac_shl(p, fmt->frac_shift); 127 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 128 } else if (likely(frac_eqz(p))) { 129 p->cls = float_class_inf; 130 } else { 131 frac_shl(p, fmt->frac_shift); 132 p->cls = (parts_is_snan_frac(p->frac_hi, status) 133 ? float_class_snan : float_class_qnan); 134 } 135} 136 137/* 138 * Round and uncanonicalize a floating-point number by parts. There 139 * are FRAC_SHIFT bits that may require rounding at the bottom of the 140 * fraction; these bits will be removed. The exponent will be biased 141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. 142 */ 143static void partsN(uncanon)(FloatPartsN *p, float_status *s, 144 const FloatFmt *fmt) 145{ 146 const int exp_max = fmt->exp_max; 147 const int frac_shift = fmt->frac_shift; 148 const uint64_t frac_lsb = fmt->frac_lsb; 149 const uint64_t frac_lsbm1 = fmt->frac_lsbm1; 150 const uint64_t round_mask = fmt->round_mask; 151 const uint64_t roundeven_mask = fmt->roundeven_mask; 152 uint64_t inc; 153 bool overflow_norm; 154 int exp, flags = 0; 155 156 if (unlikely(p->cls != float_class_normal)) { 157 switch (p->cls) { 158 case float_class_zero: 159 p->exp = 0; 160 frac_clear(p); 161 return; 162 case float_class_inf: 163 g_assert(!fmt->arm_althp); 164 p->exp = fmt->exp_max; 165 frac_clear(p); 166 return; 167 case float_class_qnan: 168 case float_class_snan: 169 g_assert(!fmt->arm_althp); 170 p->exp = fmt->exp_max; 171 frac_shr(p, fmt->frac_shift); 172 return; 173 default: 174 break; 175 } 176 g_assert_not_reached(); 177 } 178 179 overflow_norm = false; 180 switch (s->float_rounding_mode) { 181 case float_round_nearest_even: 182 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0); 183 break; 184 case float_round_ties_away: 185 inc = frac_lsbm1; 186 break; 187 case float_round_to_zero: 188 overflow_norm = true; 189 inc = 0; 190 break; 191 case float_round_up: 192 inc = p->sign ? 0 : round_mask; 193 overflow_norm = p->sign; 194 break; 195 case float_round_down: 196 inc = p->sign ? round_mask : 0; 197 overflow_norm = !p->sign; 198 break; 199 case float_round_to_odd: 200 overflow_norm = true; 201 /* fall through */ 202 case float_round_to_odd_inf: 203 inc = p->frac_lo & frac_lsb ? 0 : round_mask; 204 break; 205 default: 206 g_assert_not_reached(); 207 } 208 209 exp = p->exp + fmt->exp_bias; 210 if (likely(exp > 0)) { 211 if (p->frac_lo & round_mask) { 212 flags |= float_flag_inexact; 213 if (frac_addi(p, p, inc)) { 214 frac_shr(p, 1); 215 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 216 exp++; 217 } 218 } 219 frac_shr(p, frac_shift); 220 221 if (fmt->arm_althp) { 222 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ 223 if (unlikely(exp > exp_max)) { 224 /* Overflow. Return the maximum normal. */ 225 flags = float_flag_invalid; 226 exp = exp_max; 227 frac_allones(p); 228 } 229 } else if (unlikely(exp >= exp_max)) { 230 flags |= float_flag_overflow | float_flag_inexact; 231 if (overflow_norm) { 232 exp = exp_max - 1; 233 frac_allones(p); 234 } else { 235 p->cls = float_class_inf; 236 exp = exp_max; 237 frac_clear(p); 238 } 239 } 240 } else if (s->flush_to_zero) { 241 flags |= float_flag_output_denormal; 242 p->cls = float_class_zero; 243 exp = 0; 244 frac_clear(p); 245 } else { 246 bool is_tiny = s->tininess_before_rounding || exp < 0; 247 248 if (!is_tiny) { 249 FloatPartsN discard; 250 is_tiny = !frac_addi(&discard, p, inc); 251 } 252 253 frac_shrjam(p, 1 - exp); 254 255 if (p->frac_lo & round_mask) { 256 /* Need to recompute round-to-even/round-to-odd. */ 257 switch (s->float_rounding_mode) { 258 case float_round_nearest_even: 259 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 260 ? frac_lsbm1 : 0); 261 break; 262 case float_round_to_odd: 263 case float_round_to_odd_inf: 264 inc = p->frac_lo & frac_lsb ? 0 : round_mask; 265 break; 266 default: 267 break; 268 } 269 flags |= float_flag_inexact; 270 frac_addi(p, p, inc); 271 } 272 273 exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; 274 frac_shr(p, frac_shift); 275 276 if (is_tiny && (flags & float_flag_inexact)) { 277 flags |= float_flag_underflow; 278 } 279 if (exp == 0 && frac_eqz(p)) { 280 p->cls = float_class_zero; 281 } 282 } 283 p->exp = exp; 284 float_raise(flags, s); 285} 286 287/* 288 * Returns the result of adding or subtracting the values of the 289 * floating-point values `a' and `b'. The operation is performed 290 * according to the IEC/IEEE Standard for Binary Floating-Point 291 * Arithmetic. 292 */ 293static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, 294 float_status *s, bool subtract) 295{ 296 bool b_sign = b->sign ^ subtract; 297 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 298 299 if (a->sign != b_sign) { 300 /* Subtraction */ 301 if (likely(ab_mask == float_cmask_normal)) { 302 if (parts_sub_normal(a, b)) { 303 return a; 304 } 305 /* Subtract was exact, fall through to set sign. */ 306 ab_mask = float_cmask_zero; 307 } 308 309 if (ab_mask == float_cmask_zero) { 310 a->sign = s->float_rounding_mode == float_round_down; 311 return a; 312 } 313 314 if (unlikely(ab_mask & float_cmask_anynan)) { 315 goto p_nan; 316 } 317 318 if (ab_mask & float_cmask_inf) { 319 if (a->cls != float_class_inf) { 320 /* N - Inf */ 321 goto return_b; 322 } 323 if (b->cls != float_class_inf) { 324 /* Inf - N */ 325 return a; 326 } 327 /* Inf - Inf */ 328 float_raise(float_flag_invalid, s); 329 parts_default_nan(a, s); 330 return a; 331 } 332 } else { 333 /* Addition */ 334 if (likely(ab_mask == float_cmask_normal)) { 335 parts_add_normal(a, b); 336 return a; 337 } 338 339 if (ab_mask == float_cmask_zero) { 340 return a; 341 } 342 343 if (unlikely(ab_mask & float_cmask_anynan)) { 344 goto p_nan; 345 } 346 347 if (ab_mask & float_cmask_inf) { 348 a->cls = float_class_inf; 349 return a; 350 } 351 } 352 353 if (b->cls == float_class_zero) { 354 g_assert(a->cls == float_class_normal); 355 return a; 356 } 357 358 g_assert(a->cls == float_class_zero); 359 g_assert(b->cls == float_class_normal); 360 return_b: 361 b->sign = b_sign; 362 return b; 363 364 p_nan: 365 return parts_pick_nan(a, b, s); 366} 367 368/* 369 * Returns the result of multiplying the floating-point values `a' and 370 * `b'. The operation is performed according to the IEC/IEEE Standard 371 * for Binary Floating-Point Arithmetic. 372 */ 373static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, 374 float_status *s) 375{ 376 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 377 bool sign = a->sign ^ b->sign; 378 379 if (likely(ab_mask == float_cmask_normal)) { 380 FloatPartsW tmp; 381 382 frac_mulw(&tmp, a, b); 383 frac_truncjam(a, &tmp); 384 385 a->exp += b->exp + 1; 386 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 387 frac_add(a, a, a); 388 a->exp -= 1; 389 } 390 391 a->sign = sign; 392 return a; 393 } 394 395 /* Inf * Zero == NaN */ 396 if (unlikely(ab_mask == float_cmask_infzero)) { 397 float_raise(float_flag_invalid, s); 398 parts_default_nan(a, s); 399 return a; 400 } 401 402 if (unlikely(ab_mask & float_cmask_anynan)) { 403 return parts_pick_nan(a, b, s); 404 } 405 406 /* Multiply by 0 or Inf */ 407 if (ab_mask & float_cmask_inf) { 408 a->cls = float_class_inf; 409 a->sign = sign; 410 return a; 411 } 412 413 g_assert(ab_mask & float_cmask_zero); 414 a->cls = float_class_zero; 415 a->sign = sign; 416 return a; 417} 418 419/* 420 * Returns the result of multiplying the floating-point values `a' and 421 * `b' then adding 'c', with no intermediate rounding step after the 422 * multiplication. The operation is performed according to the 423 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. 424 * The flags argument allows the caller to select negation of the 425 * addend, the intermediate product, or the final result. (The 426 * difference between this and having the caller do a separate 427 * negation is that negating externally will flip the sign bit on NaNs.) 428 * 429 * Requires A and C extracted into a double-sized structure to provide the 430 * extra space for the widening multiply. 431 */ 432static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, 433 FloatPartsN *c, int flags, float_status *s) 434{ 435 int ab_mask, abc_mask; 436 FloatPartsW p_widen, c_widen; 437 438 ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 439 abc_mask = float_cmask(c->cls) | ab_mask; 440 441 /* 442 * It is implementation-defined whether the cases of (0,inf,qnan) 443 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN 444 * they return if they do), so we have to hand this information 445 * off to the target-specific pick-a-NaN routine. 446 */ 447 if (unlikely(abc_mask & float_cmask_anynan)) { 448 return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); 449 } 450 451 if (flags & float_muladd_negate_c) { 452 c->sign ^= 1; 453 } 454 455 /* Compute the sign of the product into A. */ 456 a->sign ^= b->sign; 457 if (flags & float_muladd_negate_product) { 458 a->sign ^= 1; 459 } 460 461 if (unlikely(ab_mask != float_cmask_normal)) { 462 if (unlikely(ab_mask == float_cmask_infzero)) { 463 goto d_nan; 464 } 465 466 if (ab_mask & float_cmask_inf) { 467 if (c->cls == float_class_inf && a->sign != c->sign) { 468 goto d_nan; 469 } 470 goto return_inf; 471 } 472 473 g_assert(ab_mask & float_cmask_zero); 474 if (c->cls == float_class_normal) { 475 *a = *c; 476 goto return_normal; 477 } 478 if (c->cls == float_class_zero) { 479 if (a->sign != c->sign) { 480 goto return_sub_zero; 481 } 482 goto return_zero; 483 } 484 g_assert(c->cls == float_class_inf); 485 } 486 487 if (unlikely(c->cls == float_class_inf)) { 488 a->sign = c->sign; 489 goto return_inf; 490 } 491 492 /* Perform the multiplication step. */ 493 p_widen.sign = a->sign; 494 p_widen.exp = a->exp + b->exp + 1; 495 frac_mulw(&p_widen, a, b); 496 if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 497 frac_add(&p_widen, &p_widen, &p_widen); 498 p_widen.exp -= 1; 499 } 500 501 /* Perform the addition step. */ 502 if (c->cls != float_class_zero) { 503 /* Zero-extend C to less significant bits. */ 504 frac_widen(&c_widen, c); 505 c_widen.exp = c->exp; 506 507 if (a->sign == c->sign) { 508 parts_add_normal(&p_widen, &c_widen); 509 } else if (!parts_sub_normal(&p_widen, &c_widen)) { 510 goto return_sub_zero; 511 } 512 } 513 514 /* Narrow with sticky bit, for proper rounding later. */ 515 frac_truncjam(a, &p_widen); 516 a->sign = p_widen.sign; 517 a->exp = p_widen.exp; 518 519 return_normal: 520 if (flags & float_muladd_halve_result) { 521 a->exp -= 1; 522 } 523 finish_sign: 524 if (flags & float_muladd_negate_result) { 525 a->sign ^= 1; 526 } 527 return a; 528 529 return_sub_zero: 530 a->sign = s->float_rounding_mode == float_round_down; 531 return_zero: 532 a->cls = float_class_zero; 533 goto finish_sign; 534 535 return_inf: 536 a->cls = float_class_inf; 537 goto finish_sign; 538 539 d_nan: 540 float_raise(float_flag_invalid, s); 541 parts_default_nan(a, s); 542 return a; 543} 544 545/* 546 * Returns the result of dividing the floating-point value `a' by the 547 * corresponding value `b'. The operation is performed according to 548 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 549 */ 550static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, 551 float_status *s) 552{ 553 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 554 bool sign = a->sign ^ b->sign; 555 556 if (likely(ab_mask == float_cmask_normal)) { 557 a->sign = sign; 558 a->exp -= b->exp + frac_div(a, b); 559 return a; 560 } 561 562 /* 0/0 or Inf/Inf => NaN */ 563 if (unlikely(ab_mask == float_cmask_zero) || 564 unlikely(ab_mask == float_cmask_inf)) { 565 float_raise(float_flag_invalid, s); 566 parts_default_nan(a, s); 567 return a; 568 } 569 570 /* All the NaN cases */ 571 if (unlikely(ab_mask & float_cmask_anynan)) { 572 return parts_pick_nan(a, b, s); 573 } 574 575 a->sign = sign; 576 577 /* Inf / X */ 578 if (a->cls == float_class_inf) { 579 return a; 580 } 581 582 /* 0 / X */ 583 if (a->cls == float_class_zero) { 584 return a; 585 } 586 587 /* X / Inf */ 588 if (b->cls == float_class_inf) { 589 a->cls = float_class_zero; 590 return a; 591 } 592 593 /* X / 0 => Inf */ 594 g_assert(b->cls == float_class_zero); 595 float_raise(float_flag_divbyzero, s); 596 a->cls = float_class_inf; 597 return a; 598} 599 600/* 601 * Square Root 602 * 603 * The base algorithm is lifted from 604 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c 605 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c 606 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c 607 * and is thus MIT licenced. 608 */ 609static void partsN(sqrt)(FloatPartsN *a, float_status *status, 610 const FloatFmt *fmt) 611{ 612 const uint32_t three32 = 3u << 30; 613 const uint64_t three64 = 3ull << 62; 614 uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ 615 uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ 616 uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ 617 uint64_t d0h, d0l, d1h, d1l, d2h, d2l; 618 uint64_t discard; 619 bool exp_odd; 620 size_t index; 621 622 if (unlikely(a->cls != float_class_normal)) { 623 switch (a->cls) { 624 case float_class_snan: 625 case float_class_qnan: 626 parts_return_nan(a, status); 627 return; 628 case float_class_zero: 629 return; 630 case float_class_inf: 631 if (unlikely(a->sign)) { 632 goto d_nan; 633 } 634 return; 635 default: 636 g_assert_not_reached(); 637 } 638 } 639 640 if (unlikely(a->sign)) { 641 goto d_nan; 642 } 643 644 /* 645 * Argument reduction. 646 * x = 4^e frac; with integer e, and frac in [1, 4) 647 * m = frac fixed point at bit 62, since we're in base 4. 648 * If base-2 exponent is odd, exchange that for multiply by 2, 649 * which results in no shift. 650 */ 651 exp_odd = a->exp & 1; 652 index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); 653 if (!exp_odd) { 654 frac_shr(a, 1); 655 } 656 657 /* 658 * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). 659 * 660 * Initial estimate: 661 * 7-bit lookup table (1-bit exponent and 6-bit significand). 662 * 663 * The relative error (e = r0*sqrt(m)-1) of a linear estimate 664 * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; 665 * a table lookup is faster and needs one less iteration. 666 * The 7-bit table gives |e| < 0x1.fdp-9. 667 * 668 * A Newton-Raphson iteration for r is 669 * s = m*r 670 * d = s*r 671 * u = 3 - d 672 * r = r*u/2 673 * 674 * Fixed point representations: 675 * m, s, d, u, three are all 2.30; r is 0.32 676 */ 677 m64 = a->frac_hi; 678 m32 = m64 >> 32; 679 680 r32 = rsqrt_tab[index] << 16; 681 /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ 682 683 s32 = ((uint64_t)m32 * r32) >> 32; 684 d32 = ((uint64_t)s32 * r32) >> 32; 685 u32 = three32 - d32; 686 687 if (N == 64) { 688 /* float64 or smaller */ 689 690 r32 = ((uint64_t)r32 * u32) >> 31; 691 /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ 692 693 s32 = ((uint64_t)m32 * r32) >> 32; 694 d32 = ((uint64_t)s32 * r32) >> 32; 695 u32 = three32 - d32; 696 697 if (fmt->frac_size <= 23) { 698 /* float32 or smaller */ 699 700 s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ 701 s32 = (s32 - 1) >> 6; /* 9.23 */ 702 /* s < sqrt(m) < s + 0x1.08p-23 */ 703 704 /* compute nearest rounded result to 2.23 bits */ 705 uint32_t d0 = (m32 << 16) - s32 * s32; 706 uint32_t d1 = s32 - d0; 707 uint32_t d2 = d1 + s32 + 1; 708 s32 += d1 >> 31; 709 a->frac_hi = (uint64_t)s32 << (64 - 25); 710 711 /* increment or decrement for inexact */ 712 if (d2 != 0) { 713 a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); 714 } 715 goto done; 716 } 717 718 /* float64 */ 719 720 r64 = (uint64_t)r32 * u32 * 2; 721 /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ 722 mul64To128(m64, r64, &s64, &discard); 723 mul64To128(s64, r64, &d64, &discard); 724 u64 = three64 - d64; 725 726 mul64To128(s64, u64, &s64, &discard); /* 3.61 */ 727 s64 = (s64 - 2) >> 9; /* 12.52 */ 728 729 /* Compute nearest rounded result */ 730 uint64_t d0 = (m64 << 42) - s64 * s64; 731 uint64_t d1 = s64 - d0; 732 uint64_t d2 = d1 + s64 + 1; 733 s64 += d1 >> 63; 734 a->frac_hi = s64 << (64 - 54); 735 736 /* increment or decrement for inexact */ 737 if (d2 != 0) { 738 a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); 739 } 740 goto done; 741 } 742 743 r64 = (uint64_t)r32 * u32 * 2; 744 /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ 745 746 mul64To128(m64, r64, &s64, &discard); 747 mul64To128(s64, r64, &d64, &discard); 748 u64 = three64 - d64; 749 mul64To128(u64, r64, &r64, &discard); 750 r64 <<= 1; 751 /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ 752 753 mul64To128(m64, r64, &s64, &discard); 754 mul64To128(s64, r64, &d64, &discard); 755 u64 = three64 - d64; 756 mul64To128(u64, r64, &rh, &rl); 757 add128(rh, rl, rh, rl, &rh, &rl); 758 /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ 759 760 mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); 761 mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); 762 sub128(three64, 0, dh, dl, &uh, &ul); 763 mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ 764 /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ 765 766 sub128(sh, sl, 0, 4, &sh, &sl); 767 shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ 768 /* s < sqrt(m) < s + 1ulp */ 769 770 /* Compute nearest rounded result */ 771 mul64To128(sl, sl, &d0h, &d0l); 772 d0h += 2 * sh * sl; 773 sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); 774 sub128(sh, sl, d0h, d0l, &d1h, &d1l); 775 add128(sh, sl, 0, 1, &d2h, &d2l); 776 add128(d2h, d2l, d1h, d1l, &d2h, &d2l); 777 add128(sh, sl, 0, d1h >> 63, &sh, &sl); 778 shift128Left(sh, sl, 128 - 114, &sh, &sl); 779 780 /* increment or decrement for inexact */ 781 if (d2h | d2l) { 782 if ((int64_t)(d1h ^ d2h) < 0) { 783 sub128(sh, sl, 0, 1, &sh, &sl); 784 } else { 785 add128(sh, sl, 0, 1, &sh, &sl); 786 } 787 } 788 a->frac_lo = sl; 789 a->frac_hi = sh; 790 791 done: 792 /* Convert back from base 4 to base 2. */ 793 a->exp >>= 1; 794 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 795 frac_add(a, a, a); 796 } else { 797 a->exp += 1; 798 } 799 return; 800 801 d_nan: 802 float_raise(float_flag_invalid, status); 803 parts_default_nan(a, status); 804} 805 806/* 807 * Rounds the floating-point value `a' to an integer, and returns the 808 * result as a floating-point value. The operation is performed 809 * according to the IEC/IEEE Standard for Binary Floating-Point 810 * Arithmetic. 811 * 812 * parts_round_to_int_normal is an internal helper function for 813 * normal numbers only, returning true for inexact but not directly 814 * raising float_flag_inexact. 815 */ 816static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, 817 int scale, int frac_size) 818{ 819 uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; 820 int shift_adj; 821 822 scale = MIN(MAX(scale, -0x10000), 0x10000); 823 a->exp += scale; 824 825 if (a->exp < 0) { 826 bool one; 827 828 /* All fractional */ 829 switch (rmode) { 830 case float_round_nearest_even: 831 one = false; 832 if (a->exp == -1) { 833 FloatPartsN tmp; 834 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ 835 frac_add(&tmp, a, a); 836 /* Anything remaining means frac > 0.5. */ 837 one = !frac_eqz(&tmp); 838 } 839 break; 840 case float_round_ties_away: 841 one = a->exp == -1; 842 break; 843 case float_round_to_zero: 844 one = false; 845 break; 846 case float_round_up: 847 one = !a->sign; 848 break; 849 case float_round_down: 850 one = a->sign; 851 break; 852 case float_round_to_odd: 853 one = true; 854 break; 855 default: 856 g_assert_not_reached(); 857 } 858 859 frac_clear(a); 860 a->exp = 0; 861 if (one) { 862 a->frac_hi = DECOMPOSED_IMPLICIT_BIT; 863 } else { 864 a->cls = float_class_zero; 865 } 866 return true; 867 } 868 869 if (a->exp >= frac_size) { 870 /* All integral */ 871 return false; 872 } 873 874 if (N > 64 && a->exp < N - 64) { 875 /* 876 * Rounding is not in the low word -- shift lsb to bit 2, 877 * which leaves room for sticky and rounding bit. 878 */ 879 shift_adj = (N - 1) - (a->exp + 2); 880 frac_shrjam(a, shift_adj); 881 frac_lsb = 1 << 2; 882 } else { 883 shift_adj = 0; 884 frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); 885 } 886 887 frac_lsbm1 = frac_lsb >> 1; 888 rnd_mask = frac_lsb - 1; 889 rnd_even_mask = rnd_mask | frac_lsb; 890 891 if (!(a->frac_lo & rnd_mask)) { 892 /* Fractional bits already clear, undo the shift above. */ 893 frac_shl(a, shift_adj); 894 return false; 895 } 896 897 switch (rmode) { 898 case float_round_nearest_even: 899 inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); 900 break; 901 case float_round_ties_away: 902 inc = frac_lsbm1; 903 break; 904 case float_round_to_zero: 905 inc = 0; 906 break; 907 case float_round_up: 908 inc = a->sign ? 0 : rnd_mask; 909 break; 910 case float_round_down: 911 inc = a->sign ? rnd_mask : 0; 912 break; 913 case float_round_to_odd: 914 inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; 915 break; 916 default: 917 g_assert_not_reached(); 918 } 919 920 if (shift_adj == 0) { 921 if (frac_addi(a, a, inc)) { 922 frac_shr(a, 1); 923 a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 924 a->exp++; 925 } 926 a->frac_lo &= ~rnd_mask; 927 } else { 928 frac_addi(a, a, inc); 929 a->frac_lo &= ~rnd_mask; 930 /* Be careful shifting back, not to overflow */ 931 frac_shl(a, shift_adj - 1); 932 if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { 933 a->exp++; 934 } else { 935 frac_add(a, a, a); 936 } 937 } 938 return true; 939} 940 941static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, 942 int scale, float_status *s, 943 const FloatFmt *fmt) 944{ 945 switch (a->cls) { 946 case float_class_qnan: 947 case float_class_snan: 948 parts_return_nan(a, s); 949 break; 950 case float_class_zero: 951 case float_class_inf: 952 break; 953 case float_class_normal: 954 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { 955 float_raise(float_flag_inexact, s); 956 } 957 break; 958 default: 959 g_assert_not_reached(); 960 } 961} 962 963/* 964 * Returns the result of converting the floating-point value `a' to 965 * the two's complement integer format. The conversion is performed 966 * according to the IEC/IEEE Standard for Binary Floating-Point 967 * Arithmetic---which means in particular that the conversion is 968 * rounded according to the current rounding mode. If `a' is a NaN, 969 * the largest positive integer is returned. Otherwise, if the 970 * conversion overflows, the largest integer with the same sign as `a' 971 * is returned. 972 */ 973static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, 974 int scale, int64_t min, int64_t max, 975 float_status *s) 976{ 977 int flags = 0; 978 uint64_t r; 979 980 switch (p->cls) { 981 case float_class_snan: 982 case float_class_qnan: 983 flags = float_flag_invalid; 984 r = max; 985 break; 986 987 case float_class_inf: 988 flags = float_flag_invalid; 989 r = p->sign ? min : max; 990 break; 991 992 case float_class_zero: 993 return 0; 994 995 case float_class_normal: 996 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 997 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 998 flags = float_flag_inexact; 999 } 1000 1001 if (p->exp <= DECOMPOSED_BINARY_POINT) { 1002 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1003 } else { 1004 r = UINT64_MAX; 1005 } 1006 if (p->sign) { 1007 if (r <= -(uint64_t)min) { 1008 r = -r; 1009 } else { 1010 flags = float_flag_invalid; 1011 r = min; 1012 } 1013 } else if (r > max) { 1014 flags = float_flag_invalid; 1015 r = max; 1016 } 1017 break; 1018 1019 default: 1020 g_assert_not_reached(); 1021 } 1022 1023 float_raise(flags, s); 1024 return r; 1025} 1026 1027/* 1028 * Returns the result of converting the floating-point value `a' to 1029 * the unsigned integer format. The conversion is performed according 1030 * to the IEC/IEEE Standard for Binary Floating-Point 1031 * Arithmetic---which means in particular that the conversion is 1032 * rounded according to the current rounding mode. If `a' is a NaN, 1033 * the largest unsigned integer is returned. Otherwise, if the 1034 * conversion overflows, the largest unsigned integer is returned. If 1035 * the 'a' is negative, the result is rounded and zero is returned; 1036 * values that do not round to zero will raise the inexact exception 1037 * flag. 1038 */ 1039static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, 1040 int scale, uint64_t max, float_status *s) 1041{ 1042 int flags = 0; 1043 uint64_t r; 1044 1045 switch (p->cls) { 1046 case float_class_snan: 1047 case float_class_qnan: 1048 flags = float_flag_invalid; 1049 r = max; 1050 break; 1051 1052 case float_class_inf: 1053 flags = float_flag_invalid; 1054 r = p->sign ? 0 : max; 1055 break; 1056 1057 case float_class_zero: 1058 return 0; 1059 1060 case float_class_normal: 1061 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1062 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1063 flags = float_flag_inexact; 1064 if (p->cls == float_class_zero) { 1065 r = 0; 1066 break; 1067 } 1068 } 1069 1070 if (p->sign) { 1071 flags = float_flag_invalid; 1072 r = 0; 1073 } else if (p->exp > DECOMPOSED_BINARY_POINT) { 1074 flags = float_flag_invalid; 1075 r = max; 1076 } else { 1077 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1078 if (r > max) { 1079 flags = float_flag_invalid; 1080 r = max; 1081 } 1082 } 1083 break; 1084 1085 default: 1086 g_assert_not_reached(); 1087 } 1088 1089 float_raise(flags, s); 1090 return r; 1091} 1092 1093/* 1094 * Integer to float conversions 1095 * 1096 * Returns the result of converting the two's complement integer `a' 1097 * to the floating-point format. The conversion is performed according 1098 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1099 */ 1100static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, 1101 int scale, float_status *s) 1102{ 1103 uint64_t f = a; 1104 int shift; 1105 1106 memset(p, 0, sizeof(*p)); 1107 1108 if (a == 0) { 1109 p->cls = float_class_zero; 1110 return; 1111 } 1112 1113 p->cls = float_class_normal; 1114 if (a < 0) { 1115 f = -f; 1116 p->sign = true; 1117 } 1118 shift = clz64(f); 1119 scale = MIN(MAX(scale, -0x10000), 0x10000); 1120 1121 p->exp = DECOMPOSED_BINARY_POINT - shift + scale; 1122 p->frac_hi = f << shift; 1123} 1124 1125/* 1126 * Unsigned Integer to float conversions 1127 * 1128 * Returns the result of converting the unsigned integer `a' to the 1129 * floating-point format. The conversion is performed according to the 1130 * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1131 */ 1132static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, 1133 int scale, float_status *status) 1134{ 1135 memset(p, 0, sizeof(*p)); 1136 1137 if (a == 0) { 1138 p->cls = float_class_zero; 1139 } else { 1140 int shift = clz64(a); 1141 scale = MIN(MAX(scale, -0x10000), 0x10000); 1142 p->cls = float_class_normal; 1143 p->exp = DECOMPOSED_BINARY_POINT - shift + scale; 1144 p->frac_hi = a << shift; 1145 } 1146} 1147 1148/* 1149 * Float min/max. 1150 */ 1151static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, 1152 float_status *s, int flags) 1153{ 1154 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1155 int a_exp, b_exp, cmp; 1156 1157 if (unlikely(ab_mask & float_cmask_anynan)) { 1158 /* 1159 * For minnum/maxnum, if one operand is a QNaN, and the other 1160 * operand is numerical, then return numerical argument. 1161 */ 1162 if ((flags & minmax_isnum) 1163 && !(ab_mask & float_cmask_snan) 1164 && (ab_mask & ~float_cmask_qnan)) { 1165 return is_nan(a->cls) ? b : a; 1166 } 1167 return parts_pick_nan(a, b, s); 1168 } 1169 1170 a_exp = a->exp; 1171 b_exp = b->exp; 1172 1173 if (unlikely(ab_mask != float_cmask_normal)) { 1174 switch (a->cls) { 1175 case float_class_normal: 1176 break; 1177 case float_class_inf: 1178 a_exp = INT16_MAX; 1179 break; 1180 case float_class_zero: 1181 a_exp = INT16_MIN; 1182 break; 1183 default: 1184 g_assert_not_reached(); 1185 break; 1186 } 1187 switch (b->cls) { 1188 case float_class_normal: 1189 break; 1190 case float_class_inf: 1191 b_exp = INT16_MAX; 1192 break; 1193 case float_class_zero: 1194 b_exp = INT16_MIN; 1195 break; 1196 default: 1197 g_assert_not_reached(); 1198 break; 1199 } 1200 } 1201 1202 /* Compare magnitudes. */ 1203 cmp = a_exp - b_exp; 1204 if (cmp == 0) { 1205 cmp = frac_cmp(a, b); 1206 } 1207 1208 /* 1209 * Take the sign into account. 1210 * For ismag, only do this if the magnitudes are equal. 1211 */ 1212 if (!(flags & minmax_ismag) || cmp == 0) { 1213 if (a->sign != b->sign) { 1214 /* For differing signs, the negative operand is less. */ 1215 cmp = a->sign ? -1 : 1; 1216 } else if (a->sign) { 1217 /* For two negative operands, invert the magnitude comparison. */ 1218 cmp = -cmp; 1219 } 1220 } 1221 1222 if (flags & minmax_ismin) { 1223 cmp = -cmp; 1224 } 1225 return cmp < 0 ? b : a; 1226} 1227 1228/* 1229 * Floating point compare 1230 */ 1231static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, 1232 float_status *s, bool is_quiet) 1233{ 1234 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1235 int cmp; 1236 1237 if (likely(ab_mask == float_cmask_normal)) { 1238 if (a->sign != b->sign) { 1239 goto a_sign; 1240 } 1241 if (a->exp != b->exp) { 1242 cmp = a->exp < b->exp ? -1 : 1; 1243 } else { 1244 cmp = frac_cmp(a, b); 1245 } 1246 if (a->sign) { 1247 cmp = -cmp; 1248 } 1249 return cmp; 1250 } 1251 1252 if (unlikely(ab_mask & float_cmask_anynan)) { 1253 if (!is_quiet || (ab_mask & float_cmask_snan)) { 1254 float_raise(float_flag_invalid, s); 1255 } 1256 return float_relation_unordered; 1257 } 1258 1259 if (ab_mask & float_cmask_zero) { 1260 if (ab_mask == float_cmask_zero) { 1261 return float_relation_equal; 1262 } else if (a->cls == float_class_zero) { 1263 goto b_sign; 1264 } else { 1265 goto a_sign; 1266 } 1267 } 1268 1269 if (ab_mask == float_cmask_inf) { 1270 if (a->sign == b->sign) { 1271 return float_relation_equal; 1272 } 1273 } else if (b->cls == float_class_inf) { 1274 goto b_sign; 1275 } else { 1276 g_assert(a->cls == float_class_inf); 1277 } 1278 1279 a_sign: 1280 return a->sign ? float_relation_less : float_relation_greater; 1281 b_sign: 1282 return b->sign ? float_relation_greater : float_relation_less; 1283} 1284 1285/* 1286 * Multiply A by 2 raised to the power N. 1287 */ 1288static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) 1289{ 1290 switch (a->cls) { 1291 case float_class_snan: 1292 case float_class_qnan: 1293 parts_return_nan(a, s); 1294 break; 1295 case float_class_zero: 1296 case float_class_inf: 1297 break; 1298 case float_class_normal: 1299 a->exp += MIN(MAX(n, -0x10000), 0x10000); 1300 break; 1301 default: 1302 g_assert_not_reached(); 1303 } 1304} 1305