/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
 * Turn the NaN input 'a' into the NaN result of a one-operand operation,
 * in place.  An SNaN always raises invalid (with the invalid_snan detail
 * flag); the result is then either the default NaN or the silenced input,
 * depending on default_nan_mode.  A QNaN raises nothing and is passed
 * through unless default_nan_mode forces the default NaN.
 */
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        /* Callers only invoke this for NaN inputs. */
        g_assert_not_reached();
    }
}

/*
 * Select which NaN to propagate for a two-operand operation, per the
 * target's float_2nan_prop_rule.  At least one of a, b is a NaN.
 * Raises invalid for any SNaN input; the selected NaN is silenced
 * before being returned.  In default-NaN mode the propagation rule is
 * ignored and the default NaN is returned instead.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        /* Prefer SNaN over QNaN, then operand order a, b. */
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        /* Prefer operand a over b. */
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        /* Prefer SNaN over QNaN, then operand order b, a. */
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        /* Prefer operand b over a. */
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
        } else {
            ret = b;
            break;
        }
        /* Both operands are the same kind of NaN: compare significands. */
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: prefer the one with a clear sign bit. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}

/*
 * Select which NaN to propagate for fused multiply-add (a * b) + c.
 * ab_mask/abc_mask are the combined float_cmask classifications of
 * (a, b) and (a, b, c) respectively; at least one operand is a NaN.
 * Handles the Inf * 0 + NaN case per the target's
 * float_infzeronan_rule, otherwise applies the three-operand
 * float_3nan_prop_rule.  The chosen NaN is silenced before return.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (infzero) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        ret = c;
    } else {
        /*
         * The rule packs a priority-ordered list of operand indices,
         * consumed R_3NAN_1ST_LENGTH bits at a time; walk the list
         * until an operand of the wanted kind (SNaN, or any NaN) is
         * found.  The target guarantees the walk terminates.
         */
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Input denormals squashed to zero when the target asks. */
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /* Subnormal: shift the fraction up and adjust the exponent. */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            /*
             * m68k extended precision encodes denormals with a
             * different exponent convention, hence the !m68k_denormal
             * adjustment -- NOTE(review): see fmt->m68k_denormal users.
             */
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !fmt->m68k_denormal;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /* Normal (ARM AltHP uses the top exponent for normals too). */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        /* Top exponent with non-zero fraction: NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}

/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    /* frac_lsbm1 is the round bit: the most significant bit of round_mask. */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /* Select the rounding increment for the current rounding mode. */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even_max:
        overflow_norm = true;
        /* fall through */
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            /*
             * round_mask covers all of frac_lo: the result LSB is
             * frac_hi bit 0 and the round bit is frac_lo's top bit.
             */
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction; renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Round away from Inf: return the largest finite value. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Denormal result. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /* Tiny-after-rounding: tiny iff rounding does not carry out. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Shift the fraction into denormal position, jamming a sticky bit. */
        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Rounding may have produced the smallest normal: exp becomes 1. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}

/*
 * Round and repack 'p' into format 'fmt'.  Normal numbers go through
 * parts_uncanon_normal; zero, infinity and NaN get their fixed
 * exponent/fraction encodings directly.
 */
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(p->cls == float_class_normal)) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Subtraction is addition with the sign of b flipped. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* An exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Here exactly one of a, b is zero and the other is normal. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        FloatPartsW tmp;

        /* Full-width product, then narrow back with a sticky bit. */
        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

        /*
         * The +1 accounts for a possible carry into the next integer
         * bit of the product; if the implicit bit did not become set,
         * renormalize by doubling the fraction and undo the +1.
         */
        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
                                          FloatPartsN *c, int scale,
                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Inf * x + c: invalid if c is the opposite-signed Inf. */
            if (c->cls == float_class_inf && a->sign != c->sign) {
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        /* Zero product: the sum reduces to c (re-signed/scaled below). */
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (flags & float_muladd_suppress_add_product_zero) {
                /* Target wants the addend's sign for 0 + 0. */
                a->sign = c->sign;
            } else if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Renormalize: double the fraction, drop the +1 above. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* An exact-cancellation zero is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
687 */ 688static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, 689 float_status *s) 690{ 691 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 692 bool sign = a->sign ^ b->sign; 693 694 if (likely(ab_mask == float_cmask_normal)) { 695 a->sign = sign; 696 a->exp -= b->exp + frac_div(a, b); 697 return a; 698 } 699 700 /* 0/0 or Inf/Inf => NaN */ 701 if (unlikely(ab_mask == float_cmask_zero)) { 702 float_raise(float_flag_invalid | float_flag_invalid_zdz, s); 703 goto d_nan; 704 } 705 if (unlikely(ab_mask == float_cmask_inf)) { 706 float_raise(float_flag_invalid | float_flag_invalid_idi, s); 707 goto d_nan; 708 } 709 710 /* All the NaN cases */ 711 if (unlikely(ab_mask & float_cmask_anynan)) { 712 return parts_pick_nan(a, b, s); 713 } 714 715 a->sign = sign; 716 717 /* Inf / X */ 718 if (a->cls == float_class_inf) { 719 return a; 720 } 721 722 /* 0 / X */ 723 if (a->cls == float_class_zero) { 724 return a; 725 } 726 727 /* X / Inf */ 728 if (b->cls == float_class_inf) { 729 a->cls = float_class_zero; 730 return a; 731 } 732 733 /* X / 0 => Inf */ 734 g_assert(b->cls == float_class_zero); 735 float_raise(float_flag_divbyzero, s); 736 a->cls = float_class_inf; 737 return a; 738 739 d_nan: 740 parts_default_nan(a, s); 741 return a; 742} 743 744/* 745 * Floating point remainder, per IEC/IEEE, or modulus. 
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        frac_modrem(a, b, mod_quot);
        return a;
    }

    /* Special-case result: report a zero quotient if one was asked for. */
    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* N % Inf; 0 % N */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+-0) == +-0 */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                /* sqrt(-Inf) is invalid. */
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* sqrt of a negative number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     *   7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale adjustment to avoid exponent overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        /* Result is 0 or 1 (keeping the input's sign). */
        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Rounding carried out of the fraction; renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

/*
 * Round 'a' to an integral floating-point value, dispatching on
 * class; only normal numbers need real work, and only they can
 * raise inexact.
 */
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Already integral. */
        break;
    case float_class_normal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Magnitude exceeds 64 bits: force saturation below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* -(uint64_t)min avoids signed overflow for INT64_MIN. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                /* Saturate; invalid replaces any inexact from rounding. */
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * the 'a' is negative, the result is rounded and zero is returned;
 * values that do not round to zero will raise the inexact exception
 * flag.
1218 */ 1219static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, 1220 int scale, uint64_t max, float_status *s) 1221{ 1222 int flags = 0; 1223 uint64_t r; 1224 1225 switch (p->cls) { 1226 case float_class_snan: 1227 flags |= float_flag_invalid_snan; 1228 /* fall through */ 1229 case float_class_qnan: 1230 flags |= float_flag_invalid; 1231 r = max; 1232 break; 1233 1234 case float_class_inf: 1235 flags = float_flag_invalid | float_flag_invalid_cvti; 1236 r = p->sign ? 0 : max; 1237 break; 1238 1239 case float_class_zero: 1240 return 0; 1241 1242 case float_class_normal: 1243 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1244 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1245 flags = float_flag_inexact; 1246 if (p->cls == float_class_zero) { 1247 r = 0; 1248 break; 1249 } 1250 } 1251 1252 if (p->sign) { 1253 flags = float_flag_invalid | float_flag_invalid_cvti; 1254 r = 0; 1255 } else if (p->exp > DECOMPOSED_BINARY_POINT) { 1256 flags = float_flag_invalid | float_flag_invalid_cvti; 1257 r = max; 1258 } else { 1259 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1260 if (r > max) { 1261 flags = float_flag_invalid | float_flag_invalid_cvti; 1262 r = max; 1263 } 1264 } 1265 break; 1266 1267 default: 1268 g_assert_not_reached(); 1269 } 1270 1271 float_raise(flags, s); 1272 return r; 1273} 1274 1275/* 1276 * Like partsN(float_to_sint), except do not saturate the result. 1277 * Instead, return the rounded unbounded precision two's compliment result, 1278 * modulo 2**(bitsm1 + 1). 
 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        /* NaN converts to zero in the modulo variant. */
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        /* Infinity overflows; the value modulo 2**(bitsm1+1) is zero. */
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                r = 0;
            }
            overflow = true;
        }

        /* Two's complement negation, modulo 2**64. */
        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    if (overflow) {
        /* Overflow reports invalid+cvti, superseding any inexact flag. */
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        /* Unsigned negation: well-defined even for INT64_MIN. */
        f = -f;
        p->sign = true;
    }
    shift = clz64(f);
    /* Clamp scale so the exponent arithmetic below cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    p->frac_hi = f << shift;
}

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format. The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        int shift = clz64(a);
        /* Clamp scale so the exponent arithmetic below cannot overflow. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 *
 * Returns a pointer to the selected operand (@a or @b); @flags selects
 * between min/max, the magnitude variants, and the IEEE 754-2008 "num"
 * vs 754-2019 "number" NaN handling.
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        /* Both NaN, or SNaN under the 2008 rules: normal propagation. */
        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        /*
         * Substitute sentinel exponents so that inf compares greater
         * than, and zero less than, any finite value.
         */
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}

/*
 * Floating point compare
 *
 * Returns the IEEE relation between @a and @b.  With @is_quiet, only
 * signaling NaNs raise invalid; otherwise any NaN operand does.
 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        FloatRelation cmp;

        if (a->sign != b->sign) {
            goto a_sign;
        }
        /* Same sign: compare exponents, then fractions. */
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        /* For negative operands, invert the magnitude result. */
        if (a->sign) {
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    return b->sign ? float_relation_greater : float_relation_less;
}

/*
 * Multiply A by 2 raised to the power N.
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Scaling zero or infinity leaves it unchanged. */
        break;
    case float_class_normal:
        /* Clamp n so the exponent addition cannot overflow int. */
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Return log2(A)
 *
 * Computed by repeated squaring of the fraction: each squaring either
 * produces a carry into the integer bit (emitting a 1 bit of the
 * result) or not (emitting a 0), building the fractional part of the
 * logarithm one bit per iteration.
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                /* log2(-inf) is invalid. */
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    if (unlikely(a->sign)) {
        /* log2 of a negative number is invalid. */
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /* Square the current fraction at the cheapest usable width. */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction reduced to exactly 1.0: the log is exact. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* No carry out: double the fraction and emit a 0 bit. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Integer part of the result is the input exponent. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Combine the integer part with the fractional bits in r. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}