/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
 * Turn the NaN operand A into the NaN result of a single-operand
 * operation: an SNaN raises InvalidOperation and is silenced (or
 * replaced by the default NaN in default-NaN mode); a QNaN is passed
 * through unchanged unless default-NaN mode forces the default NaN.
 */
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        /* Callers must only pass NaN classes here. */
        g_assert_not_reached();
    }
}

/*
 * Select which NaN operand of a two-operand operation to propagate,
 * following the target's float_2nan_prop_rule.  At least one of A and
 * B is a NaN.  An SNaN input raises InvalidOperation; the NaN chosen
 * for the result is silenced before being returned.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
        } else {
            ret = b;
            break;
        }
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: prefer the operand with sign bit clear. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}

/*
 * Select the NaN result for the fused multiply-add A*B + C, where at
 * least one operand is a NaN.  AB_MASK and ABC_MASK are the combined
 * class masks of (A,B) and (A,B,C) respectively, used to detect the
 * (Inf,0) product and SNaN inputs.  The chosen NaN is silenced before
 * being returned.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (infzero &&
        !(s->float_infzeronan_rule & float_infzeronan_suppress_invalid)) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule & ~float_infzeronan_suppress_invalid) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        ret = c;
    } else {
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            /* Zero exponent and zero fraction: a true zero. */
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Input denormal flushed to zero. */
            float_raise(float_flag_input_denormal_flushed, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /* Subnormal: normalize the fraction, adjusting the exponent. */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !fmt->m68k_denormal;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /* Normal number (ARM AHP has no Inf/NaN encodings). */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent with non-zero fraction: some kind of NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}

/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /* Select the rounding increment for the current rounding mode. */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even_max:
        overflow_norm = true;
        /* fall through */
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            /* The round bit straddles the word boundary. */
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* The increment carried out of the fraction; renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Round away from Inf: return the largest finite value. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal_flushed;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /* Tininess after rounding: test on a discarded copy. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}

/*
 * Repack P into the interchange format FMT, dispatching on class:
 * normal numbers go through parts_uncanon_normal; zero, Inf and NaN
 * get their fixed exponent encodings directly.
 */
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(p->cls == float_class_normal)) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* An exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Here exactly one of A, B is zero and the other is normal. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
509 */ 510static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, 511 float_status *s) 512{ 513 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 514 bool sign = a->sign ^ b->sign; 515 516 if (likely(ab_mask == float_cmask_normal)) { 517 FloatPartsW tmp; 518 519 frac_mulw(&tmp, a, b); 520 frac_truncjam(a, &tmp); 521 522 a->exp += b->exp + 1; 523 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 524 frac_add(a, a, a); 525 a->exp -= 1; 526 } 527 528 a->sign = sign; 529 return a; 530 } 531 532 /* Inf * Zero == NaN */ 533 if (unlikely(ab_mask == float_cmask_infzero)) { 534 float_raise(float_flag_invalid | float_flag_invalid_imz, s); 535 parts_default_nan(a, s); 536 return a; 537 } 538 539 if (unlikely(ab_mask & float_cmask_anynan)) { 540 return parts_pick_nan(a, b, s); 541 } 542 543 /* Multiply by 0 or Inf */ 544 if (ab_mask & float_cmask_inf) { 545 a->cls = float_class_inf; 546 a->sign = sign; 547 return a; 548 } 549 550 g_assert(ab_mask & float_cmask_zero); 551 a->cls = float_class_zero; 552 a->sign = sign; 553 return a; 554} 555 556/* 557 * Returns the result of multiplying the floating-point values `a' and 558 * `b' then adding 'c', with no intermediate rounding step after the 559 * multiplication. The operation is performed according to the 560 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. 561 * The flags argument allows the caller to select negation of the 562 * addend, the intermediate product, or the final result. (The 563 * difference between this and having the caller do a separate 564 * negation is that negating externally will flip the sign bit on NaNs.) 565 * 566 * Requires A and C extracted into a double-sized structure to provide the 567 * extra space for the widening multiply. 
 */
static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
                                          FloatPartsN *c, int scale,
                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* (Inf * 0) + C with no NaN inputs: invalid. */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* (+/-Inf * N) + -/+Inf: invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            /* Zero product: the sum is just C. */
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (flags & float_muladd_suppress_add_product_zero) {
                a->sign = c->sign;
            } else if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* An exact zero difference is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
688 */ 689static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, 690 float_status *s) 691{ 692 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 693 bool sign = a->sign ^ b->sign; 694 695 if (likely(ab_mask == float_cmask_normal)) { 696 a->sign = sign; 697 a->exp -= b->exp + frac_div(a, b); 698 return a; 699 } 700 701 /* 0/0 or Inf/Inf => NaN */ 702 if (unlikely(ab_mask == float_cmask_zero)) { 703 float_raise(float_flag_invalid | float_flag_invalid_zdz, s); 704 goto d_nan; 705 } 706 if (unlikely(ab_mask == float_cmask_inf)) { 707 float_raise(float_flag_invalid | float_flag_invalid_idi, s); 708 goto d_nan; 709 } 710 711 /* All the NaN cases */ 712 if (unlikely(ab_mask & float_cmask_anynan)) { 713 return parts_pick_nan(a, b, s); 714 } 715 716 a->sign = sign; 717 718 /* Inf / X */ 719 if (a->cls == float_class_inf) { 720 return a; 721 } 722 723 /* 0 / X */ 724 if (a->cls == float_class_zero) { 725 return a; 726 } 727 728 /* X / Inf */ 729 if (b->cls == float_class_inf) { 730 a->cls = float_class_zero; 731 return a; 732 } 733 734 /* X / 0 => Inf */ 735 g_assert(b->cls == float_class_zero); 736 float_raise(float_flag_divbyzero, s); 737 a->cls = float_class_inf; 738 return a; 739 740 d_nan: 741 parts_default_nan(a, s); 742 return a; 743} 744 745/* 746 * Floating point remainder, per IEC/IEEE, or modulus. 
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        frac_modrem(a, b, mod_quot);
        return a;
    }

    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* N % Inf; 0 % N */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+/-0) == +/-0 */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                /* sqrt(-Inf) is invalid. */
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    if (unlikely(a->sign)) {
        /* sqrt of a negative non-zero value is invalid. */
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *  s = m*r
     *  d = s*r
     *  u = 3 - d
     *  r = r*u/2
     *
     * Fixed point representations:
     *  m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale to avoid exponent overflow below. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Carry out of the fraction: renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

/*
 * Round A to an integral floating-point value, dispatching on class.
 * Raises Inexact when a normal value changes; NaNs go through
 * parts_return_nan; zero and Inf are returned unchanged.
 */
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Too large for the 64-bit shift; saturates below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* Note -(uint64_t)min is the magnitude of MIN. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * the 'a' is negative, the result is rounded and zero is returned;
 * values that do not round to zero will raise the inexact exception
 * flag.
1219 */ 1220static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, 1221 int scale, uint64_t max, float_status *s) 1222{ 1223 int flags = 0; 1224 uint64_t r; 1225 1226 switch (p->cls) { 1227 case float_class_snan: 1228 flags |= float_flag_invalid_snan; 1229 /* fall through */ 1230 case float_class_qnan: 1231 flags |= float_flag_invalid; 1232 r = max; 1233 break; 1234 1235 case float_class_inf: 1236 flags = float_flag_invalid | float_flag_invalid_cvti; 1237 r = p->sign ? 0 : max; 1238 break; 1239 1240 case float_class_zero: 1241 return 0; 1242 1243 case float_class_normal: 1244 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1245 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1246 flags = float_flag_inexact; 1247 if (p->cls == float_class_zero) { 1248 r = 0; 1249 break; 1250 } 1251 } 1252 1253 if (p->sign) { 1254 flags = float_flag_invalid | float_flag_invalid_cvti; 1255 r = 0; 1256 } else if (p->exp > DECOMPOSED_BINARY_POINT) { 1257 flags = float_flag_invalid | float_flag_invalid_cvti; 1258 r = max; 1259 } else { 1260 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1261 if (r > max) { 1262 flags = float_flag_invalid | float_flag_invalid_cvti; 1263 r = max; 1264 } 1265 } 1266 break; 1267 1268 default: 1269 g_assert_not_reached(); 1270 } 1271 1272 float_raise(flags, s); 1273 return r; 1274} 1275 1276/* 1277 * Like partsN(float_to_sint), except do not saturate the result. 1278 * Instead, return the rounded unbounded precision two's compliment result, 1279 * modulo 2**(bitsm1 + 1). 
 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                /* Everything shifted out: result is 0 mod 2**(bitsm1+1). */
                r = 0;
            }
            overflow = true;
        }

        /* Two's complement negation of the modular result. */
        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Overflow replaces any inexact flag with invalid|cvti. */
    if (overflow) {
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format.  The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        /* Negate via unsigned arithmetic, so INT64_MIN is handled. */
        f = -f;
        p->sign = true;
    }
    shift = clz64(f);
    /* Bound the exponent adjustment. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    /* Left-justify so the msb lands on the implicit bit position. */
    p->frac_hi = f << shift;
}

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format.  The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        int shift = clz64(a);
        /* Bound the exponent adjustment. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        /* Left-justify so the msb lands on the implicit bit position. */
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            /* Exactly one operand is a QNaN: return the numerical one. */
            return is_nan(a->cls) ?
                b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        /* Both NaNs, or SNaN rules above don't apply: normal propagation. */
        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    /* Substitute extreme exponents for inf/zero so one compare path works. */
    if (unlikely(ab_mask != float_cmask_normal)) {
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    /* min selects the smaller operand: invert the ordering. */
    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ?
b : a; 1506} 1507 1508/* 1509 * Floating point compare 1510 */ 1511static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, 1512 float_status *s, bool is_quiet) 1513{ 1514 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1515 1516 if (likely(ab_mask == float_cmask_normal)) { 1517 FloatRelation cmp; 1518 1519 if (a->sign != b->sign) { 1520 goto a_sign; 1521 } 1522 if (a->exp == b->exp) { 1523 cmp = frac_cmp(a, b); 1524 } else if (a->exp < b->exp) { 1525 cmp = float_relation_less; 1526 } else { 1527 cmp = float_relation_greater; 1528 } 1529 if (a->sign) { 1530 cmp = -cmp; 1531 } 1532 return cmp; 1533 } 1534 1535 if (unlikely(ab_mask & float_cmask_anynan)) { 1536 if (ab_mask & float_cmask_snan) { 1537 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 1538 } else if (!is_quiet) { 1539 float_raise(float_flag_invalid, s); 1540 } 1541 return float_relation_unordered; 1542 } 1543 1544 if (ab_mask & float_cmask_zero) { 1545 if (ab_mask == float_cmask_zero) { 1546 return float_relation_equal; 1547 } else if (a->cls == float_class_zero) { 1548 goto b_sign; 1549 } else { 1550 goto a_sign; 1551 } 1552 } 1553 1554 if (ab_mask == float_cmask_inf) { 1555 if (a->sign == b->sign) { 1556 return float_relation_equal; 1557 } 1558 } else if (b->cls == float_class_inf) { 1559 goto b_sign; 1560 } else { 1561 g_assert(a->cls == float_class_inf); 1562 } 1563 1564 a_sign: 1565 return a->sign ? float_relation_less : float_relation_greater; 1566 b_sign: 1567 return b->sign ? float_relation_greater : float_relation_less; 1568} 1569 1570/* 1571 * Multiply A by 2 raised to the power N. 
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Scaling zero or infinity is a no-op. */
        break;
    case float_class_normal:
        /* Bound the exponent adjustment. */
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Return log2(A)
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    /* Dispatch the non-finite and zero cases first. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                /* log2(-inf) is a NaN. */
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* log2 of any negative value is a NaN. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    /* Each iteration squares the fraction and emits one result bit in t. */
    for (i = 0; i < n; i++) {
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Only the high 32 bits are set: a narrow square suffices. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction reduced to the implicit bit alone: result is exact. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Square was < 1.0: double it back into [1.0,2.0). */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* The integer part of the result is simply the input exponent. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fractional part of the result and combine with a_exp. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        /* a_exp == 0: the result is just the fractional part. */
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}