/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
 * Turn the NaN in *a into the NaN result of a unary operation, in place:
 * an SNaN raises Invalid (with the snan detail flag) and is then either
 * silenced or replaced by the default NaN; a QNaN is replaced by the
 * default NaN only when default_nan_mode is set.  *a must be a NaN.
 */
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Select which of the two operands of a binary operation supplies the
 * NaN result, according to the target's float_2nan_prop_rule.  At least
 * one of *a, *b must be a NaN.  Raises Invalid if either is an SNaN,
 * and silences the chosen NaN before returning it.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        /* Prefer an SNaN in operand order; else fall back to any NaN. */
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
        } else {
            ret = b;
            break;
        }
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: prefer the positive-signed NaN. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}

/*
 * Select the NaN result of a fused multiply-add a*b + c, given the
 * classification mask of (a, b) in ab_mask and of all three operands in
 * abc_mask.  Handles the (Inf * 0) + NaN special case per the target's
 * float_infzeronan_rule, and otherwise propagates one of up to three
 * NaN inputs per float_3nan_prop_rule.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (infzero) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        ret = c;
    } else {
        /*
         * The 3-NaN propagation rule encodes an operand preference order
         * as a sequence of 2-bit indices into val[]; walk it until a
         * (S)NaN operand is found.
         */
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Denormal input flushed to zero. */
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /* Denormal: normalize the fraction and adjust the exponent. */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !fmt->m68k_denormal;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /* Normal number (ARM AltHP uses the top exponent for normals too). */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent with non-zero fraction: NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}

/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Compute the increment to apply to the fraction for rounding.
     * Note: when N > 64 and round_mask fills the entire low word,
     * frac_lsb (round_mask + 1) wraps to 0 and the result lsb is
     * frac_hi bit 0 -- the "N > 64 && frac_lsb == 0" cases handle this.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction; renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* Keep a normal result with a rebiased exponent. */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Round toward the largest finite value. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Denormal result path. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /* Tininess after rounding: tiny iff rounding does not carry. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Exponent is 1 iff rounding carried back up to a normal. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}

/*
 * Dispatch uncanonicalization on the class of *p: normals go through
 * parts_uncanon_normal; zero, inf and NaN get their fixed encodings.
 */
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(p->cls == float_class_normal)) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* An exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Here exactly one of a, b is zero and the other normal. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        FloatPartsW tmp;

        /* Widening multiply, then narrow back with the sticky bit. */
        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            /* Product in [1, 2): renormalize by one bit. */
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 with no NaN operand: invalid, default NaN. */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* Inf - Inf between product and addend: invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            /* 0 * x + c == c. */
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* An exact zero sum is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        a->sign = sign;
        /* frac_div returns the exponent adjustment for the quotient. */
        a->exp -= b->exp + frac_div(a, b);
        return a;
    }

    /* 0/0 or Inf/Inf => NaN */
    if (unlikely(ab_mask == float_cmask_zero)) {
        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
        goto d_nan;
    }
    if (unlikely(ab_mask == float_cmask_inf)) {
        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
        goto d_nan;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    a->sign = sign;

    /* Inf / X */
    if (a->cls == float_class_inf) {
        return a;
    }

    /* 0 / X */
    if (a->cls == float_class_zero) {
        return a;
    }

    /* X / Inf */
    if (b->cls == float_class_inf) {
        a->cls = float_class_zero;
        return a;
    }

    /* X / 0 => Inf */
    g_assert(b->cls == float_class_zero);
    float_raise(float_flag_divbyzero, s);
    a->cls = float_class_inf;
    return a;

 d_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Floating point remainder, per IEC/IEEE, or modulus.
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        frac_modrem(a, b, mod_quot);
        return a;
    }

    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* N % Inf; 0 % N */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;        /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;        /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+-0) == +-0. */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    if (unlikely(a->sign)) {
        /* sqrt of a negative number: invalid. */
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *  s = m*r
     *  d = s*r
     *  u = 3 - d
     *  r = r*u/2
     *
     * Fixed point representations:
     *  m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so the exponent adjustment cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Rounding carried out of the fraction; renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

/*
 * Round *a to an integral floating-point value, dispatching on class;
 * raises Inexact when round_to_int_normal reports a changed value.
 */
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* -(uint64_t)min is the magnitude of min without overflow. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * the 'a' is negative, the result is rounded and zero is returned;
 * values that do not round to zero will raise the inexact exception
 * flag.
 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        /* NaN converts to the largest unsigned integer, raising invalid. */
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                /*
                 * A (negative) value that rounded to zero converts to 0
                 * with only the inexact flag -- see the comment above.
                 */
                r = 0;
                break;
            }
        }

        if (p->sign) {
            /* Negative values that did not round to zero are invalid. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            /* Magnitude exceeds 64 bits: certain overflow. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Like partsN(float_to_sint), except do not saturate the result.
 * Instead, return the rounded unbounded precision two's complement result,
 * modulo 2**(bitsm1 + 1).
 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        /* Infinity has no low-order bits to return. */
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                r = 0;
            }
            overflow = true;
        }

        if (p->sign) {
            /* Two's complement negation of the magnitude. */
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    if (overflow) {
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        /* Negate in uint64_t so that INT64_MIN does not overflow. */
        f = -f;
        p->sign = true;
    }
    /* Normalize: shift the most significant bit up to the implicit bit. */
    shift = clz64(f);
    /* Clamp scale so the exponent arithmetic stays in range. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    p->frac_hi = f << shift;
}

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format. The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        /* Normalize the value and apply the (clamped) scale. */
        int shift = clz64(a);
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 *
 * FLAGS is a combination of minmax_ismin (select the smaller operand),
 * minmax_ismag (compare magnitudes), and minmax_isnum / minmax_isnumber
 * (IEEE 754-2008 minNum/maxNum vs 754-2019 minimumNumber/maximumNumber
 * NaN handling, as described in the comments below).
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        /*
         * Map zero and infinity onto extreme exponents so the
         * magnitude comparison below handles them uniformly.
         */
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}

/*
 * Floating point compare
 *
 * Returns the FloatRelation between A and B.  If IS_QUIET, only
 * signaling NaNs raise the invalid flag; otherwise any NaN operand
 * does (the unordered result is returned either way).
 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        /* Fast path: both operands are normal numbers. */
        FloatRelation cmp;

        if (a->sign != b->sign) {
            goto a_sign;
        }
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        if (a->sign) {
            /* Both negative: invert the magnitude comparison. */
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* Zeros compare equal regardless of sign. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
        /* Infinities of opposite sign: fall through to sign compare. */
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    /* The relation is decided by A's sign alone. */
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    /* The relation is decided by B's sign alone (inverted). */
    return b->sign ? float_relation_greater : float_relation_less;
}

/*
 * Multiply A by 2 raised to the power N.
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Scaling zero or infinity does not change the value. */
        break;
    case float_class_normal:
        /* Clamp n so the exponent addition cannot overflow. */
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Return log2(A)
 *
 * Computed digit-by-digit: repeatedly square the fraction; each time
 * the square reaches [2,4) the corresponding result bit is 1, otherwise
 * the value is doubled and the bit is 0.  The integer part of the
 * result is A's exponent; the fractional part accumulates in R.
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            /* log2(+inf) = +inf; log2(-inf) is invalid. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* log2 of any negative value is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    /* r accumulates fraction bits of the result; t is the current bit. */
    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction. At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result. To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /* Square the fraction, using the cheapest multiply that fits. */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction is exactly 1.0: the log2 is exact. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0). When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Square stayed below 2.0: double it (bit is 0). */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Integer part of the result is the input exponent. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fractional part and combine it with the integer part. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}