1/* 2 * QEMU float support 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file after December 1st 2014 will be 14 * taken to be licensed under the Softfloat-2a license unless specifically 15 * indicated otherwise. 16 */ 17 18static void partsN(return_nan)(FloatPartsN *a, float_status *s) 19{ 20 switch (a->cls) { 21 case float_class_snan: 22 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 23 if (s->default_nan_mode) { 24 parts_default_nan(a, s); 25 } else { 26 parts_silence_nan(a, s); 27 } 28 break; 29 case float_class_qnan: 30 if (s->default_nan_mode) { 31 parts_default_nan(a, s); 32 } 33 break; 34 default: 35 g_assert_not_reached(); 36 } 37} 38 39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, 40 float_status *s) 41{ 42 bool have_snan = false; 43 FloatPartsN *ret; 44 int cmp; 45 46 if (is_snan(a->cls) || is_snan(b->cls)) { 47 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 48 have_snan = true; 49 } 50 51 if (s->default_nan_mode) { 52 parts_default_nan(a, s); 53 return a; 54 } 55 56 switch (s->float_2nan_prop_rule) { 57 case float_2nan_prop_s_ab: 58 if (have_snan) { 59 ret = is_snan(a->cls) ? a : b; 60 break; 61 } 62 /* fall through */ 63 case float_2nan_prop_ab: 64 ret = is_nan(a->cls) ? a : b; 65 break; 66 case float_2nan_prop_s_ba: 67 if (have_snan) { 68 ret = is_snan(b->cls) ? b : a; 69 break; 70 } 71 /* fall through */ 72 case float_2nan_prop_ba: 73 ret = is_nan(b->cls) ? b : a; 74 break; 75 case float_2nan_prop_x87: 76 /* 77 * This implements x87 NaN propagation rules: 78 * SNaN + QNaN => return the QNaN 79 * two SNaNs => return the one with the larger significand, silenced 80 * two QNaNs => return the one with the larger significand 81 * SNaN and a non-NaN => return the SNaN, silenced 82 * QNaN and a non-NaN => return the QNaN 83 * 84 * If we get down to comparing significands and they are the same, 85 * return the NaN with the positive sign bit (if any). 86 */ 87 if (is_snan(a->cls)) { 88 if (!is_snan(b->cls)) { 89 ret = is_qnan(b->cls) ? b : a; 90 break; 91 } 92 } else if (is_qnan(a->cls)) { 93 if (is_snan(b->cls) || !is_qnan(b->cls)) { 94 ret = a; 95 break; 96 } 97 } else { 98 ret = b; 99 break; 100 } 101 cmp = frac_cmp(a, b); 102 if (cmp == 0) { 103 cmp = a->sign < b->sign; 104 } 105 ret = cmp > 0 ? a : b; 106 break; 107 default: 108 g_assert_not_reached(); 109 } 110 111 if (is_snan(ret->cls)) { 112 parts_silence_nan(ret, s); 113 } 114 return ret; 115} 116 117static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, 118 FloatPartsN *c, float_status *s, 119 int ab_mask, int abc_mask) 120{ 121 bool infzero = (ab_mask == float_cmask_infzero); 122 bool have_snan = (abc_mask & float_cmask_snan); 123 FloatPartsN *ret; 124 125 if (unlikely(have_snan)) { 126 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 127 } 128 129 if (infzero && 130 !(s->float_infzeronan_rule & float_infzeronan_suppress_invalid)) { 131 /* This is (0 * inf) + NaN or (inf * 0) + NaN */ 132 float_raise(float_flag_invalid | float_flag_invalid_imz, s); 133 } 134 135 if (s->default_nan_mode) { 136 /* 137 * We guarantee not to require the target to tell us how to 138 * pick a NaN if we're always returning the default NaN. 139 * But if we're not in default-NaN mode then the target must 140 * specify. 141 */ 142 goto default_nan; 143 } else if (infzero) { 144 /* 145 * Inf * 0 + NaN -- some implementations return the 146 * default NaN here, and some return the input NaN. 147 */ 148 switch (s->float_infzeronan_rule & ~float_infzeronan_suppress_invalid) { 149 case float_infzeronan_dnan_never: 150 break; 151 case float_infzeronan_dnan_always: 152 goto default_nan; 153 case float_infzeronan_dnan_if_qnan: 154 if (is_qnan(c->cls)) { 155 goto default_nan; 156 } 157 break; 158 default: 159 g_assert_not_reached(); 160 } 161 ret = c; 162 } else { 163 FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c }; 164 Float3NaNPropRule rule = s->float_3nan_prop_rule; 165 166 assert(rule != float_3nan_prop_none); 167 if (have_snan && (rule & R_3NAN_SNAN_MASK)) { 168 /* We have at least one SNaN input and should prefer it */ 169 do { 170 ret = val[rule & R_3NAN_1ST_MASK]; 171 rule >>= R_3NAN_1ST_LENGTH; 172 } while (!is_snan(ret->cls)); 173 } else { 174 do { 175 ret = val[rule & R_3NAN_1ST_MASK]; 176 rule >>= R_3NAN_1ST_LENGTH; 177 } while (!is_nan(ret->cls)); 178 } 179 } 180 181 if (is_snan(ret->cls)) { 182 parts_silence_nan(ret, s); 183 } 184 return ret; 185 186 default_nan: 187 parts_default_nan(a, s); 188 return a; 189} 190 191/* 192 * Canonicalize the FloatParts structure. Determine the class, 193 * unbias the exponent, and normalize the fraction. 194 */ 195static void partsN(canonicalize)(FloatPartsN *p, float_status *status, 196 const FloatFmt *fmt) 197{ 198 /* 199 * It's target-dependent how to handle the case of exponent 0 200 * and Integer bit set. Intel calls these "pseudodenormals", 201 * and treats them as if the integer bit was 0, and never 202 * produces them on output. This is the default behaviour for QEMU. 203 * For m68k, the integer bit is considered validly part of the 204 * input value when the exponent is 0, and may be 0 or 1, 205 * giving extra range. They may also be generated as outputs. 206 * (The m68k manual actually calls these values part of the 207 * normalized number range, not the denormalized number range, 208 * but that distinction is not important for us, because 209 * m68k doesn't care about the input_denormal_used status flag.) 210 * floatx80_pseudo_denormal_valid selects the m68k behaviour, 211 * which changes both how we canonicalize such a value and 212 * how we uncanonicalize results. 213 */ 214 bool has_pseudo_denormals = fmt->has_explicit_bit && 215 (status->floatx80_behaviour & floatx80_pseudo_denormal_valid); 216 217 if (unlikely(p->exp == 0)) { 218 if (likely(frac_eqz(p))) { 219 p->cls = float_class_zero; 220 } else if (status->flush_inputs_to_zero) { 221 float_raise(float_flag_input_denormal_flushed, status); 222 p->cls = float_class_zero; 223 frac_clear(p); 224 } else { 225 int shift = frac_normalize(p); 226 p->cls = float_class_denormal; 227 p->exp = fmt->frac_shift - fmt->exp_bias 228 - shift + !has_pseudo_denormals; 229 } 230 } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { 231 p->cls = float_class_normal; 232 p->exp -= fmt->exp_bias; 233 frac_shl(p, fmt->frac_shift); 234 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 235 } else if (likely(frac_eqz(p))) { 236 p->cls = float_class_inf; 237 } else { 238 frac_shl(p, fmt->frac_shift); 239 p->cls = (parts_is_snan_frac(p->frac_hi, status) 240 ? float_class_snan : float_class_qnan); 241 } 242} 243 244/* 245 * Round and uncanonicalize a floating-point number by parts. There 246 * are FRAC_SHIFT bits that may require rounding at the bottom of the 247 * fraction; these bits will be removed. The exponent will be biased 248 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. 249 */ 250static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, 251 const FloatFmt *fmt) 252{ 253 const int exp_max = fmt->exp_max; 254 const int frac_shift = fmt->frac_shift; 255 const uint64_t round_mask = fmt->round_mask; 256 const uint64_t frac_lsb = round_mask + 1; 257 const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); 258 const uint64_t roundeven_mask = round_mask | frac_lsb; 259 uint64_t inc; 260 bool overflow_norm = false; 261 int exp, flags = 0; 262 263 switch (s->float_rounding_mode) { 264 case float_round_nearest_even_max: 265 overflow_norm = true; 266 /* fall through */ 267 case float_round_nearest_even: 268 if (N > 64 && frac_lsb == 0) { 269 inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 270 ? frac_lsbm1 : 0); 271 } else { 272 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 273 ? frac_lsbm1 : 0); 274 } 275 break; 276 case float_round_ties_away: 277 inc = frac_lsbm1; 278 break; 279 case float_round_to_zero: 280 overflow_norm = true; 281 inc = 0; 282 break; 283 case float_round_up: 284 inc = p->sign ? 0 : round_mask; 285 overflow_norm = p->sign; 286 break; 287 case float_round_down: 288 inc = p->sign ? round_mask : 0; 289 overflow_norm = !p->sign; 290 break; 291 case float_round_to_odd: 292 overflow_norm = true; 293 /* fall through */ 294 case float_round_to_odd_inf: 295 if (N > 64 && frac_lsb == 0) { 296 inc = p->frac_hi & 1 ? 0 : round_mask; 297 } else { 298 inc = p->frac_lo & frac_lsb ? 0 : round_mask; 299 } 300 break; 301 default: 302 g_assert_not_reached(); 303 } 304 305 exp = p->exp + fmt->exp_bias; 306 if (likely(exp > 0)) { 307 if (p->frac_lo & round_mask) { 308 flags |= float_flag_inexact; 309 if (frac_addi(p, p, inc)) { 310 frac_shr(p, 1); 311 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 312 exp++; 313 } 314 p->frac_lo &= ~round_mask; 315 } 316 317 if (fmt->arm_althp) { 318 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ 319 if (unlikely(exp > exp_max)) { 320 /* Overflow. Return the maximum normal. */ 321 flags = float_flag_invalid; 322 exp = exp_max; 323 frac_allones(p); 324 p->frac_lo &= ~round_mask; 325 } 326 } else if (unlikely(exp >= exp_max)) { 327 flags |= float_flag_overflow; 328 if (s->rebias_overflow) { 329 exp -= fmt->exp_re_bias; 330 } else if (overflow_norm) { 331 flags |= float_flag_inexact; 332 exp = exp_max - 1; 333 frac_allones(p); 334 p->frac_lo &= ~round_mask; 335 } else { 336 flags |= float_flag_inexact; 337 p->cls = float_class_inf; 338 exp = exp_max; 339 frac_clear(p); 340 } 341 } 342 frac_shr(p, frac_shift); 343 } else if (unlikely(s->rebias_underflow)) { 344 flags |= float_flag_underflow; 345 exp += fmt->exp_re_bias; 346 if (p->frac_lo & round_mask) { 347 flags |= float_flag_inexact; 348 if (frac_addi(p, p, inc)) { 349 frac_shr(p, 1); 350 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 351 exp++; 352 } 353 p->frac_lo &= ~round_mask; 354 } 355 frac_shr(p, frac_shift); 356 } else if (s->flush_to_zero && 357 s->ftz_detection == float_ftz_before_rounding) { 358 flags |= float_flag_output_denormal_flushed; 359 p->cls = float_class_zero; 360 exp = 0; 361 frac_clear(p); 362 } else { 363 bool is_tiny = s->tininess_before_rounding || exp < 0; 364 bool has_pseudo_denormals = fmt->has_explicit_bit && 365 (s->floatx80_behaviour & floatx80_pseudo_denormal_valid); 366 367 if (!is_tiny) { 368 FloatPartsN discard; 369 is_tiny = !frac_addi(&discard, p, inc); 370 } 371 372 frac_shrjam(p, !has_pseudo_denormals - exp); 373 374 if (p->frac_lo & round_mask) { 375 /* Need to recompute round-to-even/round-to-odd. */ 376 switch (s->float_rounding_mode) { 377 case float_round_nearest_even: 378 if (N > 64 && frac_lsb == 0) { 379 inc = ((p->frac_hi & 1) || 380 (p->frac_lo & round_mask) != frac_lsbm1 381 ? frac_lsbm1 : 0); 382 } else { 383 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 384 ? frac_lsbm1 : 0); 385 } 386 break; 387 case float_round_to_odd: 388 case float_round_to_odd_inf: 389 if (N > 64 && frac_lsb == 0) { 390 inc = p->frac_hi & 1 ? 0 : round_mask; 391 } else { 392 inc = p->frac_lo & frac_lsb ? 0 : round_mask; 393 } 394 break; 395 default: 396 break; 397 } 398 flags |= float_flag_inexact; 399 frac_addi(p, p, inc); 400 p->frac_lo &= ~round_mask; 401 } 402 403 exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !has_pseudo_denormals; 404 frac_shr(p, frac_shift); 405 406 if (is_tiny) { 407 if (s->flush_to_zero) { 408 assert(s->ftz_detection == float_ftz_after_rounding); 409 flags |= float_flag_output_denormal_flushed; 410 p->cls = float_class_zero; 411 exp = 0; 412 frac_clear(p); 413 } else if (flags & float_flag_inexact) { 414 flags |= float_flag_underflow; 415 } 416 if (exp == 0 && frac_eqz(p)) { 417 p->cls = float_class_zero; 418 } 419 } 420 } 421 p->exp = exp; 422 float_raise(flags, s); 423} 424 425static void partsN(uncanon)(FloatPartsN *p, float_status *s, 426 const FloatFmt *fmt) 427{ 428 if (likely(is_anynorm(p->cls))) { 429 parts_uncanon_normal(p, s, fmt); 430 } else { 431 switch (p->cls) { 432 case float_class_zero: 433 p->exp = 0; 434 frac_clear(p); 435 return; 436 case float_class_inf: 437 g_assert(!fmt->arm_althp); 438 p->exp = fmt->exp_max; 439 frac_clear(p); 440 return; 441 case float_class_qnan: 442 case float_class_snan: 443 g_assert(!fmt->arm_althp); 444 p->exp = fmt->exp_max; 445 frac_shr(p, fmt->frac_shift); 446 return; 447 default: 448 break; 449 } 450 g_assert_not_reached(); 451 } 452} 453 454/* 455 * Returns the result of adding or subtracting the values of the 456 * floating-point values `a' and `b'. The operation is performed 457 * according to the IEC/IEEE Standard for Binary Floating-Point 458 * Arithmetic. 459 */ 460static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, 461 float_status *s, bool subtract) 462{ 463 bool b_sign = b->sign ^ subtract; 464 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 465 466 /* 467 * For addition and subtraction, we will consume an 468 * input denormal unless the other input is a NaN. 469 */ 470 if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == 471 float_cmask_denormal) { 472 float_raise(float_flag_input_denormal_used, s); 473 } 474 475 if (a->sign != b_sign) { 476 /* Subtraction */ 477 if (likely(cmask_is_only_normals(ab_mask))) { 478 if (parts_sub_normal(a, b)) { 479 return a; 480 } 481 /* Subtract was exact, fall through to set sign. */ 482 ab_mask = float_cmask_zero; 483 } 484 485 if (ab_mask == float_cmask_zero) { 486 a->sign = s->float_rounding_mode == float_round_down; 487 return a; 488 } 489 490 if (unlikely(ab_mask & float_cmask_anynan)) { 491 goto p_nan; 492 } 493 494 if (ab_mask & float_cmask_inf) { 495 if (a->cls != float_class_inf) { 496 /* N - Inf */ 497 goto return_b; 498 } 499 if (b->cls != float_class_inf) { 500 /* Inf - N */ 501 return a; 502 } 503 /* Inf - Inf */ 504 float_raise(float_flag_invalid | float_flag_invalid_isi, s); 505 parts_default_nan(a, s); 506 return a; 507 } 508 } else { 509 /* Addition */ 510 if (likely(cmask_is_only_normals(ab_mask))) { 511 parts_add_normal(a, b); 512 return a; 513 } 514 515 if (ab_mask == float_cmask_zero) { 516 return a; 517 } 518 519 if (unlikely(ab_mask & float_cmask_anynan)) { 520 goto p_nan; 521 } 522 523 if (ab_mask & float_cmask_inf) { 524 a->cls = float_class_inf; 525 return a; 526 } 527 } 528 529 if (b->cls == float_class_zero) { 530 g_assert(is_anynorm(a->cls)); 531 return a; 532 } 533 534 g_assert(a->cls == float_class_zero); 535 g_assert(is_anynorm(b->cls)); 536 return_b: 537 b->sign = b_sign; 538 return b; 539 540 p_nan: 541 return parts_pick_nan(a, b, s); 542} 543 544/* 545 * Returns the result of multiplying the floating-point values `a' and 546 * `b'. The operation is performed according to the IEC/IEEE Standard 547 * for Binary Floating-Point Arithmetic. 548 */ 549static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, 550 float_status *s) 551{ 552 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 553 bool sign = a->sign ^ b->sign; 554 555 if (likely(cmask_is_only_normals(ab_mask))) { 556 FloatPartsW tmp; 557 558 if (ab_mask & float_cmask_denormal) { 559 float_raise(float_flag_input_denormal_used, s); 560 } 561 562 frac_mulw(&tmp, a, b); 563 frac_truncjam(a, &tmp); 564 565 a->exp += b->exp + 1; 566 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 567 frac_add(a, a, a); 568 a->exp -= 1; 569 } 570 571 a->sign = sign; 572 return a; 573 } 574 575 /* Inf * Zero == NaN */ 576 if (unlikely(ab_mask == float_cmask_infzero)) { 577 float_raise(float_flag_invalid | float_flag_invalid_imz, s); 578 parts_default_nan(a, s); 579 return a; 580 } 581 582 if (unlikely(ab_mask & float_cmask_anynan)) { 583 return parts_pick_nan(a, b, s); 584 } 585 586 /* Multiply by 0 or Inf */ 587 if (ab_mask & float_cmask_denormal) { 588 float_raise(float_flag_input_denormal_used, s); 589 } 590 591 if (ab_mask & float_cmask_inf) { 592 a->cls = float_class_inf; 593 a->sign = sign; 594 return a; 595 } 596 597 g_assert(ab_mask & float_cmask_zero); 598 a->cls = float_class_zero; 599 a->sign = sign; 600 return a; 601} 602 603/* 604 * Returns the result of multiplying the floating-point values `a' and 605 * `b' then adding 'c', with no intermediate rounding step after the 606 * multiplication. The operation is performed according to the 607 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. 608 * The flags argument allows the caller to select negation of the 609 * addend, the intermediate product, or the final result. (The 610 * difference between this and having the caller do a separate 611 * negation is that negating externally will flip the sign bit on NaNs.) 612 * 613 * Requires A and C extracted into a double-sized structure to provide the 614 * extra space for the widening multiply. 615 */ 616static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, 617 FloatPartsN *c, int scale, 618 int flags, float_status *s) 619{ 620 int ab_mask, abc_mask; 621 FloatPartsW p_widen, c_widen; 622 623 ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 624 abc_mask = float_cmask(c->cls) | ab_mask; 625 626 /* 627 * It is implementation-defined whether the cases of (0,inf,qnan) 628 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN 629 * they return if they do), so we have to hand this information 630 * off to the target-specific pick-a-NaN routine. 631 */ 632 if (unlikely(abc_mask & float_cmask_anynan)) { 633 return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); 634 } 635 636 if (flags & float_muladd_negate_c) { 637 c->sign ^= 1; 638 } 639 640 /* Compute the sign of the product into A. */ 641 a->sign ^= b->sign; 642 if (flags & float_muladd_negate_product) { 643 a->sign ^= 1; 644 } 645 646 if (unlikely(!cmask_is_only_normals(ab_mask))) { 647 if (unlikely(ab_mask == float_cmask_infzero)) { 648 float_raise(float_flag_invalid | float_flag_invalid_imz, s); 649 goto d_nan; 650 } 651 652 if (ab_mask & float_cmask_inf) { 653 if (c->cls == float_class_inf && a->sign != c->sign) { 654 float_raise(float_flag_invalid | float_flag_invalid_isi, s); 655 goto d_nan; 656 } 657 goto return_inf; 658 } 659 660 g_assert(ab_mask & float_cmask_zero); 661 if (is_anynorm(c->cls)) { 662 *a = *c; 663 goto return_normal; 664 } 665 if (c->cls == float_class_zero) { 666 if (flags & float_muladd_suppress_add_product_zero) { 667 a->sign = c->sign; 668 } else if (a->sign != c->sign) { 669 goto return_sub_zero; 670 } 671 goto return_zero; 672 } 673 g_assert(c->cls == float_class_inf); 674 } 675 676 if (unlikely(c->cls == float_class_inf)) { 677 a->sign = c->sign; 678 goto return_inf; 679 } 680 681 /* Perform the multiplication step. */ 682 p_widen.sign = a->sign; 683 p_widen.exp = a->exp + b->exp + 1; 684 frac_mulw(&p_widen, a, b); 685 if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 686 frac_add(&p_widen, &p_widen, &p_widen); 687 p_widen.exp -= 1; 688 } 689 690 /* Perform the addition step. */ 691 if (c->cls != float_class_zero) { 692 /* Zero-extend C to less significant bits. */ 693 frac_widen(&c_widen, c); 694 c_widen.exp = c->exp; 695 696 if (a->sign == c->sign) { 697 parts_add_normal(&p_widen, &c_widen); 698 } else if (!parts_sub_normal(&p_widen, &c_widen)) { 699 goto return_sub_zero; 700 } 701 } 702 703 /* Narrow with sticky bit, for proper rounding later. */ 704 frac_truncjam(a, &p_widen); 705 a->sign = p_widen.sign; 706 a->exp = p_widen.exp; 707 708 return_normal: 709 a->exp += scale; 710 finish_sign: 711 /* 712 * All result types except for "return the default NaN 713 * because this is an Invalid Operation" go through here; 714 * this matches the set of cases where we consumed a 715 * denormal input. 716 */ 717 if (abc_mask & float_cmask_denormal) { 718 float_raise(float_flag_input_denormal_used, s); 719 } 720 return a; 721 722 return_sub_zero: 723 a->sign = s->float_rounding_mode == float_round_down; 724 return_zero: 725 a->cls = float_class_zero; 726 goto finish_sign; 727 728 return_inf: 729 a->cls = float_class_inf; 730 goto finish_sign; 731 732 d_nan: 733 parts_default_nan(a, s); 734 return a; 735} 736 737/* 738 * Returns the result of dividing the floating-point value `a' by the 739 * corresponding value `b'. The operation is performed according to 740 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 741 */ 742static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, 743 float_status *s) 744{ 745 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 746 bool sign = a->sign ^ b->sign; 747 748 if (likely(cmask_is_only_normals(ab_mask))) { 749 if (ab_mask & float_cmask_denormal) { 750 float_raise(float_flag_input_denormal_used, s); 751 } 752 a->sign = sign; 753 a->exp -= b->exp + frac_div(a, b); 754 return a; 755 } 756 757 /* 0/0 or Inf/Inf => NaN */ 758 if (unlikely(ab_mask == float_cmask_zero)) { 759 float_raise(float_flag_invalid | float_flag_invalid_zdz, s); 760 goto d_nan; 761 } 762 if (unlikely(ab_mask == float_cmask_inf)) { 763 float_raise(float_flag_invalid | float_flag_invalid_idi, s); 764 goto d_nan; 765 } 766 767 /* All the NaN cases */ 768 if (unlikely(ab_mask & float_cmask_anynan)) { 769 return parts_pick_nan(a, b, s); 770 } 771 772 if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { 773 float_raise(float_flag_input_denormal_used, s); 774 } 775 776 a->sign = sign; 777 778 /* Inf / X */ 779 if (a->cls == float_class_inf) { 780 return a; 781 } 782 783 /* 0 / X */ 784 if (a->cls == float_class_zero) { 785 return a; 786 } 787 788 /* X / Inf */ 789 if (b->cls == float_class_inf) { 790 a->cls = float_class_zero; 791 return a; 792 } 793 794 /* X / 0 => Inf */ 795 g_assert(b->cls == float_class_zero); 796 float_raise(float_flag_divbyzero, s); 797 a->cls = float_class_inf; 798 return a; 799 800 d_nan: 801 parts_default_nan(a, s); 802 return a; 803} 804 805/* 806 * Floating point remainder, per IEC/IEEE, or modulus. 807 */ 808static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, 809 uint64_t *mod_quot, float_status *s) 810{ 811 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 812 813 if (likely(cmask_is_only_normals(ab_mask))) { 814 if (ab_mask & float_cmask_denormal) { 815 float_raise(float_flag_input_denormal_used, s); 816 } 817 frac_modrem(a, b, mod_quot); 818 return a; 819 } 820 821 if (mod_quot) { 822 *mod_quot = 0; 823 } 824 825 /* All the NaN cases */ 826 if (unlikely(ab_mask & float_cmask_anynan)) { 827 return parts_pick_nan(a, b, s); 828 } 829 830 /* Inf % N; N % 0 */ 831 if (a->cls == float_class_inf || b->cls == float_class_zero) { 832 float_raise(float_flag_invalid, s); 833 parts_default_nan(a, s); 834 return a; 835 } 836 837 if (ab_mask & float_cmask_denormal) { 838 float_raise(float_flag_input_denormal_used, s); 839 } 840 841 /* N % Inf; 0 % N */ 842 g_assert(b->cls == float_class_inf || a->cls == float_class_zero); 843 return a; 844} 845 846/* 847 * Square Root 848 * 849 * The base algorithm is lifted from 850 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c 851 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c 852 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c 853 * and is thus MIT licenced. 854 */ 855static void partsN(sqrt)(FloatPartsN *a, float_status *status, 856 const FloatFmt *fmt) 857{ 858 const uint32_t three32 = 3u << 30; 859 const uint64_t three64 = 3ull << 62; 860 uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ 861 uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ 862 uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ 863 uint64_t d0h, d0l, d1h, d1l, d2h, d2l; 864 uint64_t discard; 865 bool exp_odd; 866 size_t index; 867 868 if (unlikely(a->cls != float_class_normal)) { 869 switch (a->cls) { 870 case float_class_denormal: 871 if (!a->sign) { 872 /* -ve denormal will be InvalidOperation */ 873 float_raise(float_flag_input_denormal_used, status); 874 } 875 break; 876 case float_class_snan: 877 case float_class_qnan: 878 parts_return_nan(a, status); 879 return; 880 case float_class_zero: 881 return; 882 case float_class_inf: 883 if (unlikely(a->sign)) { 884 goto d_nan; 885 } 886 return; 887 default: 888 g_assert_not_reached(); 889 } 890 } 891 892 if (unlikely(a->sign)) { 893 goto d_nan; 894 } 895 896 /* 897 * Argument reduction. 898 * x = 4^e frac; with integer e, and frac in [1, 4) 899 * m = frac fixed point at bit 62, since we're in base 4. 900 * If base-2 exponent is odd, exchange that for multiply by 2, 901 * which results in no shift. 902 */ 903 exp_odd = a->exp & 1; 904 index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); 905 if (!exp_odd) { 906 frac_shr(a, 1); 907 } 908 909 /* 910 * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). 911 * 912 * Initial estimate: 913 * 7-bit lookup table (1-bit exponent and 6-bit significand). 914 * 915 * The relative error (e = r0*sqrt(m)-1) of a linear estimate 916 * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; 917 * a table lookup is faster and needs one less iteration. 918 * The 7-bit table gives |e| < 0x1.fdp-9. 919 * 920 * A Newton-Raphson iteration for r is 921 * s = m*r 922 * d = s*r 923 * u = 3 - d 924 * r = r*u/2 925 * 926 * Fixed point representations: 927 * m, s, d, u, three are all 2.30; r is 0.32 928 */ 929 m64 = a->frac_hi; 930 m32 = m64 >> 32; 931 932 r32 = rsqrt_tab[index] << 16; 933 /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ 934 935 s32 = ((uint64_t)m32 * r32) >> 32; 936 d32 = ((uint64_t)s32 * r32) >> 32; 937 u32 = three32 - d32; 938 939 if (N == 64) { 940 /* float64 or smaller */ 941 942 r32 = ((uint64_t)r32 * u32) >> 31; 943 /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ 944 945 s32 = ((uint64_t)m32 * r32) >> 32; 946 d32 = ((uint64_t)s32 * r32) >> 32; 947 u32 = three32 - d32; 948 949 if (fmt->frac_size <= 23) { 950 /* float32 or smaller */ 951 952 s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ 953 s32 = (s32 - 1) >> 6; /* 9.23 */ 954 /* s < sqrt(m) < s + 0x1.08p-23 */ 955 956 /* compute nearest rounded result to 2.23 bits */ 957 uint32_t d0 = (m32 << 16) - s32 * s32; 958 uint32_t d1 = s32 - d0; 959 uint32_t d2 = d1 + s32 + 1; 960 s32 += d1 >> 31; 961 a->frac_hi = (uint64_t)s32 << (64 - 25); 962 963 /* increment or decrement for inexact */ 964 if (d2 != 0) { 965 a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); 966 } 967 goto done; 968 } 969 970 /* float64 */ 971 972 r64 = (uint64_t)r32 * u32 * 2; 973 /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ 974 mul64To128(m64, r64, &s64, &discard); 975 mul64To128(s64, r64, &d64, &discard); 976 u64 = three64 - d64; 977 978 mul64To128(s64, u64, &s64, &discard); /* 3.61 */ 979 s64 = (s64 - 2) >> 9; /* 12.52 */ 980 981 /* Compute nearest rounded result */ 982 uint64_t d0 = (m64 << 42) - s64 * s64; 983 uint64_t d1 = s64 - d0; 984 uint64_t d2 = d1 + s64 + 1; 985 s64 += d1 >> 63; 986 a->frac_hi = s64 << (64 - 54); 987 988 /* increment or decrement for inexact */ 989 if (d2 != 0) { 990 a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); 991 } 992 goto done; 993 } 994 995 r64 = (uint64_t)r32 * u32 * 2; 996 /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ 997 998 mul64To128(m64, r64, &s64, &discard); 999 mul64To128(s64, r64, &d64, &discard); 1000 u64 = three64 - d64; 1001 mul64To128(u64, r64, &r64, &discard); 1002 r64 <<= 1; 1003 /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ 1004 1005 mul64To128(m64, r64, &s64, &discard); 1006 mul64To128(s64, r64, &d64, &discard); 1007 u64 = three64 - d64; 1008 mul64To128(u64, r64, &rh, &rl); 1009 add128(rh, rl, rh, rl, &rh, &rl); 1010 /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ 1011 1012 mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); 1013 mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); 1014 sub128(three64, 0, dh, dl, &uh, &ul); 1015 mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ 1016 /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ 1017 1018 sub128(sh, sl, 0, 4, &sh, &sl); 1019 shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ 1020 /* s < sqrt(m) < s + 1ulp */ 1021 1022 /* Compute nearest rounded result */ 1023 mul64To128(sl, sl, &d0h, &d0l); 1024 d0h += 2 * sh * sl; 1025 sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); 1026 sub128(sh, sl, d0h, d0l, &d1h, &d1l); 1027 add128(sh, sl, 0, 1, &d2h, &d2l); 1028 add128(d2h, d2l, d1h, d1l, &d2h, &d2l); 1029 add128(sh, sl, 0, d1h >> 63, &sh, &sl); 1030 shift128Left(sh, sl, 128 - 114, &sh, &sl); 1031 1032 /* increment or decrement for inexact */ 1033 if (d2h | d2l) { 1034 if ((int64_t)(d1h ^ d2h) < 0) { 1035 sub128(sh, sl, 0, 1, &sh, &sl); 1036 } else { 1037 add128(sh, sl, 0, 1, &sh, &sl); 1038 } 1039 } 1040 a->frac_lo = sl; 1041 a->frac_hi = sh; 1042 1043 done: 1044 /* Convert back from base 4 to base 2. */ 1045 a->exp >>= 1; 1046 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { 1047 frac_add(a, a, a); 1048 } else { 1049 a->exp += 1; 1050 } 1051 return; 1052 1053 d_nan: 1054 float_raise(float_flag_invalid | float_flag_invalid_sqrt, status); 1055 parts_default_nan(a, status); 1056} 1057 1058/* 1059 * Rounds the floating-point value `a' to an integer, and returns the 1060 * result as a floating-point value. The operation is performed 1061 * according to the IEC/IEEE Standard for Binary Floating-Point 1062 * Arithmetic. 1063 * 1064 * parts_round_to_int_normal is an internal helper function for 1065 * normal numbers only, returning true for inexact but not directly 1066 * raising float_flag_inexact. 1067 */ 1068static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, 1069 int scale, int frac_size) 1070{ 1071 uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; 1072 int shift_adj; 1073 1074 scale = MIN(MAX(scale, -0x10000), 0x10000); 1075 a->exp += scale; 1076 1077 if (a->exp < 0) { 1078 bool one; 1079 1080 /* All fractional */ 1081 switch (rmode) { 1082 case float_round_nearest_even: 1083 one = false; 1084 if (a->exp == -1) { 1085 FloatPartsN tmp; 1086 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ 1087 frac_add(&tmp, a, a); 1088 /* Anything remaining means frac > 0.5. */ 1089 one = !frac_eqz(&tmp); 1090 } 1091 break; 1092 case float_round_ties_away: 1093 one = a->exp == -1; 1094 break; 1095 case float_round_to_zero: 1096 one = false; 1097 break; 1098 case float_round_up: 1099 one = !a->sign; 1100 break; 1101 case float_round_down: 1102 one = a->sign; 1103 break; 1104 case float_round_to_odd: 1105 one = true; 1106 break; 1107 default: 1108 g_assert_not_reached(); 1109 } 1110 1111 frac_clear(a); 1112 a->exp = 0; 1113 if (one) { 1114 a->frac_hi = DECOMPOSED_IMPLICIT_BIT; 1115 } else { 1116 a->cls = float_class_zero; 1117 } 1118 return true; 1119 } 1120 1121 if (a->exp >= frac_size) { 1122 /* All integral */ 1123 return false; 1124 } 1125 1126 if (N > 64 && a->exp < N - 64) { 1127 /* 1128 * Rounding is not in the low word -- shift lsb to bit 2, 1129 * which leaves room for sticky and rounding bit. 1130 */ 1131 shift_adj = (N - 1) - (a->exp + 2); 1132 frac_shrjam(a, shift_adj); 1133 frac_lsb = 1 << 2; 1134 } else { 1135 shift_adj = 0; 1136 frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); 1137 } 1138 1139 frac_lsbm1 = frac_lsb >> 1; 1140 rnd_mask = frac_lsb - 1; 1141 rnd_even_mask = rnd_mask | frac_lsb; 1142 1143 if (!(a->frac_lo & rnd_mask)) { 1144 /* Fractional bits already clear, undo the shift above. */ 1145 frac_shl(a, shift_adj); 1146 return false; 1147 } 1148 1149 switch (rmode) { 1150 case float_round_nearest_even: 1151 inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); 1152 break; 1153 case float_round_ties_away: 1154 inc = frac_lsbm1; 1155 break; 1156 case float_round_to_zero: 1157 inc = 0; 1158 break; 1159 case float_round_up: 1160 inc = a->sign ? 0 : rnd_mask; 1161 break; 1162 case float_round_down: 1163 inc = a->sign ? rnd_mask : 0; 1164 break; 1165 case float_round_to_odd: 1166 inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; 1167 break; 1168 default: 1169 g_assert_not_reached(); 1170 } 1171 1172 if (shift_adj == 0) { 1173 if (frac_addi(a, a, inc)) { 1174 frac_shr(a, 1); 1175 a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; 1176 a->exp++; 1177 } 1178 a->frac_lo &= ~rnd_mask; 1179 } else { 1180 frac_addi(a, a, inc); 1181 a->frac_lo &= ~rnd_mask; 1182 /* Be careful shifting back, not to overflow */ 1183 frac_shl(a, shift_adj - 1); 1184 if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { 1185 a->exp++; 1186 } else { 1187 frac_add(a, a, a); 1188 } 1189 } 1190 return true; 1191} 1192 1193static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, 1194 int scale, float_status *s, 1195 const FloatFmt *fmt) 1196{ 1197 switch (a->cls) { 1198 case float_class_qnan: 1199 case float_class_snan: 1200 parts_return_nan(a, s); 1201 break; 1202 case float_class_zero: 1203 case float_class_inf: 1204 break; 1205 case float_class_normal: 1206 case float_class_denormal: 1207 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { 1208 float_raise(float_flag_inexact, s); 1209 } 1210 break; 1211 default: 1212 g_assert_not_reached(); 1213 } 1214} 1215 1216/* 1217 * Returns the result of converting the floating-point value `a' to 1218 * the two's complement integer format. The conversion is performed 1219 * according to the IEC/IEEE Standard for Binary Floating-Point 1220 * Arithmetic---which means in particular that the conversion is 1221 * rounded according to the current rounding mode. If `a' is a NaN, 1222 * the largest positive integer is returned. Otherwise, if the 1223 * conversion overflows, the largest integer with the same sign as `a' 1224 * is returned. 1225 */ 1226static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, 1227 int scale, int64_t min, int64_t max, 1228 float_status *s) 1229{ 1230 int flags = 0; 1231 uint64_t r; 1232 1233 switch (p->cls) { 1234 case float_class_snan: 1235 flags |= float_flag_invalid_snan; 1236 /* fall through */ 1237 case float_class_qnan: 1238 flags |= float_flag_invalid; 1239 r = max; 1240 break; 1241 1242 case float_class_inf: 1243 flags = float_flag_invalid | float_flag_invalid_cvti; 1244 r = p->sign ? min : max; 1245 break; 1246 1247 case float_class_zero: 1248 return 0; 1249 1250 case float_class_normal: 1251 case float_class_denormal: 1252 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1253 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1254 flags = float_flag_inexact; 1255 } 1256 1257 if (p->exp <= DECOMPOSED_BINARY_POINT) { 1258 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1259 } else { 1260 r = UINT64_MAX; 1261 } 1262 if (p->sign) { 1263 if (r <= -(uint64_t)min) { 1264 r = -r; 1265 } else { 1266 flags = float_flag_invalid | float_flag_invalid_cvti; 1267 r = min; 1268 } 1269 } else if (r > max) { 1270 flags = float_flag_invalid | float_flag_invalid_cvti; 1271 r = max; 1272 } 1273 break; 1274 1275 default: 1276 g_assert_not_reached(); 1277 } 1278 1279 float_raise(flags, s); 1280 return r; 1281} 1282 1283/* 1284 * Returns the result of converting the floating-point value `a' to 1285 * the unsigned integer format. The conversion is performed according 1286 * to the IEC/IEEE Standard for Binary Floating-Point 1287 * Arithmetic---which means in particular that the conversion is 1288 * rounded according to the current rounding mode. If `a' is a NaN, 1289 * the largest unsigned integer is returned. Otherwise, if the 1290 * conversion overflows, the largest unsigned integer is returned. If 1291 * the 'a' is negative, the result is rounded and zero is returned; 1292 * values that do not round to zero will raise the inexact exception 1293 * flag. 1294 */ 1295static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, 1296 int scale, uint64_t max, float_status *s) 1297{ 1298 int flags = 0; 1299 uint64_t r; 1300 1301 switch (p->cls) { 1302 case float_class_snan: 1303 flags |= float_flag_invalid_snan; 1304 /* fall through */ 1305 case float_class_qnan: 1306 flags |= float_flag_invalid; 1307 r = max; 1308 break; 1309 1310 case float_class_inf: 1311 flags = float_flag_invalid | float_flag_invalid_cvti; 1312 r = p->sign ? 0 : max; 1313 break; 1314 1315 case float_class_zero: 1316 return 0; 1317 1318 case float_class_normal: 1319 case float_class_denormal: 1320 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1321 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { 1322 flags = float_flag_inexact; 1323 if (p->cls == float_class_zero) { 1324 r = 0; 1325 break; 1326 } 1327 } 1328 1329 if (p->sign) { 1330 flags = float_flag_invalid | float_flag_invalid_cvti; 1331 r = 0; 1332 } else if (p->exp > DECOMPOSED_BINARY_POINT) { 1333 flags = float_flag_invalid | float_flag_invalid_cvti; 1334 r = max; 1335 } else { 1336 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1337 if (r > max) { 1338 flags = float_flag_invalid | float_flag_invalid_cvti; 1339 r = max; 1340 } 1341 } 1342 break; 1343 1344 default: 1345 g_assert_not_reached(); 1346 } 1347 1348 float_raise(flags, s); 1349 return r; 1350} 1351 1352/* 1353 * Like partsN(float_to_sint), except do not saturate the result. 1354 * Instead, return the rounded unbounded precision two's compliment result, 1355 * modulo 2**(bitsm1 + 1). 1356 */ 1357static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, 1358 FloatRoundMode rmode, 1359 int bitsm1, float_status *s) 1360{ 1361 int flags = 0; 1362 uint64_t r; 1363 bool overflow = false; 1364 1365 switch (p->cls) { 1366 case float_class_snan: 1367 flags |= float_flag_invalid_snan; 1368 /* fall through */ 1369 case float_class_qnan: 1370 flags |= float_flag_invalid; 1371 r = 0; 1372 break; 1373 1374 case float_class_inf: 1375 overflow = true; 1376 r = 0; 1377 break; 1378 1379 case float_class_zero: 1380 return 0; 1381 1382 case float_class_normal: 1383 case float_class_denormal: 1384 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ 1385 if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { 1386 flags = float_flag_inexact; 1387 } 1388 1389 if (p->exp <= DECOMPOSED_BINARY_POINT) { 1390 /* 1391 * Because we rounded to integral, and exp < 64, 1392 * we know frac_low is zero. 1393 */ 1394 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); 1395 if (p->exp < bitsm1) { 1396 /* Result in range. */ 1397 } else if (p->exp == bitsm1) { 1398 /* The only in-range value is INT_MIN. */ 1399 overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT; 1400 } else { 1401 overflow = true; 1402 } 1403 } else { 1404 /* Overflow, but there might still be bits to return. */ 1405 int shl = p->exp - DECOMPOSED_BINARY_POINT; 1406 if (shl < N) { 1407 frac_shl(p, shl); 1408 r = p->frac_hi; 1409 } else { 1410 r = 0; 1411 } 1412 overflow = true; 1413 } 1414 1415 if (p->sign) { 1416 r = -r; 1417 } 1418 break; 1419 1420 default: 1421 g_assert_not_reached(); 1422 } 1423 1424 if (overflow) { 1425 flags = float_flag_invalid | float_flag_invalid_cvti; 1426 } 1427 float_raise(flags, s); 1428 return r; 1429} 1430 1431/* 1432 * Integer to float conversions 1433 * 1434 * Returns the result of converting the two's complement integer `a' 1435 * to the floating-point format. The conversion is performed according 1436 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1437 */ 1438static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, 1439 int scale, float_status *s) 1440{ 1441 uint64_t f = a; 1442 int shift; 1443 1444 memset(p, 0, sizeof(*p)); 1445 1446 if (a == 0) { 1447 p->cls = float_class_zero; 1448 return; 1449 } 1450 1451 p->cls = float_class_normal; 1452 if (a < 0) { 1453 f = -f; 1454 p->sign = true; 1455 } 1456 shift = clz64(f); 1457 scale = MIN(MAX(scale, -0x10000), 0x10000); 1458 1459 p->exp = DECOMPOSED_BINARY_POINT - shift + scale; 1460 p->frac_hi = f << shift; 1461} 1462 1463/* 1464 * Unsigned Integer to float conversions 1465 * 1466 * Returns the result of converting the unsigned integer `a' to the 1467 * floating-point format. The conversion is performed according to the 1468 * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1469 */ 1470static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, 1471 int scale, float_status *status) 1472{ 1473 memset(p, 0, sizeof(*p)); 1474 1475 if (a == 0) { 1476 p->cls = float_class_zero; 1477 } else { 1478 int shift = clz64(a); 1479 scale = MIN(MAX(scale, -0x10000), 0x10000); 1480 p->cls = float_class_normal; 1481 p->exp = DECOMPOSED_BINARY_POINT - shift + scale; 1482 p->frac_hi = a << shift; 1483 } 1484} 1485 1486/* 1487 * Float min/max. 1488 */ 1489static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, 1490 float_status *s, int flags) 1491{ 1492 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1493 int a_exp, b_exp, cmp; 1494 1495 if (unlikely(ab_mask & float_cmask_anynan)) { 1496 /* 1497 * For minNum/maxNum (IEEE 754-2008) 1498 * or minimumNumber/maximumNumber (IEEE 754-2019), 1499 * if one operand is a QNaN, and the other 1500 * operand is numerical, then return numerical argument. 1501 */ 1502 if ((flags & (minmax_isnum | minmax_isnumber)) 1503 && !(ab_mask & float_cmask_snan) 1504 && (ab_mask & ~float_cmask_qnan)) { 1505 if (ab_mask & float_cmask_denormal) { 1506 float_raise(float_flag_input_denormal_used, s); 1507 } 1508 return is_nan(a->cls) ? b : a; 1509 } 1510 1511 /* 1512 * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag 1513 * are removed and replaced with minimum, minimumNumber, maximum 1514 * and maximumNumber. 1515 * minimumNumber/maximumNumber behavior for SNaN is changed to: 1516 * If both operands are NaNs, a QNaN is returned. 1517 * If either operand is a SNaN, 1518 * an invalid operation exception is signaled, 1519 * but unless both operands are NaNs, 1520 * the SNaN is otherwise ignored and not converted to a QNaN. 1521 */ 1522 if ((flags & minmax_isnumber) 1523 && (ab_mask & float_cmask_snan) 1524 && (ab_mask & ~float_cmask_anynan)) { 1525 float_raise(float_flag_invalid, s); 1526 return is_nan(a->cls) ? b : a; 1527 } 1528 1529 return parts_pick_nan(a, b, s); 1530 } 1531 1532 if (ab_mask & float_cmask_denormal) { 1533 float_raise(float_flag_input_denormal_used, s); 1534 } 1535 1536 a_exp = a->exp; 1537 b_exp = b->exp; 1538 1539 if (unlikely(!cmask_is_only_normals(ab_mask))) { 1540 switch (a->cls) { 1541 case float_class_normal: 1542 case float_class_denormal: 1543 break; 1544 case float_class_inf: 1545 a_exp = INT16_MAX; 1546 break; 1547 case float_class_zero: 1548 a_exp = INT16_MIN; 1549 break; 1550 default: 1551 g_assert_not_reached(); 1552 } 1553 switch (b->cls) { 1554 case float_class_normal: 1555 case float_class_denormal: 1556 break; 1557 case float_class_inf: 1558 b_exp = INT16_MAX; 1559 break; 1560 case float_class_zero: 1561 b_exp = INT16_MIN; 1562 break; 1563 default: 1564 g_assert_not_reached(); 1565 } 1566 } 1567 1568 /* Compare magnitudes. */ 1569 cmp = a_exp - b_exp; 1570 if (cmp == 0) { 1571 cmp = frac_cmp(a, b); 1572 } 1573 1574 /* 1575 * Take the sign into account. 1576 * For ismag, only do this if the magnitudes are equal. 1577 */ 1578 if (!(flags & minmax_ismag) || cmp == 0) { 1579 if (a->sign != b->sign) { 1580 /* For differing signs, the negative operand is less. */ 1581 cmp = a->sign ? -1 : 1; 1582 } else if (a->sign) { 1583 /* For two negative operands, invert the magnitude comparison. */ 1584 cmp = -cmp; 1585 } 1586 } 1587 1588 if (flags & minmax_ismin) { 1589 cmp = -cmp; 1590 } 1591 return cmp < 0 ? b : a; 1592} 1593 1594/* 1595 * Floating point compare 1596 */ 1597static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, 1598 float_status *s, bool is_quiet) 1599{ 1600 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); 1601 1602 if (likely(cmask_is_only_normals(ab_mask))) { 1603 FloatRelation cmp; 1604 1605 if (ab_mask & float_cmask_denormal) { 1606 float_raise(float_flag_input_denormal_used, s); 1607 } 1608 1609 if (a->sign != b->sign) { 1610 goto a_sign; 1611 } 1612 if (a->exp == b->exp) { 1613 cmp = frac_cmp(a, b); 1614 } else if (a->exp < b->exp) { 1615 cmp = float_relation_less; 1616 } else { 1617 cmp = float_relation_greater; 1618 } 1619 if (a->sign) { 1620 cmp = -cmp; 1621 } 1622 return cmp; 1623 } 1624 1625 if (unlikely(ab_mask & float_cmask_anynan)) { 1626 if (ab_mask & float_cmask_snan) { 1627 float_raise(float_flag_invalid | float_flag_invalid_snan, s); 1628 } else if (!is_quiet) { 1629 float_raise(float_flag_invalid, s); 1630 } 1631 return float_relation_unordered; 1632 } 1633 1634 if (ab_mask & float_cmask_denormal) { 1635 float_raise(float_flag_input_denormal_used, s); 1636 } 1637 1638 if (ab_mask & float_cmask_zero) { 1639 if (ab_mask == float_cmask_zero) { 1640 return float_relation_equal; 1641 } else if (a->cls == float_class_zero) { 1642 goto b_sign; 1643 } else { 1644 goto a_sign; 1645 } 1646 } 1647 1648 if (ab_mask == float_cmask_inf) { 1649 if (a->sign == b->sign) { 1650 return float_relation_equal; 1651 } 1652 } else if (b->cls == float_class_inf) { 1653 goto b_sign; 1654 } else { 1655 g_assert(a->cls == float_class_inf); 1656 } 1657 1658 a_sign: 1659 return a->sign ? float_relation_less : float_relation_greater; 1660 b_sign: 1661 return b->sign ? float_relation_greater : float_relation_less; 1662} 1663 1664/* 1665 * Multiply A by 2 raised to the power N. 1666 */ 1667static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) 1668{ 1669 switch (a->cls) { 1670 case float_class_snan: 1671 case float_class_qnan: 1672 parts_return_nan(a, s); 1673 break; 1674 case float_class_zero: 1675 case float_class_inf: 1676 break; 1677 case float_class_denormal: 1678 float_raise(float_flag_input_denormal_used, s); 1679 /* fall through */ 1680 case float_class_normal: 1681 a->exp += MIN(MAX(n, -0x10000), 0x10000); 1682 break; 1683 default: 1684 g_assert_not_reached(); 1685 } 1686} 1687 1688/* 1689 * Return log2(A) 1690 */ 1691static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) 1692{ 1693 uint64_t a0, a1, r, t, ign; 1694 FloatPartsN f; 1695 int i, n, a_exp, f_exp; 1696 1697 if (unlikely(a->cls != float_class_normal)) { 1698 switch (a->cls) { 1699 case float_class_denormal: 1700 if (!a->sign) { 1701 /* -ve denormal will be InvalidOperation */ 1702 float_raise(float_flag_input_denormal_used, s); 1703 } 1704 break; 1705 case float_class_snan: 1706 case float_class_qnan: 1707 parts_return_nan(a, s); 1708 return; 1709 case float_class_zero: 1710 float_raise(float_flag_divbyzero, s); 1711 /* log2(0) = -inf */ 1712 a->cls = float_class_inf; 1713 a->sign = 1; 1714 return; 1715 case float_class_inf: 1716 if (unlikely(a->sign)) { 1717 goto d_nan; 1718 } 1719 return; 1720 default: 1721 g_assert_not_reached(); 1722 } 1723 } 1724 if (unlikely(a->sign)) { 1725 goto d_nan; 1726 } 1727 1728 /* TODO: This algorithm looses bits too quickly for float128. */ 1729 g_assert(N == 64); 1730 1731 a_exp = a->exp; 1732 f_exp = -1; 1733 1734 r = 0; 1735 t = DECOMPOSED_IMPLICIT_BIT; 1736 a0 = a->frac_hi; 1737 a1 = 0; 1738 1739 n = fmt->frac_size + 2; 1740 if (unlikely(a_exp == -1)) { 1741 /* 1742 * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). 1743 * When the value is very close to 1.0, there are lots of 1's in 1744 * the msb parts of the fraction. At the end, when we subtract 1745 * this value from -1.0, we can see a catastrophic loss of precision, 1746 * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the 1747 * bits of y in the final result. To minimize this, compute as many 1748 * digits as we can. 1749 * ??? This case needs another algorithm to avoid this. 1750 */ 1751 n = fmt->frac_size * 2 + 2; 1752 /* Don't compute a value overlapping the sticky bit */ 1753 n = MIN(n, 62); 1754 } 1755 1756 for (i = 0; i < n; i++) { 1757 if (a1) { 1758 mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); 1759 } else if (a0 & 0xffffffffull) { 1760 mul64To128(a0, a0, &a0, &a1); 1761 } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { 1762 a0 >>= 32; 1763 a0 *= a0; 1764 } else { 1765 goto exact; 1766 } 1767 1768 if (a0 & DECOMPOSED_IMPLICIT_BIT) { 1769 if (unlikely(a_exp == 0 && r == 0)) { 1770 /* 1771 * When a_exp == 0, we're computing the log2 of a value 1772 * [1.0,2.0). When the value is very close to 1.0, there 1773 * are lots of 0's in the msb parts of the fraction. 1774 * We need to compute more digits to produce a correct 1775 * result -- restart at the top of the fraction. 1776 * ??? This is likely to lose precision quickly, as for 1777 * float128; we may need another method. 1778 */ 1779 f_exp -= i; 1780 t = r = DECOMPOSED_IMPLICIT_BIT; 1781 i = 0; 1782 } else { 1783 r |= t; 1784 } 1785 } else { 1786 add128(a0, a1, a0, a1, &a0, &a1); 1787 } 1788 t >>= 1; 1789 } 1790 1791 /* Set sticky for inexact. */ 1792 r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); 1793 1794 exact: 1795 parts_sint_to_float(a, a_exp, 0, s); 1796 if (r == 0) { 1797 return; 1798 } 1799 1800 memset(&f, 0, sizeof(f)); 1801 f.cls = float_class_normal; 1802 f.frac_hi = r; 1803 f.exp = f_exp - frac_normalize(&f); 1804 1805 if (a_exp < 0) { 1806 parts_sub_normal(a, &f); 1807 } else if (a_exp > 0) { 1808 parts_add_normal(a, &f); 1809 } else { 1810 *a = f; 1811 } 1812 return; 1813 1814 d_nan: 1815 float_raise(float_flag_invalid, s); 1816 parts_default_nan(a, s); 1817} 1818