/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
 * Turn the NaN input A of a unary operation into the NaN result, in place.
 * An SNaN raises InvalidOperation and is then either silenced or replaced
 * by the default NaN; a QNaN is passed through unless default_nan_mode
 * forces the default NaN.  A must already be a NaN.
 */
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        /* Caller guarantees A is a NaN. */
        g_assert_not_reached();
    }
}

/*
 * Select which of the two operands A and B (at least one of which is a
 * NaN) to propagate as the result of a binary operation, according to
 * the target's float_2nan_prop_rule.  Raises InvalidOperation if either
 * input is an SNaN, and silences the chosen NaN before returning it.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        /* Prefer an SNaN in operand order; else fall back to any NaN. */
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        /* As above, but preferring operand B. */
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
        } else {
            ret = b;
            break;
        }
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: prefer the positive-signed NaN. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}

/*
 * Select which NaN to propagate for fused multiply-add A * B + C, given
 * the precomputed class masks of (A,B) and (A,B,C).  Handles the
 * (Inf * 0) + NaN special case per the target's float_infzeronan_rule,
 * and otherwise picks among up to three NaNs per float_3nan_prop_rule.
 * The chosen NaN is silenced if it was an SNaN.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (infzero &&
        !(s->float_infzeronan_rule & float_infzeronan_suppress_invalid)) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule & ~float_infzeronan_suppress_invalid) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        ret = c;
    } else {
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    /*
     * It's target-dependent how to handle the case of exponent 0
     * and Integer bit set. Intel calls these "pseudodenormals",
     * and treats them as if the integer bit was 0, and never
     * produces them on output. This is the default behaviour for QEMU.
     * For m68k, the integer bit is considered validly part of the
     * input value when the exponent is 0, and may be 0 or 1,
     * giving extra range. They may also be generated as outputs.
     * (The m68k manual actually calls these values part of the
     * normalized number range, not the denormalized number range,
     * but that distinction is not important for us, because
     * m68k doesn't care about the input_denormal_used status flag.)
     * floatx80_pseudo_denormal_valid selects the m68k behaviour,
     * which changes both how we canonicalize such a value and
     * how we uncanonicalize results.
     */
    bool has_pseudo_denormals = fmt->has_explicit_bit &&
        (status->floatx80_behaviour & floatx80_pseudo_denormal_valid);

    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            float_raise(float_flag_input_denormal_flushed, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            int shift = frac_normalize(p);
            p->cls = float_class_denormal;
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !has_pseudo_denormals;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}

/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * First compute the rounding increment INC for the current rounding
     * mode; overflow_norm records whether overflow should saturate to the
     * maximum normal rather than return infinity.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even_max:
        overflow_norm = true;
        /* fall through */
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            /* The fraction lsb lives in frac_hi; round/sticky in frac_lo. */
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction; renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero &&
               s->ftz_detection == float_ftz_before_rounding) {
        flags |= float_flag_output_denormal_flushed;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Result is subnormal (or just above, after rounding). */
        bool is_tiny = s->tininess_before_rounding || exp < 0;
        bool has_pseudo_denormals = fmt->has_explicit_bit &&
            (s->floatx80_behaviour & floatx80_pseudo_denormal_valid);

        if (!is_tiny) {
            /* Tininess after rounding: tiny iff rounding does not carry out. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        frac_shrjam(p, !has_pseudo_denormals - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Exponent is 1 iff rounding promoted the value to normal. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !has_pseudo_denormals;
        frac_shr(p, frac_shift);

        if (is_tiny) {
            if (s->flush_to_zero) {
                assert(s->ftz_detection == float_ftz_after_rounding);
                flags |= float_flag_output_denormal_flushed;
                p->cls = float_class_zero;
                exp = 0;
                frac_clear(p);
            } else if (flags & float_flag_inexact) {
                flags |= float_flag_underflow;
            }
            if (exp == 0 && frac_eqz(p)) {
                p->cls = float_class_zero;
            }
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}

/*
 * Round and uncanonicalize P for format FMT, handling all classes:
 * finite non-zero values go through parts_uncanon_normal; zero, Inf
 * and NaN have their exponent and fraction fields packed directly.
 */
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(is_anynorm(p->cls))) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Fold the subtract flag into b's sign so only addition logic remains. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    /*
     * For addition and subtraction, we will consume an
     * input denormal unless the other input is a NaN.
     */
    if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) ==
        float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(cmask_is_only_normals(ab_mask))) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* Exact zero result: sign depends on the rounding direction. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(cmask_is_only_normals(ab_mask))) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Remaining cases: exactly one of A, B is zero. */
    if (b->cls == float_class_zero) {
        g_assert(is_anynorm(a->cls));
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(is_anynorm(b->cls));
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(cmask_is_only_normals(ab_mask))) {
        FloatPartsW tmp;

        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }

        /* Widening multiply, then narrow back with a sticky bit. */
        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            /* Product was in [1, 2): shift left one and fix the exponent. */
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
                                          FloatPartsN *c, int scale,
                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(!cmask_is_only_normals(ab_mask))) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* Inf - Inf */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        if (is_anynorm(c->cls)) {
            /* Product is zero: result is C (possibly rescaled/negated). */
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (flags & float_muladd_suppress_add_product_zero) {
                a->sign = c->sign;
            } else if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }

    /*
     * All result types except for "return the default NaN
     * because this is an Invalid Operation" go through here;
     * this matches the set of cases where we consumed a
     * denormal input.
     */
    if (abc_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
    return a;

 return_sub_zero:
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(cmask_is_only_normals(ab_mask))) {
        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }
        a->sign = sign;
        a->exp -= b->exp + frac_div(a, b);
        return a;
    }

    /* 0/0 or Inf/Inf => NaN */
    if (unlikely(ab_mask == float_cmask_zero)) {
        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
        goto d_nan;
    }
    if (unlikely(ab_mask == float_cmask_inf)) {
        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
        goto d_nan;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* A denormal divided by zero raises DivByZero, not denormal-used. */
    if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) {
        float_raise(float_flag_input_denormal_used, s);
    }

    a->sign = sign;

    /* Inf / X */
    if (a->cls == float_class_inf) {
        return a;
    }

    /* 0 / X */
    if (a->cls == float_class_zero) {
        return a;
    }

    /* X / Inf */
    if (b->cls == float_class_inf) {
        a->cls = float_class_zero;
        return a;
    }

    /* X / 0 => Inf */
    g_assert(b->cls == float_class_zero);
    float_raise(float_flag_divbyzero, s);
    a->cls = float_class_inf;
    return a;

 d_nan:
    parts_default_nan(a, s);
    return a;
}

/*
 * Floating point remainder, per IEC/IEEE, or modulus.
 * mod_quot, when non-NULL, receives quotient information from
 * frac_modrem (zeroed for all non-finite/special cases).
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(cmask_is_only_normals(ab_mask))) {
        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }
        frac_modrem(a, b, mod_quot);
        return a;
    }

    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    /* N % Inf; 0 % N */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    /* Dispose of all special-class inputs before the numeric kernel. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_denormal:
            if (!a->sign) {
                /* -ve denormal will be InvalidOperation */
                float_raise(float_flag_input_denormal_used, status);
            }
            break;
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+-0) == +-0 */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     * s = m*r
     * d = s*r
     * u = 3 - d
     * r = r*u/2
     *
     * Fixed point representations:
     * m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so the exponent addition below cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Rounding carried out of the fraction; renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

/*
 * Round A to an integer-valued float per RMODE, handling all classes;
 * raises Inexact (and NaN-related flags) as appropriate.
 */
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Already integral. */
        break;
    case float_class_normal:
    case float_class_denormal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /* Shift the integer part of the fraction into place. */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Magnitude too large for 64 bits; saturate below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* Unsigned negation avoids signed overflow for `min' itself. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned.
If
 * `a' is negative: when it rounds to zero, zero is returned raising
 * only the inexact exception; when it does not round to zero, the
 * invalid exception is raised and zero is returned.
 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                /* Rounding brought a small value down to zero. */
                r = 0;
                break;
            }
        }

        if (p->sign) {
            /* Negative and did not round to zero above. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            /* Magnitude too large for 64 bits. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Like partsN(float_to_sint), except do not saturate the result.
 * Instead, return the rounded unbounded precision two's complement result,
 * modulo 2**(bitsm1 + 1).
 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        /* Infinity has no low-order bits to return. */
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                /* Shifted out entirely: the low 64 bits are all zero. */
                r = 0;
            }
            overflow = true;
        }

        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    if (overflow) {
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format.
The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        /* Unsigned negation is well-defined, even for INT64_MIN. */
        f = -f;
        p->sign = true;
    }
    shift = clz64(f);
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    /* Normalize so the msb of `f' lands on the implicit bit. */
    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    p->frac_hi = f << shift;
}

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format. The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        int shift = clz64(a);
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        /* Normalize so the msb of `a' lands on the implicit bit. */
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 *
 * The @flags bits (minmax_ismin, minmax_ismag, minmax_isnum,
 * minmax_isnumber) select min vs max, magnitude comparison, and the
 * IEEE 754-2008 vs 754-2019 NaN-handling variants.
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            if (ab_mask & float_cmask_denormal) {
                float_raise(float_flag_input_denormal_used, s);
            }
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        return parts_pick_nan(a, b, s);
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(!cmask_is_only_normals(ab_mask))) {
        /* Substitute exponents that order inf above, zero below, normals. */
        switch (a->cls) {
        case float_class_normal:
        case float_class_denormal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
        case float_class_denormal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}

/*
 * Floating point compare
 *
 * Returns a FloatRelation; @is_quiet suppresses the invalid exception
 * for quiet NaN operands (signaling NaNs always raise it).
 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(cmask_is_only_normals(ab_mask))) {
        FloatRelation cmp;

        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }

        if (a->sign != b->sign) {
            goto a_sign;
        }
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        if (a->sign) {
            /* Negative operands invert the magnitude ordering. */
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* Zeroes compare equal regardless of sign. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else
{
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    return b->sign ? float_relation_greater : float_relation_less;
}

/*
 * Multiply A by 2 raised to the power N.
 * N is clamped to +/- 0x10000 before being applied to the exponent.
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Scaling zero or infinity is a no-op. */
        break;
    case float_class_denormal:
        float_raise(float_flag_input_denormal_used, s);
        /* fall through */
    case float_class_normal:
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Return log2(A)
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_denormal:
            if (!a->sign) {
                /* -ve denormal will be InvalidOperation */
                float_raise(float_flag_input_denormal_used, s);
            }
            break;
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }
    if (unlikely(a->sign)) {
        /* log2 of a negative number is invalid. */
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction is exactly 1.0: no more bits to produce. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}