/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
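
/*
 * Illustrative sketch only (not part of the implementation): stripped of
 * all QEMU plumbing, the hardfloat fast path for a two-operand op has this
 * shape. The helper name and the fell_back flag are ad hoc; the real code
 * below threads a float_status through instead.
 */
static inline __attribute__((unused))
float hardfloat_shape_sketch(float a, float b, bool *fell_back)
{
    float r = a + b;                       /* let the host FPU do the work */

    if (isinf(r)) {
        /* overflow is detectable from the result alone */
    } else if (fabsf(r) <= FLT_MIN && r != 0.0f) {
        /* possible underflow: defer to softfloat for exact flags */
        *fell_back = true;
    }
    return r;
}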
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                             \
    static inline void name(soft_t *a, soft_t *b, float_status *s) \
    {                                                              \
        if (likely(!s->flush_inputs_to_zero)) {                    \
            return;                                                \
        }                                                          \
        soft_t ## _input_flush__nocheck(a, s);                     \
        soft_t ## _input_flush__nocheck(b, s);                     \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                        \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                         \
        if (likely(!s->flush_inputs_to_zero)) {                               \
            return;                                                           \
        }                                                                     \
        soft_t ## _input_flush__nocheck(a, s);                                \
        soft_t ## _input_flush__nocheck(b, s);                                \
        soft_t ## _input_flush__nocheck(c, s);                                \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the
 * generated hardfloat functions. Each combination of number of inputs and
 * float size gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);
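
/*
 * Usage illustration for can_use_fpu() above (hypothetical helper, never
 * called): once a guest op has raised inexact and rounding is the default
 * nearest-even, the fast path becomes available. A target that clears the
 * flags before every instruction (e.g. PPC, see QEMU_NO_HARDFLOAT) never
 * reaches that state and always takes the softfloat path.
 */
static inline __attribute__((unused))
bool can_use_fpu_example(float_status *s)
{
    float_raise(float_flag_inexact, s);
    s->float_rounding_mode = float_round_nearest_even;
    return can_use_fpu(s);          /* true unless QEMU_NO_HARDFLOAT */
}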
/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint32_t extractFloat32Frac(float32 a)
{
    return float32_val(a) & 0x007FFFFF;
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat32Exp(float32 a)
{
    return (float32_val(a) >> 23) & 0xFF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat32Sign(float32 a)
{
    return float32_val(a) >> 31;
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint64_t extractFloat64Frac(float64 a)
{
    return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat64Exp(float64 a)
{
    return (float64_val(a) >> 52) & 0x7FF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat64Sign(float64 a)
{
    return float64_val(a) >> 63;
}

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation. In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS(E, F)                               \
    .exp_size       = E,                                 \
    .exp_bias       = ((1 << E) - 1) >> 1,               \
    .exp_max        = (1 << E) - 1,                      \
    .frac_size      = F,                                 \
    .frac_shift     = (-F - 1) & 63,                     \
    .frac_lsb       = 1ull << ((-F - 1) & 63),           \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),           \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,     \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/* Unpack a float to parts, but do not canonicalize. */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
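
/*
 * Worked example (illustrative, never called): FLOAT_PARAMS(8, 23) for
 * float32 yields exp_bias = 127, exp_max = 255 and frac_shift = 40, so
 * round_mask = (1ull << 40) - 1. Unpacking the raw value 0xC0400000
 * (-3.0f) then produces sign = 1, exp = 128, frac = 0x400000.
 * assert() is assumed to be available via osdep.h.
 */
static inline __attribute__((unused)) void unpack_raw_example(void)
{
    FloatParts64 p;

    float32_unpack_raw(&p, make_float32(0xC0400000));
    assert(p.sign && p.exp == 128 && p.frac == 0x400000);
}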
/* Pack a float from parts, but do not canonicalize. */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a,
                                      FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
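
/*
 * Dispatch illustration (hypothetical helper, never called): QEMU_GENERIC
 * selects an implementation from the pointer type of its first argument,
 * so the parts_* macros above resolve at compile time.
 */
static inline __attribute__((unused))
void parts_dispatch_example(FloatParts64 *p64, FloatParts128 *p128,
                            float_status *s)
{
    parts_return_nan(p64, s);   /* resolves to parts64_return_nan */
    parts_return_nan(p128, s);  /* resolves to parts128_return_nan */
}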
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward. If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
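
/*
 * Worked example for frac64_div above (illustrative, with N = 4 instead of
 * 64): a = 0b1000, b = 0b1100. Since a < b, divide (a << 4) by b:
 * 128 / 12 = 10 remainder 8, i.e. q = 0b1010, a full 4-bit quotient with
 * its msb set; true is returned so the caller decrements the exponent,
 * and the nonzero remainder jams the lsb, so a->frac becomes 0b1011.
 */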
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
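
/*
 * Worked example for the jamming shifts above (illustrative, never
 * called): shifting frac = 0b1001 right by 2 discards the bits 01;
 * because they are not all zero, the result's lsb is "jammed" to 1,
 * giving 0b11 instead of 0b10, so later rounding still sees the lost
 * precision. assert() is assumed to be available via osdep.h.
 */
static inline __attribute__((unused)) void shrjam_example(void)
{
    FloatParts64 p = { .frac = 0x9 };

    frac64_shrjam(&p, 2);
    assert(p.frac == 0x3);
}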
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
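
/*
 * Template-expansion note (illustrative; the exact definitions live in the
 * included .c.inc files): with N = 64 and W = 128, a definition written as
 *
 *     static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, ...)
 *
 * expands to parts64_mul operating on FloatParts64, with FloatPartsW =
 * FloatParts128 available for double-width intermediates. The second
 * inclusion re-expands the same text for the 128/256-bit pair.
 */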
/*
 * Pack/unpack routines with a specific FloatFmt.
 */

static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
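
/*
 * Structural sketch (hypothetical helper, never called): every softfloat
 * operation below is built on the same unpack -> parts_* -> round/pack
 * pipeline; the identity "operation" would look like this.
 */
static inline __attribute__((unused))
float32 parts_pipeline_example(float32 f, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, f, s);   /* raw bits -> normalized parts */
    /* ... a parts64_* operation would transform p here ... */
    return float32_round_pack_canonical(&p, s);  /* parts -> rounded bits */
}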
/*
 * Addition and subtraction
 */

static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}

static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}

static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    }
    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}

static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    } else {
        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
    }
}

static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}

static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
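
/*
 * Fast-path wiring note (illustrative commentary): float32_add() above
 * tries hard_f32_add on the host FPU first and falls back to soft_f32_add
 * when the status disallows it (can_use_fpu), when an input is
 * denormal/inf/NaN (f32_is_zon2 as the pre check), or when the result is
 * tiny enough that underflow flags must be computed exactly. The
 * f32_addsubmul_post check rules out only the exact 0 + 0 case, which
 * cannot underflow.
 */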
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}

/*
 * Multiplication
 */

float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}

bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Fused multiply-add
 */

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}
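
/*
 * Flag-semantics sketch (illustrative), with a single rounding at the end:
 *   float16_muladd(a, b, c, 0, s)                           ->  a * b + c
 *   float16_muladd(a, b, c, float_muladd_negate_product, s) -> -(a * b) + c
 *   float16_muladd(a, b, c, float_muladd_negate_c, s)       ->  a * b - c
 *   float16_muladd(a, b, c, float_muladd_negate_result, s)  -> -(a * b + c)
 */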
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}

static bool force_soft_fma;

float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
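
/*
 * Worked example for the zero-product shortcut above (illustrative):
 * for a = -0.0f, b = 2.0f and float_muladd_negate_product set,
 * prod_sign = (1 ^ 0) ^ 1 = 0, so the product contributes +0.0 and
 * ur.h = +0.0 + uc.h reproduces the IEEE sign rules for 0 + c without
 * ever forming the multiplication.
 */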
1914      */
1915     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1916         union_float64 up;
1917         bool prod_sign;
1918
1919         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1920         prod_sign ^= !!(flags & float_muladd_negate_product);
1921         up.s = float64_set_sign(float64_zero, prod_sign);
1922
1923         if (flags & float_muladd_negate_c) {
1924             uc.h = -uc.h;
1925         }
1926         ur.h = up.h + uc.h;
1927     } else {
1928         union_float64 ua_orig = ua;
1929         union_float64 uc_orig = uc;
1930
1931         if (flags & float_muladd_negate_product) {
1932             ua.h = -ua.h;
1933         }
1934         if (flags & float_muladd_negate_c) {
1935             uc.h = -uc.h;
1936         }
1937
1938         ur.h = fma(ua.h, ub.h, uc.h);
1939
1940         if (unlikely(f64_is_inf(ur))) {
1941             float_raise(float_flag_overflow, s);
1942         } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
1943             ua = ua_orig;
1944             uc = uc_orig;
1945             goto soft;
1946         }
1947     }
1948     if (flags & float_muladd_negate_result) {
1949         return float64_chs(ur.s);
1950     }
1951     return ur.s;
1952
1953 soft:
1954     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1955 }
1956
1957 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1958                                       int flags, float_status *status)
1959 {
1960     FloatParts64 pa, pb, pc, *pr;
1961
1962     bfloat16_unpack_canonical(&pa, a, status);
1963     bfloat16_unpack_canonical(&pb, b, status);
1964     bfloat16_unpack_canonical(&pc, c, status);
1965     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1966
1967     return bfloat16_round_pack_canonical(pr, status);
1968 }
1969
1970 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1971                                       int flags, float_status *status)
1972 {
1973     FloatParts128 pa, pb, pc, *pr;
1974
1975     float128_unpack_canonical(&pa, a, status);
1976     float128_unpack_canonical(&pb, b, status);
1977     float128_unpack_canonical(&pc, c, status);
1978     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1979
1980     return float128_round_pack_canonical(pr, status);
1981 }
1982
1983 /*
1984  * Division
1985  */
1986
1987 float16 float16_div(float16 a, float16 b, float_status *status)
1988 {
1989     FloatParts64 pa, pb, *pr;
1990
1991     float16_unpack_canonical(&pa, a, status);
1992     float16_unpack_canonical(&pb, b, status);
1993     pr = parts_div(&pa, &pb, status);
1994
1995     return float16_round_pack_canonical(pr, status);
1996 }
1997
1998 static float32 QEMU_SOFTFLOAT_ATTR
1999 soft_f32_div(float32 a, float32 b, float_status *status)
2000 {
2001     FloatParts64 pa, pb, *pr;
2002
2003     float32_unpack_canonical(&pa, a, status);
2004     float32_unpack_canonical(&pb, b, status);
2005     pr = parts_div(&pa, &pb, status);
2006
2007     return float32_round_pack_canonical(pr, status);
2008 }
2009
2010 static float64 QEMU_SOFTFLOAT_ATTR
2011 soft_f64_div(float64 a, float64 b, float_status *status)
2012 {
2013     FloatParts64 pa, pb, *pr;
2014
2015     float64_unpack_canonical(&pa, a, status);
2016     float64_unpack_canonical(&pb, b, status);
2017     pr = parts_div(&pa, &pb, status);
2018
2019     return float64_round_pack_canonical(pr, status);
2020 }
2021
2022 static float hard_f32_div(float a, float b)
2023 {
2024     return a / b;
2025 }
2026
2027 static double hard_f64_div(double a, double b)
2028 {
2029     return a / b;
2030 }
2031
2032 static bool f32_div_pre(union_float32 a, union_float32 b)
2033 {
2034     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2035         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2036                fpclassify(b.h) == FP_NORMAL;
2037     }
2038     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2039 }
2040
2041 static bool f64_div_pre(union_float64 a, union_float64 b)
2042 {
2043     if
(QEMU_HARDFLOAT_2F64_USE_FP) { 2044 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 2045 fpclassify(b.h) == FP_NORMAL; 2046 } 2047 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s); 2048 } 2049 2050 static bool f32_div_post(union_float32 a, union_float32 b) 2051 { 2052 if (QEMU_HARDFLOAT_2F32_USE_FP) { 2053 return fpclassify(a.h) != FP_ZERO; 2054 } 2055 return !float32_is_zero(a.s); 2056 } 2057 2058 static bool f64_div_post(union_float64 a, union_float64 b) 2059 { 2060 if (QEMU_HARDFLOAT_2F64_USE_FP) { 2061 return fpclassify(a.h) != FP_ZERO; 2062 } 2063 return !float64_is_zero(a.s); 2064 } 2065 2066 float32 QEMU_FLATTEN 2067 float32_div(float32 a, float32 b, float_status *s) 2068 { 2069 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, 2070 f32_div_pre, f32_div_post); 2071 } 2072 2073 float64 QEMU_FLATTEN 2074 float64_div(float64 a, float64 b, float_status *s) 2075 { 2076 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, 2077 f64_div_pre, f64_div_post); 2078 } 2079 2080 bfloat16 QEMU_FLATTEN 2081 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status) 2082 { 2083 FloatParts64 pa, pb, *pr; 2084 2085 bfloat16_unpack_canonical(&pa, a, status); 2086 bfloat16_unpack_canonical(&pb, b, status); 2087 pr = parts_div(&pa, &pb, status); 2088 2089 return bfloat16_round_pack_canonical(pr, status); 2090 } 2091 2092 float128 QEMU_FLATTEN 2093 float128_div(float128 a, float128 b, float_status *status) 2094 { 2095 FloatParts128 pa, pb, *pr; 2096 2097 float128_unpack_canonical(&pa, a, status); 2098 float128_unpack_canonical(&pb, b, status); 2099 pr = parts_div(&pa, &pb, status); 2100 2101 return float128_round_pack_canonical(pr, status); 2102 } 2103 2104 /* 2105 * Float to Float conversions 2106 * 2107 * Returns the result of converting one float format to another. The 2108 * conversion is performed according to the IEC/IEEE Standard for 2109 * Binary Floating-Point Arithmetic. 2110 * 2111 * Usually this only needs to take care of raising invalid exceptions 2112 * and handling the conversion on NaNs. 2113 */ 2114 2115 static void parts_float_to_ahp(FloatParts64 *a, float_status *s) 2116 { 2117 switch (a->cls) { 2118 case float_class_qnan: 2119 case float_class_snan: 2120 /* 2121 * There is no NaN in the destination format. Raise Invalid 2122 * and return a zero with the sign of the input NaN. 2123 */ 2124 float_raise(float_flag_invalid, s); 2125 a->cls = float_class_zero; 2126 break; 2127 2128 case float_class_inf: 2129 /* 2130 * There is no Inf in the destination format. Raise Invalid 2131 * and return the maximum normal with the correct sign. 
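         * (The Arm alternative half-precision format has no Inf or NaN
         * encodings; the all-ones exponent encodes normal numbers, so
         * exp_max with a full fraction is the largest representable
         * magnitude.)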
2132 */ 2133 float_raise(float_flag_invalid, s); 2134 a->cls = float_class_normal; 2135 a->exp = float16_params_ahp.exp_max; 2136 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift, 2137 float16_params_ahp.frac_size + 1); 2138 break; 2139 2140 case float_class_normal: 2141 case float_class_zero: 2142 break; 2143 2144 default: 2145 g_assert_not_reached(); 2146 } 2147 } 2148 2149 static void parts64_float_to_float(FloatParts64 *a, float_status *s) 2150 { 2151 if (is_nan(a->cls)) { 2152 parts_return_nan(a, s); 2153 } 2154 } 2155 2156 static void parts128_float_to_float(FloatParts128 *a, float_status *s) 2157 { 2158 if (is_nan(a->cls)) { 2159 parts_return_nan(a, s); 2160 } 2161 } 2162 2163 #define parts_float_to_float(P, S) \ 2164 PARTS_GENERIC_64_128(float_to_float, P)(P, S) 2165 2166 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, 2167 float_status *s) 2168 { 2169 a->cls = b->cls; 2170 a->sign = b->sign; 2171 a->exp = b->exp; 2172 2173 if (a->cls == float_class_normal) { 2174 frac_truncjam(a, b); 2175 } else if (is_nan(a->cls)) { 2176 /* Discard the low bits of the NaN. */ 2177 a->frac = b->frac_hi; 2178 parts_return_nan(a, s); 2179 } 2180 } 2181 2182 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, 2183 float_status *s) 2184 { 2185 a->cls = b->cls; 2186 a->sign = b->sign; 2187 a->exp = b->exp; 2188 frac_widen(a, b); 2189 2190 if (is_nan(a->cls)) { 2191 parts_return_nan(a, s); 2192 } 2193 } 2194 2195 float32 float16_to_float32(float16 a, bool ieee, float_status *s) 2196 { 2197 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; 2198 FloatParts64 p; 2199 2200 float16a_unpack_canonical(&p, a, s, fmt16); 2201 parts_float_to_float(&p, s); 2202 return float32_round_pack_canonical(&p, s); 2203 } 2204 2205 float64 float16_to_float64(float16 a, bool ieee, float_status *s) 2206 { 2207 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; 2208 FloatParts64 p; 2209 2210 float16a_unpack_canonical(&p, a, s, fmt16); 2211 parts_float_to_float(&p, s); 2212 return float64_round_pack_canonical(&p, s); 2213 } 2214 2215 float16 float32_to_float16(float32 a, bool ieee, float_status *s) 2216 { 2217 FloatParts64 p; 2218 const FloatFmt *fmt; 2219 2220 float32_unpack_canonical(&p, a, s); 2221 if (ieee) { 2222 parts_float_to_float(&p, s); 2223 fmt = &float16_params; 2224 } else { 2225 parts_float_to_ahp(&p, s); 2226 fmt = &float16_params_ahp; 2227 } 2228 return float16a_round_pack_canonical(&p, s, fmt); 2229 } 2230 2231 static float64 QEMU_SOFTFLOAT_ATTR 2232 soft_float32_to_float64(float32 a, float_status *s) 2233 { 2234 FloatParts64 p; 2235 2236 float32_unpack_canonical(&p, a, s); 2237 parts_float_to_float(&p, s); 2238 return float64_round_pack_canonical(&p, s); 2239 } 2240 2241 float64 float32_to_float64(float32 a, float_status *s) 2242 { 2243 if (likely(float32_is_normal(a))) { 2244 /* Widening conversion can never produce inexact results. 
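         * Every normal float32 is exactly representable as a float64,
         * since both the exponent range and the fraction widen, so the
         * host float-to-double conversion below cannot raise any flag.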
*/ 2245 union_float32 uf; 2246 union_float64 ud; 2247 uf.s = a; 2248 ud.h = uf.h; 2249 return ud.s; 2250 } else if (float32_is_zero(a)) { 2251 return float64_set_sign(float64_zero, float32_is_neg(a)); 2252 } else { 2253 return soft_float32_to_float64(a, s); 2254 } 2255 } 2256 2257 float16 float64_to_float16(float64 a, bool ieee, float_status *s) 2258 { 2259 FloatParts64 p; 2260 const FloatFmt *fmt; 2261 2262 float64_unpack_canonical(&p, a, s); 2263 if (ieee) { 2264 parts_float_to_float(&p, s); 2265 fmt = &float16_params; 2266 } else { 2267 parts_float_to_ahp(&p, s); 2268 fmt = &float16_params_ahp; 2269 } 2270 return float16a_round_pack_canonical(&p, s, fmt); 2271 } 2272 2273 float32 float64_to_float32(float64 a, float_status *s) 2274 { 2275 FloatParts64 p; 2276 2277 float64_unpack_canonical(&p, a, s); 2278 parts_float_to_float(&p, s); 2279 return float32_round_pack_canonical(&p, s); 2280 } 2281 2282 float32 bfloat16_to_float32(bfloat16 a, float_status *s) 2283 { 2284 FloatParts64 p; 2285 2286 bfloat16_unpack_canonical(&p, a, s); 2287 parts_float_to_float(&p, s); 2288 return float32_round_pack_canonical(&p, s); 2289 } 2290 2291 float64 bfloat16_to_float64(bfloat16 a, float_status *s) 2292 { 2293 FloatParts64 p; 2294 2295 bfloat16_unpack_canonical(&p, a, s); 2296 parts_float_to_float(&p, s); 2297 return float64_round_pack_canonical(&p, s); 2298 } 2299 2300 bfloat16 float32_to_bfloat16(float32 a, float_status *s) 2301 { 2302 FloatParts64 p; 2303 2304 float32_unpack_canonical(&p, a, s); 2305 parts_float_to_float(&p, s); 2306 return bfloat16_round_pack_canonical(&p, s); 2307 } 2308 2309 bfloat16 float64_to_bfloat16(float64 a, float_status *s) 2310 { 2311 FloatParts64 p; 2312 2313 float64_unpack_canonical(&p, a, s); 2314 parts_float_to_float(&p, s); 2315 return bfloat16_round_pack_canonical(&p, s); 2316 } 2317 2318 float32 float128_to_float32(float128 a, float_status *s) 2319 { 2320 FloatParts64 p64; 2321 FloatParts128 p128; 2322 2323 float128_unpack_canonical(&p128, a, s); 2324 parts_float_to_float_narrow(&p64, &p128, s); 2325 return float32_round_pack_canonical(&p64, s); 2326 } 2327 2328 float64 float128_to_float64(float128 a, float_status *s) 2329 { 2330 FloatParts64 p64; 2331 FloatParts128 p128; 2332 2333 float128_unpack_canonical(&p128, a, s); 2334 parts_float_to_float_narrow(&p64, &p128, s); 2335 return float64_round_pack_canonical(&p64, s); 2336 } 2337 2338 float128 float32_to_float128(float32 a, float_status *s) 2339 { 2340 FloatParts64 p64; 2341 FloatParts128 p128; 2342 2343 float32_unpack_canonical(&p64, a, s); 2344 parts_float_to_float_widen(&p128, &p64, s); 2345 return float128_round_pack_canonical(&p128, s); 2346 } 2347 2348 float128 float64_to_float128(float64 a, float_status *s) 2349 { 2350 FloatParts64 p64; 2351 FloatParts128 p128; 2352 2353 float64_unpack_canonical(&p64, a, s); 2354 parts_float_to_float_widen(&p128, &p64, s); 2355 return float128_round_pack_canonical(&p128, s); 2356 } 2357 2358 /* 2359 * Round to integral value 2360 */ 2361 2362 float16 float16_round_to_int(float16 a, float_status *s) 2363 { 2364 FloatParts64 p; 2365 2366 float16_unpack_canonical(&p, a, s); 2367 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params); 2368 return float16_round_pack_canonical(&p, s); 2369 } 2370 2371 float32 float32_round_to_int(float32 a, float_status *s) 2372 { 2373 FloatParts64 p; 2374 2375 float32_unpack_canonical(&p, a, s); 2376 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params); 2377 return float32_round_pack_canonical(&p, s); 2378 } 2379 
2380 float64 float64_round_to_int(float64 a, float_status *s) 2381 { 2382 FloatParts64 p; 2383 2384 float64_unpack_canonical(&p, a, s); 2385 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params); 2386 return float64_round_pack_canonical(&p, s); 2387 } 2388 2389 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s) 2390 { 2391 FloatParts64 p; 2392 2393 bfloat16_unpack_canonical(&p, a, s); 2394 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params); 2395 return bfloat16_round_pack_canonical(&p, s); 2396 } 2397 2398 float128 float128_round_to_int(float128 a, float_status *s) 2399 { 2400 FloatParts128 p; 2401 2402 float128_unpack_canonical(&p, a, s); 2403 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params); 2404 return float128_round_pack_canonical(&p, s); 2405 } 2406 2407 /* 2408 * Floating-point to signed integer conversions 2409 */ 2410 2411 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale, 2412 float_status *s) 2413 { 2414 FloatParts64 p; 2415 2416 float16_unpack_canonical(&p, a, s); 2417 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); 2418 } 2419 2420 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, 2421 float_status *s) 2422 { 2423 FloatParts64 p; 2424 2425 float16_unpack_canonical(&p, a, s); 2426 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 2427 } 2428 2429 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, 2430 float_status *s) 2431 { 2432 FloatParts64 p; 2433 2434 float16_unpack_canonical(&p, a, s); 2435 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 2436 } 2437 2438 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, 2439 float_status *s) 2440 { 2441 FloatParts64 p; 2442 2443 float16_unpack_canonical(&p, a, s); 2444 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 2445 } 2446 2447 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, 2448 float_status *s) 2449 { 2450 FloatParts64 p; 2451 2452 float32_unpack_canonical(&p, a, s); 2453 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 2454 } 2455 2456 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, 2457 float_status *s) 2458 { 2459 FloatParts64 p; 2460 2461 float32_unpack_canonical(&p, a, s); 2462 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 2463 } 2464 2465 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, 2466 float_status *s) 2467 { 2468 FloatParts64 p; 2469 2470 float32_unpack_canonical(&p, a, s); 2471 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 2472 } 2473 2474 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, 2475 float_status *s) 2476 { 2477 FloatParts64 p; 2478 2479 float64_unpack_canonical(&p, a, s); 2480 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 2481 } 2482 2483 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, 2484 float_status *s) 2485 { 2486 FloatParts64 p; 2487 2488 float64_unpack_canonical(&p, a, s); 2489 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 2490 } 2491 2492 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, 2493 float_status *s) 2494 { 2495 FloatParts64 p; 2496 2497 float64_unpack_canonical(&p, a, s); 2498 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 2499 } 2500 
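/*
 * Note that the *_scalbn conversions treat the input as a fixed-point
 * value, multiplying it by 2**scale before rounding to an integer.
 * Targets use this to implement conversions with a fractional-bits
 * operand without a separate scaling step.
 */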
2501 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 2502 float_status *s) 2503 { 2504 FloatParts64 p; 2505 2506 bfloat16_unpack_canonical(&p, a, s); 2507 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 2508 } 2509 2510 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 2511 float_status *s) 2512 { 2513 FloatParts64 p; 2514 2515 bfloat16_unpack_canonical(&p, a, s); 2516 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 2517 } 2518 2519 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 2520 float_status *s) 2521 { 2522 FloatParts64 p; 2523 2524 bfloat16_unpack_canonical(&p, a, s); 2525 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 2526 } 2527 2528 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode, 2529 int scale, float_status *s) 2530 { 2531 FloatParts128 p; 2532 2533 float128_unpack_canonical(&p, a, s); 2534 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 2535 } 2536 2537 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode, 2538 int scale, float_status *s) 2539 { 2540 FloatParts128 p; 2541 2542 float128_unpack_canonical(&p, a, s); 2543 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 2544 } 2545 2546 int8_t float16_to_int8(float16 a, float_status *s) 2547 { 2548 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); 2549 } 2550 2551 int16_t float16_to_int16(float16 a, float_status *s) 2552 { 2553 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 2554 } 2555 2556 int32_t float16_to_int32(float16 a, float_status *s) 2557 { 2558 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 2559 } 2560 2561 int64_t float16_to_int64(float16 a, float_status *s) 2562 { 2563 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 2564 } 2565 2566 int16_t float32_to_int16(float32 a, float_status *s) 2567 { 2568 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 2569 } 2570 2571 int32_t float32_to_int32(float32 a, float_status *s) 2572 { 2573 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 2574 } 2575 2576 int64_t float32_to_int64(float32 a, float_status *s) 2577 { 2578 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 2579 } 2580 2581 int16_t float64_to_int16(float64 a, float_status *s) 2582 { 2583 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 2584 } 2585 2586 int32_t float64_to_int32(float64 a, float_status *s) 2587 { 2588 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 2589 } 2590 2591 int64_t float64_to_int64(float64 a, float_status *s) 2592 { 2593 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 2594 } 2595 2596 int32_t float128_to_int32(float128 a, float_status *s) 2597 { 2598 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 2599 } 2600 2601 int64_t float128_to_int64(float128 a, float_status *s) 2602 { 2603 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 2604 } 2605 2606 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s) 2607 { 2608 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s); 2609 } 2610 2611 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s) 2612 { 2613 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s); 2614 } 2615 2616 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s) 2617 { 2618 return 
float16_to_int64_scalbn(a, float_round_to_zero, 0, s); 2619 } 2620 2621 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s) 2622 { 2623 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s); 2624 } 2625 2626 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s) 2627 { 2628 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s); 2629 } 2630 2631 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s) 2632 { 2633 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s); 2634 } 2635 2636 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s) 2637 { 2638 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s); 2639 } 2640 2641 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s) 2642 { 2643 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s); 2644 } 2645 2646 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) 2647 { 2648 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s); 2649 } 2650 2651 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s) 2652 { 2653 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s); 2654 } 2655 2656 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s) 2657 { 2658 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s); 2659 } 2660 2661 int16_t bfloat16_to_int16(bfloat16 a, float_status *s) 2662 { 2663 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 2664 } 2665 2666 int32_t bfloat16_to_int32(bfloat16 a, float_status *s) 2667 { 2668 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 2669 } 2670 2671 int64_t bfloat16_to_int64(bfloat16 a, float_status *s) 2672 { 2673 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 2674 } 2675 2676 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) 2677 { 2678 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); 2679 } 2680 2681 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s) 2682 { 2683 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s); 2684 } 2685 2686 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s) 2687 { 2688 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s); 2689 } 2690 2691 /* 2692 * Floating-point to unsigned integer conversions 2693 */ 2694 2695 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale, 2696 float_status *s) 2697 { 2698 FloatParts64 p; 2699 2700 float16_unpack_canonical(&p, a, s); 2701 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); 2702 } 2703 2704 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, 2705 float_status *s) 2706 { 2707 FloatParts64 p; 2708 2709 float16_unpack_canonical(&p, a, s); 2710 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 2711 } 2712 2713 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, 2714 float_status *s) 2715 { 2716 FloatParts64 p; 2717 2718 float16_unpack_canonical(&p, a, s); 2719 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 2720 } 2721 2722 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, 2723 float_status *s) 2724 { 2725 FloatParts64 p; 2726 2727 float16_unpack_canonical(&p, a, s); 2728 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 2729 } 2730 2731 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, 2732 float_status *s) 2733 { 2734 FloatParts64 p; 2735 2736 float32_unpack_canonical(&p, a, s); 2737 
return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 2738 } 2739 2740 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, 2741 float_status *s) 2742 { 2743 FloatParts64 p; 2744 2745 float32_unpack_canonical(&p, a, s); 2746 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 2747 } 2748 2749 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, 2750 float_status *s) 2751 { 2752 FloatParts64 p; 2753 2754 float32_unpack_canonical(&p, a, s); 2755 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 2756 } 2757 2758 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, 2759 float_status *s) 2760 { 2761 FloatParts64 p; 2762 2763 float64_unpack_canonical(&p, a, s); 2764 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 2765 } 2766 2767 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, 2768 float_status *s) 2769 { 2770 FloatParts64 p; 2771 2772 float64_unpack_canonical(&p, a, s); 2773 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 2774 } 2775 2776 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, 2777 float_status *s) 2778 { 2779 FloatParts64 p; 2780 2781 float64_unpack_canonical(&p, a, s); 2782 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 2783 } 2784 2785 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, 2786 int scale, float_status *s) 2787 { 2788 FloatParts64 p; 2789 2790 bfloat16_unpack_canonical(&p, a, s); 2791 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 2792 } 2793 2794 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode, 2795 int scale, float_status *s) 2796 { 2797 FloatParts64 p; 2798 2799 bfloat16_unpack_canonical(&p, a, s); 2800 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 2801 } 2802 2803 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode, 2804 int scale, float_status *s) 2805 { 2806 FloatParts64 p; 2807 2808 bfloat16_unpack_canonical(&p, a, s); 2809 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 2810 } 2811 2812 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode, 2813 int scale, float_status *s) 2814 { 2815 FloatParts128 p; 2816 2817 float128_unpack_canonical(&p, a, s); 2818 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 2819 } 2820 2821 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode, 2822 int scale, float_status *s) 2823 { 2824 FloatParts128 p; 2825 2826 float128_unpack_canonical(&p, a, s); 2827 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 2828 } 2829 2830 uint8_t float16_to_uint8(float16 a, float_status *s) 2831 { 2832 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); 2833 } 2834 2835 uint16_t float16_to_uint16(float16 a, float_status *s) 2836 { 2837 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 2838 } 2839 2840 uint32_t float16_to_uint32(float16 a, float_status *s) 2841 { 2842 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 2843 } 2844 2845 uint64_t float16_to_uint64(float16 a, float_status *s) 2846 { 2847 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 2848 } 2849 2850 uint16_t float32_to_uint16(float32 a, float_status *s) 2851 { 2852 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 2853 } 2854 2855 uint32_t float32_to_uint32(float32 a, float_status *s) 2856 { 2857 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, 
s); 2858 } 2859 2860 uint64_t float32_to_uint64(float32 a, float_status *s) 2861 { 2862 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 2863 } 2864 2865 uint16_t float64_to_uint16(float64 a, float_status *s) 2866 { 2867 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 2868 } 2869 2870 uint32_t float64_to_uint32(float64 a, float_status *s) 2871 { 2872 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 2873 } 2874 2875 uint64_t float64_to_uint64(float64 a, float_status *s) 2876 { 2877 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 2878 } 2879 2880 uint32_t float128_to_uint32(float128 a, float_status *s) 2881 { 2882 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 2883 } 2884 2885 uint64_t float128_to_uint64(float128 a, float_status *s) 2886 { 2887 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 2888 } 2889 2890 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s) 2891 { 2892 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s); 2893 } 2894 2895 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s) 2896 { 2897 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s); 2898 } 2899 2900 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s) 2901 { 2902 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2903 } 2904 2905 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s) 2906 { 2907 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s); 2908 } 2909 2910 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s) 2911 { 2912 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s); 2913 } 2914 2915 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s) 2916 { 2917 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2918 } 2919 2920 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s) 2921 { 2922 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s); 2923 } 2924 2925 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s) 2926 { 2927 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s); 2928 } 2929 2930 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) 2931 { 2932 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2933 } 2934 2935 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s) 2936 { 2937 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s); 2938 } 2939 2940 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s) 2941 { 2942 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2943 } 2944 2945 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) 2946 { 2947 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 2948 } 2949 2950 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) 2951 { 2952 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 2953 } 2954 2955 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) 2956 { 2957 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 2958 } 2959 2960 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) 2961 { 2962 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); 2963 } 2964 2965 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) 2966 { 2967 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); 2968 } 2969 2970 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, 
float_status *s) 2971 { 2972 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2973 } 2974 2975 /* 2976 * Signed integer to floating-point conversions 2977 */ 2978 2979 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) 2980 { 2981 FloatParts64 p; 2982 2983 parts_sint_to_float(&p, a, scale, status); 2984 return float16_round_pack_canonical(&p, status); 2985 } 2986 2987 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) 2988 { 2989 return int64_to_float16_scalbn(a, scale, status); 2990 } 2991 2992 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) 2993 { 2994 return int64_to_float16_scalbn(a, scale, status); 2995 } 2996 2997 float16 int64_to_float16(int64_t a, float_status *status) 2998 { 2999 return int64_to_float16_scalbn(a, 0, status); 3000 } 3001 3002 float16 int32_to_float16(int32_t a, float_status *status) 3003 { 3004 return int64_to_float16_scalbn(a, 0, status); 3005 } 3006 3007 float16 int16_to_float16(int16_t a, float_status *status) 3008 { 3009 return int64_to_float16_scalbn(a, 0, status); 3010 } 3011 3012 float16 int8_to_float16(int8_t a, float_status *status) 3013 { 3014 return int64_to_float16_scalbn(a, 0, status); 3015 } 3016 3017 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) 3018 { 3019 FloatParts64 p; 3020 3021 parts64_sint_to_float(&p, a, scale, status); 3022 return float32_round_pack_canonical(&p, status); 3023 } 3024 3025 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) 3026 { 3027 return int64_to_float32_scalbn(a, scale, status); 3028 } 3029 3030 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) 3031 { 3032 return int64_to_float32_scalbn(a, scale, status); 3033 } 3034 3035 float32 int64_to_float32(int64_t a, float_status *status) 3036 { 3037 return int64_to_float32_scalbn(a, 0, status); 3038 } 3039 3040 float32 int32_to_float32(int32_t a, float_status *status) 3041 { 3042 return int64_to_float32_scalbn(a, 0, status); 3043 } 3044 3045 float32 int16_to_float32(int16_t a, float_status *status) 3046 { 3047 return int64_to_float32_scalbn(a, 0, status); 3048 } 3049 3050 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status) 3051 { 3052 FloatParts64 p; 3053 3054 parts_sint_to_float(&p, a, scale, status); 3055 return float64_round_pack_canonical(&p, status); 3056 } 3057 3058 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status) 3059 { 3060 return int64_to_float64_scalbn(a, scale, status); 3061 } 3062 3063 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status) 3064 { 3065 return int64_to_float64_scalbn(a, scale, status); 3066 } 3067 3068 float64 int64_to_float64(int64_t a, float_status *status) 3069 { 3070 return int64_to_float64_scalbn(a, 0, status); 3071 } 3072 3073 float64 int32_to_float64(int32_t a, float_status *status) 3074 { 3075 return int64_to_float64_scalbn(a, 0, status); 3076 } 3077 3078 float64 int16_to_float64(int16_t a, float_status *status) 3079 { 3080 return int64_to_float64_scalbn(a, 0, status); 3081 } 3082 3083 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status) 3084 { 3085 FloatParts64 p; 3086 3087 parts_sint_to_float(&p, a, scale, status); 3088 return bfloat16_round_pack_canonical(&p, status); 3089 } 3090 3091 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status) 3092 { 3093 return int64_to_bfloat16_scalbn(a, scale, status); 3094 } 3095 3096 bfloat16 
int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) 3097 { 3098 return int64_to_bfloat16_scalbn(a, scale, status); 3099 } 3100 3101 bfloat16 int64_to_bfloat16(int64_t a, float_status *status) 3102 { 3103 return int64_to_bfloat16_scalbn(a, 0, status); 3104 } 3105 3106 bfloat16 int32_to_bfloat16(int32_t a, float_status *status) 3107 { 3108 return int64_to_bfloat16_scalbn(a, 0, status); 3109 } 3110 3111 bfloat16 int16_to_bfloat16(int16_t a, float_status *status) 3112 { 3113 return int64_to_bfloat16_scalbn(a, 0, status); 3114 } 3115 3116 float128 int64_to_float128(int64_t a, float_status *status) 3117 { 3118 FloatParts128 p; 3119 3120 parts_sint_to_float(&p, a, 0, status); 3121 return float128_round_pack_canonical(&p, status); 3122 } 3123 3124 float128 int32_to_float128(int32_t a, float_status *status) 3125 { 3126 return int64_to_float128(a, status); 3127 } 3128 3129 /* 3130 * Unsigned Integer to floating-point conversions 3131 */ 3132 3133 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status) 3134 { 3135 FloatParts64 p; 3136 3137 parts_uint_to_float(&p, a, scale, status); 3138 return float16_round_pack_canonical(&p, status); 3139 } 3140 3141 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status) 3142 { 3143 return uint64_to_float16_scalbn(a, scale, status); 3144 } 3145 3146 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status) 3147 { 3148 return uint64_to_float16_scalbn(a, scale, status); 3149 } 3150 3151 float16 uint64_to_float16(uint64_t a, float_status *status) 3152 { 3153 return uint64_to_float16_scalbn(a, 0, status); 3154 } 3155 3156 float16 uint32_to_float16(uint32_t a, float_status *status) 3157 { 3158 return uint64_to_float16_scalbn(a, 0, status); 3159 } 3160 3161 float16 uint16_to_float16(uint16_t a, float_status *status) 3162 { 3163 return uint64_to_float16_scalbn(a, 0, status); 3164 } 3165 3166 float16 uint8_to_float16(uint8_t a, float_status *status) 3167 { 3168 return uint64_to_float16_scalbn(a, 0, status); 3169 } 3170 3171 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status) 3172 { 3173 FloatParts64 p; 3174 3175 parts_uint_to_float(&p, a, scale, status); 3176 return float32_round_pack_canonical(&p, status); 3177 } 3178 3179 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status) 3180 { 3181 return uint64_to_float32_scalbn(a, scale, status); 3182 } 3183 3184 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status) 3185 { 3186 return uint64_to_float32_scalbn(a, scale, status); 3187 } 3188 3189 float32 uint64_to_float32(uint64_t a, float_status *status) 3190 { 3191 return uint64_to_float32_scalbn(a, 0, status); 3192 } 3193 3194 float32 uint32_to_float32(uint32_t a, float_status *status) 3195 { 3196 return uint64_to_float32_scalbn(a, 0, status); 3197 } 3198 3199 float32 uint16_to_float32(uint16_t a, float_status *status) 3200 { 3201 return uint64_to_float32_scalbn(a, 0, status); 3202 } 3203 3204 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status) 3205 { 3206 FloatParts64 p; 3207 3208 parts_uint_to_float(&p, a, scale, status); 3209 return float64_round_pack_canonical(&p, status); 3210 } 3211 3212 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status) 3213 { 3214 return uint64_to_float64_scalbn(a, scale, status); 3215 } 3216 3217 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status) 3218 { 3219 return uint64_to_float64_scalbn(a, scale, status); 3220 } 3221 
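/*
 * The narrower unsigned widths zero-extend to uint64_t and share the
 * uint64 implementation. The extension is lossless, so only the final
 * rounding into the destination float format can raise flags.
 */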
3222 float64 uint64_to_float64(uint64_t a, float_status *status) 3223 { 3224 return uint64_to_float64_scalbn(a, 0, status); 3225 } 3226 3227 float64 uint32_to_float64(uint32_t a, float_status *status) 3228 { 3229 return uint64_to_float64_scalbn(a, 0, status); 3230 } 3231 3232 float64 uint16_to_float64(uint16_t a, float_status *status) 3233 { 3234 return uint64_to_float64_scalbn(a, 0, status); 3235 } 3236 3237 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status) 3238 { 3239 FloatParts64 p; 3240 3241 parts_uint_to_float(&p, a, scale, status); 3242 return bfloat16_round_pack_canonical(&p, status); 3243 } 3244 3245 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status) 3246 { 3247 return uint64_to_bfloat16_scalbn(a, scale, status); 3248 } 3249 3250 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) 3251 { 3252 return uint64_to_bfloat16_scalbn(a, scale, status); 3253 } 3254 3255 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) 3256 { 3257 return uint64_to_bfloat16_scalbn(a, 0, status); 3258 } 3259 3260 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status) 3261 { 3262 return uint64_to_bfloat16_scalbn(a, 0, status); 3263 } 3264 3265 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) 3266 { 3267 return uint64_to_bfloat16_scalbn(a, 0, status); 3268 } 3269 3270 float128 uint64_to_float128(uint64_t a, float_status *status) 3271 { 3272 FloatParts128 p; 3273 3274 parts_uint_to_float(&p, a, 0, status); 3275 return float128_round_pack_canonical(&p, status); 3276 } 3277 3278 /* 3279 * Minimum and maximum 3280 */ 3281 3282 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags) 3283 { 3284 FloatParts64 pa, pb, *pr; 3285 3286 float16_unpack_canonical(&pa, a, s); 3287 float16_unpack_canonical(&pb, b, s); 3288 pr = parts_minmax(&pa, &pb, s, flags); 3289 3290 return float16_round_pack_canonical(pr, s); 3291 } 3292 3293 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b, 3294 float_status *s, int flags) 3295 { 3296 FloatParts64 pa, pb, *pr; 3297 3298 bfloat16_unpack_canonical(&pa, a, s); 3299 bfloat16_unpack_canonical(&pb, b, s); 3300 pr = parts_minmax(&pa, &pb, s, flags); 3301 3302 return bfloat16_round_pack_canonical(pr, s); 3303 } 3304 3305 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags) 3306 { 3307 FloatParts64 pa, pb, *pr; 3308 3309 float32_unpack_canonical(&pa, a, s); 3310 float32_unpack_canonical(&pb, b, s); 3311 pr = parts_minmax(&pa, &pb, s, flags); 3312 3313 return float32_round_pack_canonical(pr, s); 3314 } 3315 3316 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags) 3317 { 3318 FloatParts64 pa, pb, *pr; 3319 3320 float64_unpack_canonical(&pa, a, s); 3321 float64_unpack_canonical(&pb, b, s); 3322 pr = parts_minmax(&pa, &pb, s, flags); 3323 3324 return float64_round_pack_canonical(pr, s); 3325 } 3326 3327 static float128 float128_minmax(float128 a, float128 b, 3328 float_status *s, int flags) 3329 { 3330 FloatParts128 pa, pb, *pr; 3331 3332 float128_unpack_canonical(&pa, a, s); 3333 float128_unpack_canonical(&pb, b, s); 3334 pr = parts_minmax(&pa, &pb, s, flags); 3335 3336 return float128_round_pack_canonical(pr, s); 3337 } 3338 3339 #define MINMAX_1(type, name, flags) \ 3340 type type##_##name(type a, type b, float_status *s) \ 3341 { return type##_minmax(a, b, s, flags); } 3342 3343 #define MINMAX_2(type) \ 3344 MINMAX_1(type, max, 0) \ 3345 MINMAX_1(type, maxnum, minmax_isnum) \ 3346 MINMAX_1(type, 
maxnummag, minmax_isnum | minmax_ismag) \ 3347 MINMAX_1(type, min, minmax_ismin) \ 3348 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \ 3349 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) 3350 3351 MINMAX_2(float16) 3352 MINMAX_2(bfloat16) 3353 MINMAX_2(float32) 3354 MINMAX_2(float64) 3355 MINMAX_2(float128) 3356 3357 #undef MINMAX_1 3358 #undef MINMAX_2 3359 3360 /* Floating point compare */ 3361 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet, 3362 float_status *s) 3363 { 3364 if (is_nan(a.cls) || is_nan(b.cls)) { 3365 if (!is_quiet || 3366 a.cls == float_class_snan || 3367 b.cls == float_class_snan) { 3368 float_raise(float_flag_invalid, s); 3369 } 3370 return float_relation_unordered; 3371 } 3372 3373 if (a.cls == float_class_zero) { 3374 if (b.cls == float_class_zero) { 3375 return float_relation_equal; 3376 } 3377 return b.sign ? float_relation_greater : float_relation_less; 3378 } else if (b.cls == float_class_zero) { 3379 return a.sign ? float_relation_less : float_relation_greater; 3380 } 3381 3382 /* The only really important thing about infinity is its sign. If 3383 * both are infinities the sign marks the smallest of the two. 3384 */ 3385 if (a.cls == float_class_inf) { 3386 if ((b.cls == float_class_inf) && (a.sign == b.sign)) { 3387 return float_relation_equal; 3388 } 3389 return a.sign ? float_relation_less : float_relation_greater; 3390 } else if (b.cls == float_class_inf) { 3391 return b.sign ? float_relation_greater : float_relation_less; 3392 } 3393 3394 if (a.sign != b.sign) { 3395 return a.sign ? float_relation_less : float_relation_greater; 3396 } 3397 3398 if (a.exp == b.exp) { 3399 if (a.frac == b.frac) { 3400 return float_relation_equal; 3401 } 3402 if (a.sign) { 3403 return a.frac > b.frac ? 3404 float_relation_less : float_relation_greater; 3405 } else { 3406 return a.frac > b.frac ? 3407 float_relation_greater : float_relation_less; 3408 } 3409 } else { 3410 if (a.sign) { 3411 return a.exp > b.exp ? float_relation_less : float_relation_greater; 3412 } else { 3413 return a.exp > b.exp ? 
float_relation_greater : float_relation_less; 3414 } 3415 } 3416 } 3417 3418 #define COMPARE(name, attr, sz) \ 3419 static int attr \ 3420 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \ 3421 { \ 3422 FloatParts64 pa, pb; \ 3423 float ## sz ## _unpack_canonical(&pa, a, s); \ 3424 float ## sz ## _unpack_canonical(&pb, b, s); \ 3425 return compare_floats(pa, pb, is_quiet, s); \ 3426 } 3427 3428 COMPARE(soft_f16_compare, QEMU_FLATTEN, 16) 3429 COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32) 3430 COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64) 3431 3432 #undef COMPARE 3433 3434 FloatRelation float16_compare(float16 a, float16 b, float_status *s) 3435 { 3436 return soft_f16_compare(a, b, false, s); 3437 } 3438 3439 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) 3440 { 3441 return soft_f16_compare(a, b, true, s); 3442 } 3443 3444 static FloatRelation QEMU_FLATTEN 3445 f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s) 3446 { 3447 union_float32 ua, ub; 3448 3449 ua.s = xa; 3450 ub.s = xb; 3451 3452 if (QEMU_NO_HARDFLOAT) { 3453 goto soft; 3454 } 3455 3456 float32_input_flush2(&ua.s, &ub.s, s); 3457 if (isgreaterequal(ua.h, ub.h)) { 3458 if (isgreater(ua.h, ub.h)) { 3459 return float_relation_greater; 3460 } 3461 return float_relation_equal; 3462 } 3463 if (likely(isless(ua.h, ub.h))) { 3464 return float_relation_less; 3465 } 3466 /* The only condition remaining is unordered. 3467 * Fall through to set flags. 3468 */ 3469 soft: 3470 return soft_f32_compare(ua.s, ub.s, is_quiet, s); 3471 } 3472 3473 FloatRelation float32_compare(float32 a, float32 b, float_status *s) 3474 { 3475 return f32_compare(a, b, false, s); 3476 } 3477 3478 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) 3479 { 3480 return f32_compare(a, b, true, s); 3481 } 3482 3483 static FloatRelation QEMU_FLATTEN 3484 f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) 3485 { 3486 union_float64 ua, ub; 3487 3488 ua.s = xa; 3489 ub.s = xb; 3490 3491 if (QEMU_NO_HARDFLOAT) { 3492 goto soft; 3493 } 3494 3495 float64_input_flush2(&ua.s, &ub.s, s); 3496 if (isgreaterequal(ua.h, ub.h)) { 3497 if (isgreater(ua.h, ub.h)) { 3498 return float_relation_greater; 3499 } 3500 return float_relation_equal; 3501 } 3502 if (likely(isless(ua.h, ub.h))) { 3503 return float_relation_less; 3504 } 3505 /* The only condition remaining is unordered. 3506 * Fall through to set flags. 3507 */ 3508 soft: 3509 return soft_f64_compare(ua.s, ub.s, is_quiet, s); 3510 } 3511 3512 FloatRelation float64_compare(float64 a, float64 b, float_status *s) 3513 { 3514 return f64_compare(a, b, false, s); 3515 } 3516 3517 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) 3518 { 3519 return f64_compare(a, b, true, s); 3520 } 3521 3522 static FloatRelation QEMU_FLATTEN 3523 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s) 3524 { 3525 FloatParts64 pa, pb; 3526 3527 bfloat16_unpack_canonical(&pa, a, s); 3528 bfloat16_unpack_canonical(&pb, b, s); 3529 return compare_floats(pa, pb, is_quiet, s); 3530 } 3531 3532 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s) 3533 { 3534 return soft_bf16_compare(a, b, false, s); 3535 } 3536 3537 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s) 3538 { 3539 return soft_bf16_compare(a, b, true, s); 3540 } 3541 3542 /* Multiply A by 2 raised to the power N. 
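 * This only adjusts the exponent of the decomposed value, so the scaling
 * itself is exact; any inexact, overflow or underflow is raised when the
 * result is repacked into the destination format.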
 */
3543 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3544 {
3545     if (unlikely(is_nan(a.cls))) {
3546         parts_return_nan(&a, s);
3547     }
3548     if (a.cls == float_class_normal) {
3549         /* The largest float type (even though not supported by FloatParts64)
3550          * is float128, which has a 15-bit exponent. Bounding N to 16 bits
3551          * still allows rounding to infinity, without allowing overflow
3552          * within the int32_t that backs FloatParts64.exp.
3553          */
3554         n = MIN(MAX(n, -0x10000), 0x10000);
3555         a.exp += n;
3556     }
3557     return a;
3558 }
3559
3560 float16 float16_scalbn(float16 a, int n, float_status *status)
3561 {
3562     FloatParts64 pa, pr;
3563
3564     float16_unpack_canonical(&pa, a, status);
3565     pr = scalbn_decomposed(pa, n, status);
3566     return float16_round_pack_canonical(&pr, status);
3567 }
3568
3569 float32 float32_scalbn(float32 a, int n, float_status *status)
3570 {
3571     FloatParts64 pa, pr;
3572
3573     float32_unpack_canonical(&pa, a, status);
3574     pr = scalbn_decomposed(pa, n, status);
3575     return float32_round_pack_canonical(&pr, status);
3576 }
3577
3578 float64 float64_scalbn(float64 a, int n, float_status *status)
3579 {
3580     FloatParts64 pa, pr;
3581
3582     float64_unpack_canonical(&pa, a, status);
3583     pr = scalbn_decomposed(pa, n, status);
3584     return float64_round_pack_canonical(&pr, status);
3585 }
3586
3587 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3588 {
3589     FloatParts64 pa, pr;
3590
3591     bfloat16_unpack_canonical(&pa, a, status);
3592     pr = scalbn_decomposed(pa, n, status);
3593     return bfloat16_round_pack_canonical(&pr, status);
3594 }
3595
3596 /*
3597  * Square Root
3598  *
3599  * The old softfloat code did an approximation step before zeroing in
3600  * on the final result. However, for simplicity, we just compute the
3601  * square root by iterating down from the implicit bit to enough extra
3602  * bits to ensure we get a correctly rounded result.
3603  *
3604  * This does mean, however, that the calculation is slower than before,
3605  * especially for 64-bit floats.
3606  */
3607
3608 static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
3609 {
3610     uint64_t a_frac, r_frac, s_frac;
3611     int bit, last_bit;
3612
3613     if (is_nan(a.cls)) {
3614         parts_return_nan(&a, s);
3615         return a;
3616     }
3617     if (a.cls == float_class_zero) {
3618         return a;  /* sqrt(+-0) = +-0 */
3619     }
3620     if (a.sign) {
3621         float_raise(float_flag_invalid, s);
3622         parts_default_nan(&a, s);
3623         return a;
3624     }
3625     if (a.cls == float_class_inf) {
3626         return a;  /* sqrt(+inf) = +inf */
3627     }
3628
3629     assert(a.cls == float_class_normal);
3630
3631     /* We need two overflow bits at the top. Adding room for that is a
3632      * right shift. If the exponent is odd, we can discard the low bit
3633      * by multiplying the fraction by 2; that's a left shift. Combine
3634      * those and we shift right by 1 if the exponent is odd, otherwise 2.
3635      */
3636     a_frac = a.frac >> (2 - (a.exp & 1));
3637     a.exp >>= 1;
3638
3639     /* Bit-by-bit computation of sqrt. */
3640     r_frac = 0;
3641     s_frac = 0;
3642
3643     /* Iterate from implicit bit down to the 3 extra bits to compute a
3644      * properly rounded result. Remember we've inserted two more bits
3645      * at the top, so these positions are two less.
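     * This is the classic bit-by-bit (digit recurrence) square root:
     * each candidate result bit is kept only if the square of the new
     * partial root still fits under the radicand.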
3646 */ 3647 bit = DECOMPOSED_BINARY_POINT - 2; 3648 last_bit = MAX(p->frac_shift - 4, 0); 3649 do { 3650 uint64_t q = 1ULL << bit; 3651 uint64_t t_frac = s_frac + q; 3652 if (t_frac <= a_frac) { 3653 s_frac = t_frac + q; 3654 a_frac -= t_frac; 3655 r_frac += q; 3656 } 3657 a_frac <<= 1; 3658 } while (--bit >= last_bit); 3659 3660 /* Undo the right shift done above. If there is any remaining 3661 * fraction, the result is inexact. Set the sticky bit. 3662 */ 3663 a.frac = (r_frac << 2) + (a_frac != 0); 3664 3665 return a; 3666 } 3667 3668 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status) 3669 { 3670 FloatParts64 pa, pr; 3671 3672 float16_unpack_canonical(&pa, a, status); 3673 pr = sqrt_float(pa, status, &float16_params); 3674 return float16_round_pack_canonical(&pr, status); 3675 } 3676 3677 static float32 QEMU_SOFTFLOAT_ATTR 3678 soft_f32_sqrt(float32 a, float_status *status) 3679 { 3680 FloatParts64 pa, pr; 3681 3682 float32_unpack_canonical(&pa, a, status); 3683 pr = sqrt_float(pa, status, &float32_params); 3684 return float32_round_pack_canonical(&pr, status); 3685 } 3686 3687 static float64 QEMU_SOFTFLOAT_ATTR 3688 soft_f64_sqrt(float64 a, float_status *status) 3689 { 3690 FloatParts64 pa, pr; 3691 3692 float64_unpack_canonical(&pa, a, status); 3693 pr = sqrt_float(pa, status, &float64_params); 3694 return float64_round_pack_canonical(&pr, status); 3695 } 3696 3697 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s) 3698 { 3699 union_float32 ua, ur; 3700 3701 ua.s = xa; 3702 if (unlikely(!can_use_fpu(s))) { 3703 goto soft; 3704 } 3705 3706 float32_input_flush1(&ua.s, s); 3707 if (QEMU_HARDFLOAT_1F32_USE_FP) { 3708 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || 3709 fpclassify(ua.h) == FP_ZERO) || 3710 signbit(ua.h))) { 3711 goto soft; 3712 } 3713 } else if (unlikely(!float32_is_zero_or_normal(ua.s) || 3714 float32_is_neg(ua.s))) { 3715 goto soft; 3716 } 3717 ur.h = sqrtf(ua.h); 3718 return ur.s; 3719 3720 soft: 3721 return soft_f32_sqrt(ua.s, s); 3722 } 3723 3724 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s) 3725 { 3726 union_float64 ua, ur; 3727 3728 ua.s = xa; 3729 if (unlikely(!can_use_fpu(s))) { 3730 goto soft; 3731 } 3732 3733 float64_input_flush1(&ua.s, s); 3734 if (QEMU_HARDFLOAT_1F64_USE_FP) { 3735 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || 3736 fpclassify(ua.h) == FP_ZERO) || 3737 signbit(ua.h))) { 3738 goto soft; 3739 } 3740 } else if (unlikely(!float64_is_zero_or_normal(ua.s) || 3741 float64_is_neg(ua.s))) { 3742 goto soft; 3743 } 3744 ur.h = sqrt(ua.h); 3745 return ur.s; 3746 3747 soft: 3748 return soft_f64_sqrt(ua.s, s); 3749 } 3750 3751 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status) 3752 { 3753 FloatParts64 pa, pr; 3754 3755 bfloat16_unpack_canonical(&pa, a, status); 3756 pr = sqrt_float(pa, status, &bfloat16_params); 3757 return bfloat16_round_pack_canonical(&pr, status); 3758 } 3759 3760 /*---------------------------------------------------------------------------- 3761 | The pattern for a default generated NaN. 
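| The pattern is target-dependent: the sign and fraction bits come from
| parts_default_nan, which is specialized per target (see
| softfloat-specialize.h).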
3762 *----------------------------------------------------------------------------*/ 3763 3764 float16 float16_default_nan(float_status *status) 3765 { 3766 FloatParts64 p; 3767 3768 parts_default_nan(&p, status); 3769 p.frac >>= float16_params.frac_shift; 3770 return float16_pack_raw(&p); 3771 } 3772 3773 float32 float32_default_nan(float_status *status) 3774 { 3775 FloatParts64 p; 3776 3777 parts_default_nan(&p, status); 3778 p.frac >>= float32_params.frac_shift; 3779 return float32_pack_raw(&p); 3780 } 3781 3782 float64 float64_default_nan(float_status *status) 3783 { 3784 FloatParts64 p; 3785 3786 parts_default_nan(&p, status); 3787 p.frac >>= float64_params.frac_shift; 3788 return float64_pack_raw(&p); 3789 } 3790 3791 float128 float128_default_nan(float_status *status) 3792 { 3793 FloatParts128 p; 3794 3795 parts_default_nan(&p, status); 3796 frac_shr(&p, float128_params.frac_shift); 3797 return float128_pack_raw(&p); 3798 } 3799 3800 bfloat16 bfloat16_default_nan(float_status *status) 3801 { 3802 FloatParts64 p; 3803 3804 parts_default_nan(&p, status); 3805 p.frac >>= bfloat16_params.frac_shift; 3806 return bfloat16_pack_raw(&p); 3807 } 3808 3809 /*---------------------------------------------------------------------------- 3810 | Returns a quiet NaN from a signalling NaN for the floating point value `a'. 3811 *----------------------------------------------------------------------------*/ 3812 3813 float16 float16_silence_nan(float16 a, float_status *status) 3814 { 3815 FloatParts64 p; 3816 3817 float16_unpack_raw(&p, a); 3818 p.frac <<= float16_params.frac_shift; 3819 parts_silence_nan(&p, status); 3820 p.frac >>= float16_params.frac_shift; 3821 return float16_pack_raw(&p); 3822 } 3823 3824 float32 float32_silence_nan(float32 a, float_status *status) 3825 { 3826 FloatParts64 p; 3827 3828 float32_unpack_raw(&p, a); 3829 p.frac <<= float32_params.frac_shift; 3830 parts_silence_nan(&p, status); 3831 p.frac >>= float32_params.frac_shift; 3832 return float32_pack_raw(&p); 3833 } 3834 3835 float64 float64_silence_nan(float64 a, float_status *status) 3836 { 3837 FloatParts64 p; 3838 3839 float64_unpack_raw(&p, a); 3840 p.frac <<= float64_params.frac_shift; 3841 parts_silence_nan(&p, status); 3842 p.frac >>= float64_params.frac_shift; 3843 return float64_pack_raw(&p); 3844 } 3845 3846 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status) 3847 { 3848 FloatParts64 p; 3849 3850 bfloat16_unpack_raw(&p, a); 3851 p.frac <<= bfloat16_params.frac_shift; 3852 parts_silence_nan(&p, status); 3853 p.frac >>= bfloat16_params.frac_shift; 3854 return bfloat16_pack_raw(&p); 3855 } 3856 3857 float128 float128_silence_nan(float128 a, float_status *status) 3858 { 3859 FloatParts128 p; 3860 3861 float128_unpack_raw(&p, a); 3862 frac_shl(&p, float128_params.frac_shift); 3863 parts_silence_nan(&p, status); 3864 frac_shr(&p, float128_params.frac_shift); 3865 return float128_pack_raw(&p); 3866 } 3867 3868 /*---------------------------------------------------------------------------- 3869 | If `a' is denormal and we are in flush-to-zero mode then set the 3870 | input-denormal exception and return zero. Otherwise just return the value. 
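| The check below is made on the raw unpacked value, where a biased
| exponent of zero together with a nonzero fraction identifies a denormal
| directly from the encoding.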
3871 *----------------------------------------------------------------------------*/ 3872 3873 static bool parts_squash_denormal(FloatParts64 p, float_status *status) 3874 { 3875 if (p.exp == 0 && p.frac != 0) { 3876 float_raise(float_flag_input_denormal, status); 3877 return true; 3878 } 3879 3880 return false; 3881 } 3882 3883 float16 float16_squash_input_denormal(float16 a, float_status *status) 3884 { 3885 if (status->flush_inputs_to_zero) { 3886 FloatParts64 p; 3887 3888 float16_unpack_raw(&p, a); 3889 if (parts_squash_denormal(p, status)) { 3890 return float16_set_sign(float16_zero, p.sign); 3891 } 3892 } 3893 return a; 3894 } 3895 3896 float32 float32_squash_input_denormal(float32 a, float_status *status) 3897 { 3898 if (status->flush_inputs_to_zero) { 3899 FloatParts64 p; 3900 3901 float32_unpack_raw(&p, a); 3902 if (parts_squash_denormal(p, status)) { 3903 return float32_set_sign(float32_zero, p.sign); 3904 } 3905 } 3906 return a; 3907 } 3908 3909 float64 float64_squash_input_denormal(float64 a, float_status *status) 3910 { 3911 if (status->flush_inputs_to_zero) { 3912 FloatParts64 p; 3913 3914 float64_unpack_raw(&p, a); 3915 if (parts_squash_denormal(p, status)) { 3916 return float64_set_sign(float64_zero, p.sign); 3917 } 3918 } 3919 return a; 3920 } 3921 3922 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status) 3923 { 3924 if (status->flush_inputs_to_zero) { 3925 FloatParts64 p; 3926 3927 bfloat16_unpack_raw(&p, a); 3928 if (parts_squash_denormal(p, status)) { 3929 return bfloat16_set_sign(bfloat16_zero, p.sign); 3930 } 3931 } 3932 return a; 3933 } 3934 3935 /*---------------------------------------------------------------------------- 3936 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 3937 | and 7, and returns the properly rounded 32-bit integer corresponding to the 3938 | input. If `zSign' is 1, the input is negated before being converted to an 3939 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input 3940 | is simply rounded to an integer, with the inexact exception raised if the 3941 | input cannot be represented exactly as an integer. However, if the fixed- 3942 | point input is too large, the invalid exception is raised and the largest 3943 | positive or negative integer is returned. 3944 *----------------------------------------------------------------------------*/ 3945 3946 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, 3947 float_status *status) 3948 { 3949 int8_t roundingMode; 3950 bool roundNearestEven; 3951 int8_t roundIncrement, roundBits; 3952 int32_t z; 3953 3954 roundingMode = status->float_rounding_mode; 3955 roundNearestEven = ( roundingMode == float_round_nearest_even ); 3956 switch (roundingMode) { 3957 case float_round_nearest_even: 3958 case float_round_ties_away: 3959 roundIncrement = 0x40; 3960 break; 3961 case float_round_to_zero: 3962 roundIncrement = 0; 3963 break; 3964 case float_round_up: 3965 roundIncrement = zSign ? 0 : 0x7f; 3966 break; 3967 case float_round_down: 3968 roundIncrement = zSign ? 0x7f : 0; 3969 break; 3970 case float_round_to_odd: 3971 roundIncrement = absZ & 0x80 ? 0 : 0x7f; 3972 break; 3973 default: 3974 abort(); 3975 } 3976 roundBits = absZ & 0x7F; 3977 absZ = ( absZ + roundIncrement )>>7; 3978 if (!(roundBits ^ 0x40) && roundNearestEven) { 3979 absZ &= ~1; 3980 } 3981 z = absZ; 3982 if ( zSign ) z = - z; 3983 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 3984 float_raise(float_flag_invalid, status); 3985 return zSign ? 
INT32_MIN : INT32_MAX; 3986 } 3987 if (roundBits) { 3988 float_raise(float_flag_inexact, status); 3989 } 3990 return z; 3991 3992 } 3993 3994 /*---------------------------------------------------------------------------- 3995 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and 3996 | `absZ1', with binary point between bits 63 and 64 (between the input words), 3997 | and returns the properly rounded 64-bit integer corresponding to the input. 3998 | If `zSign' is 1, the input is negated before being converted to an integer. 3999 | Ordinarily, the fixed-point input is simply rounded to an integer, with 4000 | the inexact exception raised if the input cannot be represented exactly as 4001 | an integer. However, if the fixed-point input is too large, the invalid 4002 | exception is raised and the largest positive or negative integer is 4003 | returned. 4004 *----------------------------------------------------------------------------*/ 4005 4006 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, 4007 float_status *status) 4008 { 4009 int8_t roundingMode; 4010 bool roundNearestEven, increment; 4011 int64_t z; 4012 4013 roundingMode = status->float_rounding_mode; 4014 roundNearestEven = ( roundingMode == float_round_nearest_even ); 4015 switch (roundingMode) { 4016 case float_round_nearest_even: 4017 case float_round_ties_away: 4018 increment = ((int64_t) absZ1 < 0); 4019 break; 4020 case float_round_to_zero: 4021 increment = 0; 4022 break; 4023 case float_round_up: 4024 increment = !zSign && absZ1; 4025 break; 4026 case float_round_down: 4027 increment = zSign && absZ1; 4028 break; 4029 case float_round_to_odd: 4030 increment = !(absZ0 & 1) && absZ1; 4031 break; 4032 default: 4033 abort(); 4034 } 4035 if ( increment ) { 4036 ++absZ0; 4037 if ( absZ0 == 0 ) goto overflow; 4038 if (!(absZ1 << 1) && roundNearestEven) { 4039 absZ0 &= ~1; 4040 } 4041 } 4042 z = absZ0; 4043 if ( zSign ) z = - z; 4044 if ( z && ( ( z < 0 ) ^ zSign ) ) { 4045 overflow: 4046 float_raise(float_flag_invalid, status); 4047 return zSign ? INT64_MIN : INT64_MAX; 4048 } 4049 if (absZ1) { 4050 float_raise(float_flag_inexact, status); 4051 } 4052 return z; 4053 4054 } 4055 4056 /*---------------------------------------------------------------------------- 4057 | Normalizes the subnormal single-precision floating-point value represented 4058 | by the denormalized significand `aSig'. The normalized exponent and 4059 | significand are stored at the locations pointed to by `zExpPtr' and 4060 | `zSigPtr', respectively. 4061 *----------------------------------------------------------------------------*/ 4062 4063 static void 4064 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr) 4065 { 4066 int8_t shiftCount; 4067 4068 shiftCount = clz32(aSig) - 8; 4069 *zSigPtr = aSig<<shiftCount; 4070 *zExpPtr = 1 - shiftCount; 4071 4072 } 4073 4074 /*---------------------------------------------------------------------------- 4075 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4076 | and significand `zSig', and returns the proper single-precision floating- 4077 | point value corresponding to the abstract input. Ordinarily, the abstract 4078 | value is simply rounded and packed into the single-precision format, with 4079 | the inexact exception raised if the abstract input cannot be represented 4080 | exactly. 
However, if the abstract value is too large, the overflow and 4081 | inexact exceptions are raised and an infinity or maximal finite value is 4082 | returned. If the abstract value is too small, the input value is rounded to 4083 | a subnormal number, and the underflow and inexact exceptions are raised if 4084 | the abstract input cannot be represented exactly as a subnormal single- 4085 | precision floating-point number. 4086 | The input significand `zSig' has its binary point between bits 30 4087 | and 29, which is 7 bits to the left of the usual location. This shifted 4088 | significand must be normalized or smaller. If `zSig' is not normalized, 4089 | `zExp' must be 0; in that case, the result returned is a subnormal number, 4090 | and it must not require rounding. In the usual case that `zSig' is 4091 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 4092 | The handling of underflow and overflow follows the IEC/IEEE Standard for 4093 | Binary Floating-Point Arithmetic. 4094 *----------------------------------------------------------------------------*/ 4095 4096 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, 4097 float_status *status) 4098 { 4099 int8_t roundingMode; 4100 bool roundNearestEven; 4101 int8_t roundIncrement, roundBits; 4102 bool isTiny; 4103 4104 roundingMode = status->float_rounding_mode; 4105 roundNearestEven = ( roundingMode == float_round_nearest_even ); 4106 switch (roundingMode) { 4107 case float_round_nearest_even: 4108 case float_round_ties_away: 4109 roundIncrement = 0x40; 4110 break; 4111 case float_round_to_zero: 4112 roundIncrement = 0; 4113 break; 4114 case float_round_up: 4115 roundIncrement = zSign ? 0 : 0x7f; 4116 break; 4117 case float_round_down: 4118 roundIncrement = zSign ? 0x7f : 0; 4119 break; 4120 case float_round_to_odd: 4121 roundIncrement = zSig & 0x80 ? 0 : 0x7f; 4122 break; 4123 default: 4124 abort(); 4125 break; 4126 } 4127 roundBits = zSig & 0x7F; 4128 if ( 0xFD <= (uint16_t) zExp ) { 4129 if ( ( 0xFD < zExp ) 4130 || ( ( zExp == 0xFD ) 4131 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) 4132 ) { 4133 bool overflow_to_inf = roundingMode != float_round_to_odd && 4134 roundIncrement != 0; 4135 float_raise(float_flag_overflow | float_flag_inexact, status); 4136 return packFloat32(zSign, 0xFF, -!overflow_to_inf); 4137 } 4138 if ( zExp < 0 ) { 4139 if (status->flush_to_zero) { 4140 float_raise(float_flag_output_denormal, status); 4141 return packFloat32(zSign, 0, 0); 4142 } 4143 isTiny = status->tininess_before_rounding 4144 || (zExp < -1) 4145 || (zSig + roundIncrement < 0x80000000); 4146 shift32RightJamming( zSig, - zExp, &zSig ); 4147 zExp = 0; 4148 roundBits = zSig & 0x7F; 4149 if (isTiny && roundBits) { 4150 float_raise(float_flag_underflow, status); 4151 } 4152 if (roundingMode == float_round_to_odd) { 4153 /* 4154 * For round-to-odd case, the roundIncrement depends on 4155 * zSig which just changed. 4156 */ 4157 roundIncrement = zSig & 0x80 ? 
0 : 0x7f; 4158 } 4159 } 4160 } 4161 if (roundBits) { 4162 float_raise(float_flag_inexact, status); 4163 } 4164 zSig = ( zSig + roundIncrement )>>7; 4165 if (!(roundBits ^ 0x40) && roundNearestEven) { 4166 zSig &= ~1; 4167 } 4168 if ( zSig == 0 ) zExp = 0; 4169 return packFloat32( zSign, zExp, zSig ); 4170 4171 } 4172 4173 /*---------------------------------------------------------------------------- 4174 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4175 | and significand `zSig', and returns the proper single-precision floating- 4176 | point value corresponding to the abstract input. This routine is just like 4177 | `roundAndPackFloat32' except that `zSig' does not have to be normalized. 4178 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 4179 | floating-point exponent. 4180 *----------------------------------------------------------------------------*/ 4181 4182 static float32 4183 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, 4184 float_status *status) 4185 { 4186 int8_t shiftCount; 4187 4188 shiftCount = clz32(zSig) - 1; 4189 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, 4190 status); 4191 4192 } 4193 4194 /*---------------------------------------------------------------------------- 4195 | Normalizes the subnormal double-precision floating-point value represented 4196 | by the denormalized significand `aSig'. The normalized exponent and 4197 | significand are stored at the locations pointed to by `zExpPtr' and 4198 | `zSigPtr', respectively. 4199 *----------------------------------------------------------------------------*/ 4200 4201 static void 4202 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr) 4203 { 4204 int8_t shiftCount; 4205 4206 shiftCount = clz64(aSig) - 11; 4207 *zSigPtr = aSig<<shiftCount; 4208 *zExpPtr = 1 - shiftCount; 4209 4210 } 4211 4212 /*---------------------------------------------------------------------------- 4213 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 4214 | double-precision floating-point value, returning the result. After being 4215 | shifted into the proper positions, the three fields are simply added 4216 | together to form the result. This means that any integer portion of `zSig' 4217 | will be added into the exponent. Since a properly normalized significand 4218 | will have an integer portion equal to 1, the `zExp' input should be 1 less 4219 | than the desired result exponent whenever `zSig' is a complete, normalized 4220 | significand. 4221 *----------------------------------------------------------------------------*/ 4222 4223 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig) 4224 { 4225 4226 return make_float64( 4227 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig); 4228 4229 } 4230 4231 /*---------------------------------------------------------------------------- 4232 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4233 | and significand `zSig', and returns the proper double-precision floating- 4234 | point value corresponding to the abstract input. Ordinarily, the abstract 4235 | value is simply rounded and packed into the double-precision format, with 4236 | the inexact exception raised if the abstract input cannot be represented 4237 | exactly. However, if the abstract value is too large, the overflow and 4238 | inexact exceptions are raised and an infinity or maximal finite value is 4239 | returned. 
If the abstract value is too small, the input value is rounded to 4240 | a subnormal number, and the underflow and inexact exceptions are raised if 4241 | the abstract input cannot be represented exactly as a subnormal double- 4242 | precision floating-point number. 4243 | The input significand `zSig' has its binary point between bits 62 4244 | and 61, which is 10 bits to the left of the usual location. This shifted 4245 | significand must be normalized or smaller. If `zSig' is not normalized, 4246 | `zExp' must be 0; in that case, the result returned is a subnormal number, 4247 | and it must not require rounding. In the usual case that `zSig' is 4248 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 4249 | The handling of underflow and overflow follows the IEC/IEEE Standard for 4250 | Binary Floating-Point Arithmetic. 4251 *----------------------------------------------------------------------------*/ 4252 4253 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, 4254 float_status *status) 4255 { 4256 int8_t roundingMode; 4257 bool roundNearestEven; 4258 int roundIncrement, roundBits; 4259 bool isTiny; 4260 4261 roundingMode = status->float_rounding_mode; 4262 roundNearestEven = ( roundingMode == float_round_nearest_even ); 4263 switch (roundingMode) { 4264 case float_round_nearest_even: 4265 case float_round_ties_away: 4266 roundIncrement = 0x200; 4267 break; 4268 case float_round_to_zero: 4269 roundIncrement = 0; 4270 break; 4271 case float_round_up: 4272 roundIncrement = zSign ? 0 : 0x3ff; 4273 break; 4274 case float_round_down: 4275 roundIncrement = zSign ? 0x3ff : 0; 4276 break; 4277 case float_round_to_odd: 4278 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff; 4279 break; 4280 default: 4281 abort(); 4282 } 4283 roundBits = zSig & 0x3FF; 4284 if ( 0x7FD <= (uint16_t) zExp ) { 4285 if ( ( 0x7FD < zExp ) 4286 || ( ( zExp == 0x7FD ) 4287 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) 4288 ) { 4289 bool overflow_to_inf = roundingMode != float_round_to_odd && 4290 roundIncrement != 0; 4291 float_raise(float_flag_overflow | float_flag_inexact, status); 4292 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf)); 4293 } 4294 if ( zExp < 0 ) { 4295 if (status->flush_to_zero) { 4296 float_raise(float_flag_output_denormal, status); 4297 return packFloat64(zSign, 0, 0); 4298 } 4299 isTiny = status->tininess_before_rounding 4300 || (zExp < -1) 4301 || (zSig + roundIncrement < UINT64_C(0x8000000000000000)); 4302 shift64RightJamming( zSig, - zExp, &zSig ); 4303 zExp = 0; 4304 roundBits = zSig & 0x3FF; 4305 if (isTiny && roundBits) { 4306 float_raise(float_flag_underflow, status); 4307 } 4308 if (roundingMode == float_round_to_odd) { 4309 /* 4310 * For round-to-odd case, the roundIncrement depends on 4311 * zSig which just changed. 4312 */ 4313 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff; 4314 } 4315 } 4316 } 4317 if (roundBits) { 4318 float_raise(float_flag_inexact, status); 4319 } 4320 zSig = ( zSig + roundIncrement )>>10; 4321 if (!(roundBits ^ 0x200) && roundNearestEven) { 4322 zSig &= ~1; 4323 } 4324 if ( zSig == 0 ) zExp = 0; 4325 return packFloat64( zSign, zExp, zSig ); 4326 4327 } 4328 4329 /*---------------------------------------------------------------------------- 4330 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4331 | and significand `zSig', and returns the proper double-precision floating- 4332 | point value corresponding to the abstract input. 
This routine is just like 4333 | `roundAndPackFloat64' except that `zSig' does not have to be normalized. 4334 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 4335 | floating-point exponent. 4336 *----------------------------------------------------------------------------*/ 4337 4338 static float64 4339 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, 4340 float_status *status) 4341 { 4342 int8_t shiftCount; 4343 4344 shiftCount = clz64(zSig) - 1; 4345 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, 4346 status); 4347 4348 } 4349 4350 /*---------------------------------------------------------------------------- 4351 | Normalizes the subnormal extended double-precision floating-point value 4352 | represented by the denormalized significand `aSig'. The normalized exponent 4353 | and significand are stored at the locations pointed to by `zExpPtr' and 4354 | `zSigPtr', respectively. 4355 *----------------------------------------------------------------------------*/ 4356 4357 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, 4358 uint64_t *zSigPtr) 4359 { 4360 int8_t shiftCount; 4361 4362 shiftCount = clz64(aSig); 4363 *zSigPtr = aSig<<shiftCount; 4364 *zExpPtr = 1 - shiftCount; 4365 } 4366 4367 /*---------------------------------------------------------------------------- 4368 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4369 | and extended significand formed by the concatenation of `zSig0' and `zSig1', 4370 | and returns the proper extended double-precision floating-point value 4371 | corresponding to the abstract input. Ordinarily, the abstract value is 4372 | rounded and packed into the extended double-precision format, with the 4373 | inexact exception raised if the abstract input cannot be represented 4374 | exactly. However, if the abstract value is too large, the overflow and 4375 | inexact exceptions are raised and an infinity or maximal finite value is 4376 | returned. If the abstract value is too small, the input value is rounded to 4377 | a subnormal number, and the underflow and inexact exceptions are raised if 4378 | the abstract input cannot be represented exactly as a subnormal extended 4379 | double-precision floating-point number. 4380 | If `roundingPrecision' is 32 or 64, the result is rounded to the same 4381 | number of bits as single or double precision, respectively. Otherwise, the 4382 | result is rounded to the full precision of the extended double-precision 4383 | format. 4384 | The input significand must be normalized or smaller. If the input 4385 | significand is not normalized, `zExp' must be 0; in that case, the result 4386 | returned is a subnormal number, and it must not require rounding. The 4387 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary 4388 | Floating-Point Arithmetic. 
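|
| As a worked derivation of the constants below: the extended format
| carries 64 significand bits (with an explicit integer bit), so rounding
| to double precision keeps the top 53 bits and treats the remaining
| 64 - 53 = 11 bits as round bits (roundMask 0x7FF), while rounding to
| single precision keeps 24 bits, leaving 64 - 24 = 40 round bits
| (roundMask 0xFFFFFFFFFF).  In both cases roundIncrement is
| (roundMask + 1) / 2, the weight of half the last kept bit.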
4389 *----------------------------------------------------------------------------*/ 4390 4391 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, 4392 int32_t zExp, uint64_t zSig0, uint64_t zSig1, 4393 float_status *status) 4394 { 4395 int8_t roundingMode; 4396 bool roundNearestEven, increment, isTiny; 4397 int64_t roundIncrement, roundMask, roundBits; 4398 4399 roundingMode = status->float_rounding_mode; 4400 roundNearestEven = ( roundingMode == float_round_nearest_even ); 4401 if ( roundingPrecision == 80 ) goto precision80; 4402 if ( roundingPrecision == 64 ) { 4403 roundIncrement = UINT64_C(0x0000000000000400); 4404 roundMask = UINT64_C(0x00000000000007FF); 4405 } 4406 else if ( roundingPrecision == 32 ) { 4407 roundIncrement = UINT64_C(0x0000008000000000); 4408 roundMask = UINT64_C(0x000000FFFFFFFFFF); 4409 } 4410 else { 4411 goto precision80; 4412 } 4413 zSig0 |= ( zSig1 != 0 ); 4414 switch (roundingMode) { 4415 case float_round_nearest_even: 4416 case float_round_ties_away: 4417 break; 4418 case float_round_to_zero: 4419 roundIncrement = 0; 4420 break; 4421 case float_round_up: 4422 roundIncrement = zSign ? 0 : roundMask; 4423 break; 4424 case float_round_down: 4425 roundIncrement = zSign ? roundMask : 0; 4426 break; 4427 default: 4428 abort(); 4429 } 4430 roundBits = zSig0 & roundMask; 4431 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { 4432 if ( ( 0x7FFE < zExp ) 4433 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 4434 ) { 4435 goto overflow; 4436 } 4437 if ( zExp <= 0 ) { 4438 if (status->flush_to_zero) { 4439 float_raise(float_flag_output_denormal, status); 4440 return packFloatx80(zSign, 0, 0); 4441 } 4442 isTiny = status->tininess_before_rounding 4443 || (zExp < 0 ) 4444 || (zSig0 <= zSig0 + roundIncrement); 4445 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 4446 zExp = 0; 4447 roundBits = zSig0 & roundMask; 4448 if (isTiny && roundBits) { 4449 float_raise(float_flag_underflow, status); 4450 } 4451 if (roundBits) { 4452 float_raise(float_flag_inexact, status); 4453 } 4454 zSig0 += roundIncrement; 4455 if ( (int64_t) zSig0 < 0 ) zExp = 1; 4456 roundIncrement = roundMask + 1; 4457 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 4458 roundMask |= roundIncrement; 4459 } 4460 zSig0 &= ~ roundMask; 4461 return packFloatx80( zSign, zExp, zSig0 ); 4462 } 4463 } 4464 if (roundBits) { 4465 float_raise(float_flag_inexact, status); 4466 } 4467 zSig0 += roundIncrement; 4468 if ( zSig0 < roundIncrement ) { 4469 ++zExp; 4470 zSig0 = UINT64_C(0x8000000000000000); 4471 } 4472 roundIncrement = roundMask + 1; 4473 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 4474 roundMask |= roundIncrement; 4475 } 4476 zSig0 &= ~ roundMask; 4477 if ( zSig0 == 0 ) zExp = 0; 4478 return packFloatx80( zSign, zExp, zSig0 ); 4479 precision80: 4480 switch (roundingMode) { 4481 case float_round_nearest_even: 4482 case float_round_ties_away: 4483 increment = ((int64_t)zSig1 < 0); 4484 break; 4485 case float_round_to_zero: 4486 increment = 0; 4487 break; 4488 case float_round_up: 4489 increment = !zSign && zSig1; 4490 break; 4491 case float_round_down: 4492 increment = zSign && zSig1; 4493 break; 4494 default: 4495 abort(); 4496 } 4497 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { 4498 if ( ( 0x7FFE < zExp ) 4499 || ( ( zExp == 0x7FFE ) 4500 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) ) 4501 && increment 4502 ) 4503 ) { 4504 roundMask = 0; 4505 overflow: 4506 float_raise(float_flag_overflow | float_flag_inexact, status); 4507 if ( ( roundingMode == 
float_round_to_zero ) 4508 || ( zSign && ( roundingMode == float_round_up ) ) 4509 || ( ! zSign && ( roundingMode == float_round_down ) ) 4510 ) { 4511 return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 4512 } 4513 return packFloatx80(zSign, 4514 floatx80_infinity_high, 4515 floatx80_infinity_low); 4516 } 4517 if ( zExp <= 0 ) { 4518 isTiny = status->tininess_before_rounding 4519 || (zExp < 0) 4520 || !increment 4521 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF)); 4522 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 4523 zExp = 0; 4524 if (isTiny && zSig1) { 4525 float_raise(float_flag_underflow, status); 4526 } 4527 if (zSig1) { 4528 float_raise(float_flag_inexact, status); 4529 } 4530 switch (roundingMode) { 4531 case float_round_nearest_even: 4532 case float_round_ties_away: 4533 increment = ((int64_t)zSig1 < 0); 4534 break; 4535 case float_round_to_zero: 4536 increment = 0; 4537 break; 4538 case float_round_up: 4539 increment = !zSign && zSig1; 4540 break; 4541 case float_round_down: 4542 increment = zSign && zSig1; 4543 break; 4544 default: 4545 abort(); 4546 } 4547 if ( increment ) { 4548 ++zSig0; 4549 if (!(zSig1 << 1) && roundNearestEven) { 4550 zSig0 &= ~1; 4551 } 4552 if ( (int64_t) zSig0 < 0 ) zExp = 1; 4553 } 4554 return packFloatx80( zSign, zExp, zSig0 ); 4555 } 4556 } 4557 if (zSig1) { 4558 float_raise(float_flag_inexact, status); 4559 } 4560 if ( increment ) { 4561 ++zSig0; 4562 if ( zSig0 == 0 ) { 4563 ++zExp; 4564 zSig0 = UINT64_C(0x8000000000000000); 4565 } 4566 else { 4567 if (!(zSig1 << 1) && roundNearestEven) { 4568 zSig0 &= ~1; 4569 } 4570 } 4571 } 4572 else { 4573 if ( zSig0 == 0 ) zExp = 0; 4574 } 4575 return packFloatx80( zSign, zExp, zSig0 ); 4576 4577 } 4578 4579 /*---------------------------------------------------------------------------- 4580 | Takes an abstract floating-point value having sign `zSign', exponent 4581 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 4582 | and returns the proper extended double-precision floating-point value 4583 | corresponding to the abstract input. This routine is just like 4584 | `roundAndPackFloatx80' except that the input significand does not have to be 4585 | normalized. 4586 *----------------------------------------------------------------------------*/ 4587 4588 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, 4589 bool zSign, int32_t zExp, 4590 uint64_t zSig0, uint64_t zSig1, 4591 float_status *status) 4592 { 4593 int8_t shiftCount; 4594 4595 if ( zSig0 == 0 ) { 4596 zSig0 = zSig1; 4597 zSig1 = 0; 4598 zExp -= 64; 4599 } 4600 shiftCount = clz64(zSig0); 4601 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 4602 zExp -= shiftCount; 4603 return roundAndPackFloatx80(roundingPrecision, zSign, zExp, 4604 zSig0, zSig1, status); 4605 4606 } 4607 4608 /*---------------------------------------------------------------------------- 4609 | Returns the least-significant 64 fraction bits of the quadruple-precision 4610 | floating-point value `a'. 4611 *----------------------------------------------------------------------------*/ 4612 4613 static inline uint64_t extractFloat128Frac1( float128 a ) 4614 { 4615 4616 return a.low; 4617 4618 } 4619 4620 /*---------------------------------------------------------------------------- 4621 | Returns the most-significant 48 fraction bits of the quadruple-precision 4622 | floating-point value `a'. 
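|
| (For reference, the quadruple-precision layout splits the 112 fraction
| bits across both words: `a.high' holds the sign bit, the 15 exponent
| bits and the top 48 fraction bits, while `a.low' holds the remaining
| 64; hence the 48-bit mask below.)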
4623 *----------------------------------------------------------------------------*/ 4624 4625 static inline uint64_t extractFloat128Frac0( float128 a ) 4626 { 4627 4628 return a.high & UINT64_C(0x0000FFFFFFFFFFFF); 4629 4630 } 4631 4632 /*---------------------------------------------------------------------------- 4633 | Returns the exponent bits of the quadruple-precision floating-point value 4634 | `a'. 4635 *----------------------------------------------------------------------------*/ 4636 4637 static inline int32_t extractFloat128Exp( float128 a ) 4638 { 4639 4640 return ( a.high>>48 ) & 0x7FFF; 4641 4642 } 4643 4644 /*---------------------------------------------------------------------------- 4645 | Returns the sign bit of the quadruple-precision floating-point value `a'. 4646 *----------------------------------------------------------------------------*/ 4647 4648 static inline bool extractFloat128Sign(float128 a) 4649 { 4650 return a.high >> 63; 4651 } 4652 4653 /*---------------------------------------------------------------------------- 4654 | Normalizes the subnormal quadruple-precision floating-point value 4655 | represented by the denormalized significand formed by the concatenation of 4656 | `aSig0' and `aSig1'. The normalized exponent is stored at the location 4657 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized 4658 | significand are stored at the location pointed to by `zSig0Ptr', and the 4659 | least significant 64 bits of the normalized significand are stored at the 4660 | location pointed to by `zSig1Ptr'. 4661 *----------------------------------------------------------------------------*/ 4662 4663 static void 4664 normalizeFloat128Subnormal( 4665 uint64_t aSig0, 4666 uint64_t aSig1, 4667 int32_t *zExpPtr, 4668 uint64_t *zSig0Ptr, 4669 uint64_t *zSig1Ptr 4670 ) 4671 { 4672 int8_t shiftCount; 4673 4674 if ( aSig0 == 0 ) { 4675 shiftCount = clz64(aSig1) - 15; 4676 if ( shiftCount < 0 ) { 4677 *zSig0Ptr = aSig1>>( - shiftCount ); 4678 *zSig1Ptr = aSig1<<( shiftCount & 63 ); 4679 } 4680 else { 4681 *zSig0Ptr = aSig1<<shiftCount; 4682 *zSig1Ptr = 0; 4683 } 4684 *zExpPtr = - shiftCount - 63; 4685 } 4686 else { 4687 shiftCount = clz64(aSig0) - 15; 4688 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 4689 *zExpPtr = 1 - shiftCount; 4690 } 4691 4692 } 4693 4694 /*---------------------------------------------------------------------------- 4695 | Packs the sign `zSign', the exponent `zExp', and the significand formed 4696 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision 4697 | floating-point value, returning the result. After being shifted into the 4698 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply 4699 | added together to form the most significant 64 bits of the result. This 4700 | means that any integer portion of `zSig0' will be added into the exponent. 4701 | Since a properly normalized significand will have an integer portion equal 4702 | to 1, the `zExp' input should be 1 less than the desired result exponent 4703 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized 4704 | significand.
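|
| Worked example (illustrative): packFloat128(0, 0x3FFE,
| UINT64_C(0x0001000000000000), 0) gives z.high == 0x3FFF000000000000
| and z.low == 0, i.e. +1.0; the integer bit of `zSig0' (bit 48) carries
| into the exponent field, which is why `zExp' is passed as one less
| than the desired exponent.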
4705 *----------------------------------------------------------------------------*/ 4706 4707 static inline float128 4708 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1) 4709 { 4710 float128 z; 4711 4712 z.low = zSig1; 4713 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0; 4714 return z; 4715 } 4716 4717 /*---------------------------------------------------------------------------- 4718 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4719 | and extended significand formed by the concatenation of `zSig0', `zSig1', 4720 | and `zSig2', and returns the proper quadruple-precision floating-point value 4721 | corresponding to the abstract input. Ordinarily, the abstract value is 4722 | simply rounded and packed into the quadruple-precision format, with the 4723 | inexact exception raised if the abstract input cannot be represented 4724 | exactly. However, if the abstract value is too large, the overflow and 4725 | inexact exceptions are raised and an infinity or maximal finite value is 4726 | returned. If the abstract value is too small, the input value is rounded to 4727 | a subnormal number, and the underflow and inexact exceptions are raised if 4728 | the abstract input cannot be represented exactly as a subnormal quadruple- 4729 | precision floating-point number. 4730 | The input significand must be normalized or smaller. If the input 4731 | significand is not normalized, `zExp' must be 0; in that case, the result 4732 | returned is a subnormal number, and it must not require rounding. In the 4733 | usual case that the input significand is normalized, `zExp' must be 1 less 4734 | than the ``true'' floating-point exponent. The handling of underflow and 4735 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4736 *----------------------------------------------------------------------------*/ 4737 4738 static float128 roundAndPackFloat128(bool zSign, int32_t zExp, 4739 uint64_t zSig0, uint64_t zSig1, 4740 uint64_t zSig2, float_status *status) 4741 { 4742 int8_t roundingMode; 4743 bool roundNearestEven, increment, isTiny; 4744 4745 roundingMode = status->float_rounding_mode; 4746 roundNearestEven = ( roundingMode == float_round_nearest_even ); 4747 switch (roundingMode) { 4748 case float_round_nearest_even: 4749 case float_round_ties_away: 4750 increment = ((int64_t)zSig2 < 0); 4751 break; 4752 case float_round_to_zero: 4753 increment = 0; 4754 break; 4755 case float_round_up: 4756 increment = !zSign && zSig2; 4757 break; 4758 case float_round_down: 4759 increment = zSign && zSig2; 4760 break; 4761 case float_round_to_odd: 4762 increment = !(zSig1 & 0x1) && zSig2; 4763 break; 4764 default: 4765 abort(); 4766 } 4767 if ( 0x7FFD <= (uint32_t) zExp ) { 4768 if ( ( 0x7FFD < zExp ) 4769 || ( ( zExp == 0x7FFD ) 4770 && eq128( 4771 UINT64_C(0x0001FFFFFFFFFFFF), 4772 UINT64_C(0xFFFFFFFFFFFFFFFF), 4773 zSig0, 4774 zSig1 4775 ) 4776 && increment 4777 ) 4778 ) { 4779 float_raise(float_flag_overflow | float_flag_inexact, status); 4780 if ( ( roundingMode == float_round_to_zero ) 4781 || ( zSign && ( roundingMode == float_round_up ) ) 4782 || ( ! 
zSign && ( roundingMode == float_round_down ) ) 4783 || (roundingMode == float_round_to_odd) 4784 ) { 4785 return 4786 packFloat128( 4787 zSign, 4788 0x7FFE, 4789 UINT64_C(0x0000FFFFFFFFFFFF), 4790 UINT64_C(0xFFFFFFFFFFFFFFFF) 4791 ); 4792 } 4793 return packFloat128( zSign, 0x7FFF, 0, 0 ); 4794 } 4795 if ( zExp < 0 ) { 4796 if (status->flush_to_zero) { 4797 float_raise(float_flag_output_denormal, status); 4798 return packFloat128(zSign, 0, 0, 0); 4799 } 4800 isTiny = status->tininess_before_rounding 4801 || (zExp < -1) 4802 || !increment 4803 || lt128(zSig0, zSig1, 4804 UINT64_C(0x0001FFFFFFFFFFFF), 4805 UINT64_C(0xFFFFFFFFFFFFFFFF)); 4806 shift128ExtraRightJamming( 4807 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 4808 zExp = 0; 4809 if (isTiny && zSig2) { 4810 float_raise(float_flag_underflow, status); 4811 } 4812 switch (roundingMode) { 4813 case float_round_nearest_even: 4814 case float_round_ties_away: 4815 increment = ((int64_t)zSig2 < 0); 4816 break; 4817 case float_round_to_zero: 4818 increment = 0; 4819 break; 4820 case float_round_up: 4821 increment = !zSign && zSig2; 4822 break; 4823 case float_round_down: 4824 increment = zSign && zSig2; 4825 break; 4826 case float_round_to_odd: 4827 increment = !(zSig1 & 0x1) && zSig2; 4828 break; 4829 default: 4830 abort(); 4831 } 4832 } 4833 } 4834 if (zSig2) { 4835 float_raise(float_flag_inexact, status); 4836 } 4837 if ( increment ) { 4838 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 4839 if ((zSig2 + zSig2 == 0) && roundNearestEven) { 4840 zSig1 &= ~1; 4841 } 4842 } 4843 else { 4844 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 4845 } 4846 return packFloat128( zSign, zExp, zSig0, zSig1 ); 4847 4848 } 4849 4850 /*---------------------------------------------------------------------------- 4851 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4852 | and significand formed by the concatenation of `zSig0' and `zSig1', and 4853 | returns the proper quadruple-precision floating-point value corresponding 4854 | to the abstract input. This routine is just like `roundAndPackFloat128' 4855 | except that the input significand has fewer bits and does not have to be 4856 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 4857 | point exponent. 4858 *----------------------------------------------------------------------------*/ 4859 4860 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, 4861 uint64_t zSig0, uint64_t zSig1, 4862 float_status *status) 4863 { 4864 int8_t shiftCount; 4865 uint64_t zSig2; 4866 4867 if ( zSig0 == 0 ) { 4868 zSig0 = zSig1; 4869 zSig1 = 0; 4870 zExp -= 64; 4871 } 4872 shiftCount = clz64(zSig0) - 15; 4873 if ( 0 <= shiftCount ) { 4874 zSig2 = 0; 4875 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 4876 } 4877 else { 4878 shift128ExtraRightJamming( 4879 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 4880 } 4881 zExp -= shiftCount; 4882 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); 4883 4884 } 4885 4886 4887 /*---------------------------------------------------------------------------- 4888 | Returns the result of converting the 32-bit two's complement integer `a' 4889 | to the extended double-precision floating-point format. The conversion 4890 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 4891 | Arithmetic. 
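|
| Worked example (illustrative): for a == 1, absA == 1 and
| clz32(absA) == 31, so shiftCount == 63; the significand becomes
| 1 << 63 == 0x8000000000000000 and the exponent 0x403E - 63 == 0x3FFF,
| the biased exponent of 1.0 in the extended format.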
4892 *----------------------------------------------------------------------------*/ 4893 4894 floatx80 int32_to_floatx80(int32_t a, float_status *status) 4895 { 4896 bool zSign; 4897 uint32_t absA; 4898 int8_t shiftCount; 4899 uint64_t zSig; 4900 4901 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 4902 zSign = ( a < 0 ); 4903 absA = zSign ? - a : a; 4904 shiftCount = clz32(absA) + 32; 4905 zSig = absA; 4906 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 4907 4908 } 4909 4910 /*---------------------------------------------------------------------------- 4911 | Returns the result of converting the 64-bit two's complement integer `a' 4912 | to the extended double-precision floating-point format. The conversion 4913 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 4914 | Arithmetic. 4915 *----------------------------------------------------------------------------*/ 4916 4917 floatx80 int64_to_floatx80(int64_t a, float_status *status) 4918 { 4919 bool zSign; 4920 uint64_t absA; 4921 int8_t shiftCount; 4922 4923 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 4924 zSign = ( a < 0 ); 4925 absA = zSign ? - a : a; 4926 shiftCount = clz64(absA); 4927 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); 4928 4929 } 4930 4931 /*---------------------------------------------------------------------------- 4932 | Returns the result of converting the single-precision floating-point value 4933 | `a' to the extended double-precision floating-point format. The conversion 4934 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 4935 | Arithmetic. 4936 *----------------------------------------------------------------------------*/ 4937 4938 floatx80 float32_to_floatx80(float32 a, float_status *status) 4939 { 4940 bool aSign; 4941 int aExp; 4942 uint32_t aSig; 4943 4944 a = float32_squash_input_denormal(a, status); 4945 aSig = extractFloat32Frac( a ); 4946 aExp = extractFloat32Exp( a ); 4947 aSign = extractFloat32Sign( a ); 4948 if ( aExp == 0xFF ) { 4949 if (aSig) { 4950 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status), 4951 status); 4952 return floatx80_silence_nan(res, status); 4953 } 4954 return packFloatx80(aSign, 4955 floatx80_infinity_high, 4956 floatx80_infinity_low); 4957 } 4958 if ( aExp == 0 ) { 4959 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 4960 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 4961 } 4962 aSig |= 0x00800000; 4963 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 ); 4964 4965 } 4966 4967 /*---------------------------------------------------------------------------- 4968 | Returns the remainder of the single-precision floating-point value `a' 4969 | with respect to the corresponding value `b'. The operation is performed 4970 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
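|
| Note that the IEC/IEEE remainder rounds the implicit quotient to the
| nearest integer (ties to even), unlike the truncating C library
| fmodf().  For example (illustrative), float32_rem(5.0, 3.0, &st)
| returns -1.0, since 5/3 rounds to 2 and 5 - 2*3 == -1, whereas
| fmodf(5.0f, 3.0f) == 2.0f.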
4971 *----------------------------------------------------------------------------*/ 4972 4973 float32 float32_rem(float32 a, float32 b, float_status *status) 4974 { 4975 bool aSign, zSign; 4976 int aExp, bExp, expDiff; 4977 uint32_t aSig, bSig; 4978 uint32_t q; 4979 uint64_t aSig64, bSig64, q64; 4980 uint32_t alternateASig; 4981 int32_t sigMean; 4982 a = float32_squash_input_denormal(a, status); 4983 b = float32_squash_input_denormal(b, status); 4984 4985 aSig = extractFloat32Frac( a ); 4986 aExp = extractFloat32Exp( a ); 4987 aSign = extractFloat32Sign( a ); 4988 bSig = extractFloat32Frac( b ); 4989 bExp = extractFloat32Exp( b ); 4990 if ( aExp == 0xFF ) { 4991 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 4992 return propagateFloat32NaN(a, b, status); 4993 } 4994 float_raise(float_flag_invalid, status); 4995 return float32_default_nan(status); 4996 } 4997 if ( bExp == 0xFF ) { 4998 if (bSig) { 4999 return propagateFloat32NaN(a, b, status); 5000 } 5001 return a; 5002 } 5003 if ( bExp == 0 ) { 5004 if ( bSig == 0 ) { 5005 float_raise(float_flag_invalid, status); 5006 return float32_default_nan(status); 5007 } 5008 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 5009 } 5010 if ( aExp == 0 ) { 5011 if ( aSig == 0 ) return a; 5012 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 5013 } 5014 expDiff = aExp - bExp; 5015 aSig |= 0x00800000; 5016 bSig |= 0x00800000; 5017 if ( expDiff < 32 ) { 5018 aSig <<= 8; 5019 bSig <<= 8; 5020 if ( expDiff < 0 ) { 5021 if ( expDiff < -1 ) return a; 5022 aSig >>= 1; 5023 } 5024 q = ( bSig <= aSig ); 5025 if ( q ) aSig -= bSig; 5026 if ( 0 < expDiff ) { 5027 q = ( ( (uint64_t) aSig )<<32 ) / bSig; 5028 q >>= 32 - expDiff; 5029 bSig >>= 2; 5030 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 5031 } 5032 else { 5033 aSig >>= 2; 5034 bSig >>= 2; 5035 } 5036 } 5037 else { 5038 if ( bSig <= aSig ) aSig -= bSig; 5039 aSig64 = ( (uint64_t) aSig )<<40; 5040 bSig64 = ( (uint64_t) bSig )<<40; 5041 expDiff -= 64; 5042 while ( 0 < expDiff ) { 5043 q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 5044 q64 = ( 2 < q64 ) ? q64 - 2 : 0; 5045 aSig64 = - ( ( bSig * q64 )<<38 ); 5046 expDiff -= 62; 5047 } 5048 expDiff += 64; 5049 q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 5050 q64 = ( 2 < q64 ) ? q64 - 2 : 0; 5051 q = q64>>( 64 - expDiff ); 5052 bSig <<= 6; 5053 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; 5054 } 5055 do { 5056 alternateASig = aSig; 5057 ++q; 5058 aSig -= bSig; 5059 } while ( 0 <= (int32_t) aSig ); 5060 sigMean = aSig + alternateASig; 5061 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 5062 aSig = alternateASig; 5063 } 5064 zSign = ( (int32_t) aSig < 0 ); 5065 if ( zSign ) aSig = - aSig; 5066 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status); 5067 } 5068 5069 5070 5071 /*---------------------------------------------------------------------------- 5072 | Returns the binary exponential of the single-precision floating-point value 5073 | `a'. The operation is performed according to the IEC/IEEE Standard for 5074 | Binary Floating-Point Arithmetic. 5075 | 5076 | Uses the following identities: 5077 | 5078 | 1. ------------------------------------------------------------------------- 5079 | x x*ln(2) 5080 | 2 = e 5081 | 5082 | 2. ------------------------------------------------------------------------- 5083 | 2 3 4 5 n 5084 | x x x x x x x 5085 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ... 5086 | 1! 2! 3! 4! 5! n! 
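|
| The table below stores the series coefficients 1/n! for n = 1..15 as
| float64 constants (0x3ff0000000000000 == 1/1!, 0x3fe0000000000000 ==
| 1/2!, and so on); the loop in float32_exp2() then accumulates
| r += x^n / n! term by term, with x = a * ln(2) per identity 1.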
5087 *----------------------------------------------------------------------------*/ 5088 5089 static const float64 float32_exp2_coefficients[15] = 5090 { 5091 const_float64( 0x3ff0000000000000ll ), /* 1 */ 5092 const_float64( 0x3fe0000000000000ll ), /* 2 */ 5093 const_float64( 0x3fc5555555555555ll ), /* 3 */ 5094 const_float64( 0x3fa5555555555555ll ), /* 4 */ 5095 const_float64( 0x3f81111111111111ll ), /* 5 */ 5096 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ 5097 const_float64( 0x3f2a01a01a01a01all ), /* 7 */ 5098 const_float64( 0x3efa01a01a01a01all ), /* 8 */ 5099 const_float64( 0x3ec71de3a556c734ll ), /* 9 */ 5100 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ 5101 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ 5102 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ 5103 const_float64( 0x3de6124613a86d09ll ), /* 13 */ 5104 const_float64( 0x3da93974a8c07c9dll ), /* 14 */ 5105 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ 5106 }; 5107 5108 float32 float32_exp2(float32 a, float_status *status) 5109 { 5110 bool aSign; 5111 int aExp; 5112 uint32_t aSig; 5113 float64 r, x, xn; 5114 int i; 5115 a = float32_squash_input_denormal(a, status); 5116 5117 aSig = extractFloat32Frac( a ); 5118 aExp = extractFloat32Exp( a ); 5119 aSign = extractFloat32Sign( a ); 5120 5121 if ( aExp == 0xFF) { 5122 if (aSig) { 5123 return propagateFloat32NaN(a, float32_zero, status); 5124 } 5125 return (aSign) ? float32_zero : a; 5126 } 5127 if (aExp == 0) { 5128 if (aSig == 0) return float32_one; 5129 } 5130 5131 float_raise(float_flag_inexact, status); 5132 5133 /* ******************************* */ 5134 /* using float64 for approximation */ 5135 /* ******************************* */ 5136 x = float32_to_float64(a, status); 5137 x = float64_mul(x, float64_ln2, status); 5138 5139 xn = x; 5140 r = float64_one; 5141 for (i = 0 ; i < 15 ; i++) { 5142 float64 f; 5143 5144 f = float64_mul(xn, float32_exp2_coefficients[i], status); 5145 r = float64_add(r, f, status); 5146 5147 xn = float64_mul(xn, x, status); 5148 } 5149 5150 return float64_to_float32(r, status); 5151 } 5152 5153 /*---------------------------------------------------------------------------- 5154 | Returns the binary log of the single-precision floating-point value `a'. 5155 | The operation is performed according to the IEC/IEEE Standard for Binary 5156 | Floating-Point Arithmetic. 
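|
| The fraction bits of the result are generated below by the classic
| shift-and-square method: the significand (in [1, 2), scaled by 2^23)
| is repeatedly squared; whenever the square lands in [2, 4) (bit 24
| set), the next result bit is 1 and the value is halved back into
| [1, 2).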
5157 *----------------------------------------------------------------------------*/ 5158 float32 float32_log2(float32 a, float_status *status) 5159 { 5160 bool aSign, zSign; 5161 int aExp; 5162 uint32_t aSig, zSig, i; 5163 5164 a = float32_squash_input_denormal(a, status); 5165 aSig = extractFloat32Frac( a ); 5166 aExp = extractFloat32Exp( a ); 5167 aSign = extractFloat32Sign( a ); 5168 5169 if ( aExp == 0 ) { 5170 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 ); 5171 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 5172 } 5173 if ( aSign ) { 5174 float_raise(float_flag_invalid, status); 5175 return float32_default_nan(status); 5176 } 5177 if ( aExp == 0xFF ) { 5178 if (aSig) { 5179 return propagateFloat32NaN(a, float32_zero, status); 5180 } 5181 return a; 5182 } 5183 5184 aExp -= 0x7F; 5185 aSig |= 0x00800000; 5186 zSign = aExp < 0; 5187 zSig = aExp << 23; 5188 5189 for (i = 1 << 22; i > 0; i >>= 1) { 5190 aSig = ( (uint64_t)aSig * aSig ) >> 23; 5191 if ( aSig & 0x01000000 ) { 5192 aSig >>= 1; 5193 zSig |= i; 5194 } 5195 } 5196 5197 if ( zSign ) 5198 zSig = -zSig; 5199 5200 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); 5201 } 5202 5203 /*---------------------------------------------------------------------------- 5204 | Returns the result of converting the double-precision floating-point value 5205 | `a' to the extended double-precision floating-point format. The conversion 5206 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 5207 | Arithmetic. 5208 *----------------------------------------------------------------------------*/ 5209 5210 floatx80 float64_to_floatx80(float64 a, float_status *status) 5211 { 5212 bool aSign; 5213 int aExp; 5214 uint64_t aSig; 5215 5216 a = float64_squash_input_denormal(a, status); 5217 aSig = extractFloat64Frac( a ); 5218 aExp = extractFloat64Exp( a ); 5219 aSign = extractFloat64Sign( a ); 5220 if ( aExp == 0x7FF ) { 5221 if (aSig) { 5222 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status), 5223 status); 5224 return floatx80_silence_nan(res, status); 5225 } 5226 return packFloatx80(aSign, 5227 floatx80_infinity_high, 5228 floatx80_infinity_low); 5229 } 5230 if ( aExp == 0 ) { 5231 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 5232 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 5233 } 5234 return 5235 packFloatx80( 5236 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11); 5237 5238 } 5239 5240 /*---------------------------------------------------------------------------- 5241 | Returns the remainder of the double-precision floating-point value `a' 5242 | with respect to the corresponding value `b'. The operation is performed 5243 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
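|
| As with float32_rem() above, the IEC/IEEE remainder is always exactly
| representable, so this operation may raise invalid (infinite `a' or
| zero `b') but never inexact.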
5244 *----------------------------------------------------------------------------*/ 5245 5246 float64 float64_rem(float64 a, float64 b, float_status *status) 5247 { 5248 bool aSign, zSign; 5249 int aExp, bExp, expDiff; 5250 uint64_t aSig, bSig; 5251 uint64_t q, alternateASig; 5252 int64_t sigMean; 5253 5254 a = float64_squash_input_denormal(a, status); 5255 b = float64_squash_input_denormal(b, status); 5256 aSig = extractFloat64Frac( a ); 5257 aExp = extractFloat64Exp( a ); 5258 aSign = extractFloat64Sign( a ); 5259 bSig = extractFloat64Frac( b ); 5260 bExp = extractFloat64Exp( b ); 5261 if ( aExp == 0x7FF ) { 5262 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 5263 return propagateFloat64NaN(a, b, status); 5264 } 5265 float_raise(float_flag_invalid, status); 5266 return float64_default_nan(status); 5267 } 5268 if ( bExp == 0x7FF ) { 5269 if (bSig) { 5270 return propagateFloat64NaN(a, b, status); 5271 } 5272 return a; 5273 } 5274 if ( bExp == 0 ) { 5275 if ( bSig == 0 ) { 5276 float_raise(float_flag_invalid, status); 5277 return float64_default_nan(status); 5278 } 5279 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 5280 } 5281 if ( aExp == 0 ) { 5282 if ( aSig == 0 ) return a; 5283 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 5284 } 5285 expDiff = aExp - bExp; 5286 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11; 5287 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11; 5288 if ( expDiff < 0 ) { 5289 if ( expDiff < -1 ) return a; 5290 aSig >>= 1; 5291 } 5292 q = ( bSig <= aSig ); 5293 if ( q ) aSig -= bSig; 5294 expDiff -= 64; 5295 while ( 0 < expDiff ) { 5296 q = estimateDiv128To64( aSig, 0, bSig ); 5297 q = ( 2 < q ) ? q - 2 : 0; 5298 aSig = - ( ( bSig>>2 ) * q ); 5299 expDiff -= 62; 5300 } 5301 expDiff += 64; 5302 if ( 0 < expDiff ) { 5303 q = estimateDiv128To64( aSig, 0, bSig ); 5304 q = ( 2 < q ) ? q - 2 : 0; 5305 q >>= 64 - expDiff; 5306 bSig >>= 2; 5307 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 5308 } 5309 else { 5310 aSig >>= 2; 5311 bSig >>= 2; 5312 } 5313 do { 5314 alternateASig = aSig; 5315 ++q; 5316 aSig -= bSig; 5317 } while ( 0 <= (int64_t) aSig ); 5318 sigMean = aSig + alternateASig; 5319 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 5320 aSig = alternateASig; 5321 } 5322 zSign = ( (int64_t) aSig < 0 ); 5323 if ( zSign ) aSig = - aSig; 5324 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status); 5325 5326 } 5327 5328 /*---------------------------------------------------------------------------- 5329 | Returns the binary log of the double-precision floating-point value `a'. 5330 | The operation is performed according to the IEC/IEEE Standard for Binary 5331 | Floating-Point Arithmetic. 
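|
| Edge cases handled below: log2(+/-0) returns -infinity, a negative
| nonzero input raises invalid and returns the default NaN, and
| log2(+infinity) returns +infinity.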
5332 *----------------------------------------------------------------------------*/ 5333 float64 float64_log2(float64 a, float_status *status) 5334 { 5335 bool aSign, zSign; 5336 int aExp; 5337 uint64_t aSig, aSig0, aSig1, zSig, i; 5338 a = float64_squash_input_denormal(a, status); 5339 5340 aSig = extractFloat64Frac( a ); 5341 aExp = extractFloat64Exp( a ); 5342 aSign = extractFloat64Sign( a ); 5343 5344 if ( aExp == 0 ) { 5345 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 ); 5346 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 5347 } 5348 if ( aSign ) { 5349 float_raise(float_flag_invalid, status); 5350 return float64_default_nan(status); 5351 } 5352 if ( aExp == 0x7FF ) { 5353 if (aSig) { 5354 return propagateFloat64NaN(a, float64_zero, status); 5355 } 5356 return a; 5357 } 5358 5359 aExp -= 0x3FF; 5360 aSig |= UINT64_C(0x0010000000000000); 5361 zSign = aExp < 0; 5362 zSig = (uint64_t)aExp << 52; 5363 for (i = 1LL << 51; i > 0; i >>= 1) { 5364 mul64To128( aSig, aSig, &aSig0, &aSig1 ); 5365 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); 5366 if ( aSig & UINT64_C(0x0020000000000000) ) { 5367 aSig >>= 1; 5368 zSig |= i; 5369 } 5370 } 5371 5372 if ( zSign ) 5373 zSig = -zSig; 5374 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status); 5375 } 5376 5377 /*---------------------------------------------------------------------------- 5378 | Returns the result of converting the extended double-precision floating- 5379 | point value `a' to the 32-bit two's complement integer format. The 5380 | conversion is performed according to the IEC/IEEE Standard for Binary 5381 | Floating-Point Arithmetic---which means in particular that the conversion 5382 | is rounded according to the current rounding mode. If `a' is a NaN, the 5383 | largest positive integer is returned. Otherwise, if the conversion 5384 | overflows, the largest integer with the same sign as `a' is returned. 5385 *----------------------------------------------------------------------------*/ 5386 5387 int32_t floatx80_to_int32(floatx80 a, float_status *status) 5388 { 5389 bool aSign; 5390 int32_t aExp, shiftCount; 5391 uint64_t aSig; 5392 5393 if (floatx80_invalid_encoding(a)) { 5394 float_raise(float_flag_invalid, status); 5395 return 1 << 31; 5396 } 5397 aSig = extractFloatx80Frac( a ); 5398 aExp = extractFloatx80Exp( a ); 5399 aSign = extractFloatx80Sign( a ); 5400 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; 5401 shiftCount = 0x4037 - aExp; 5402 if ( shiftCount <= 0 ) shiftCount = 1; 5403 shift64RightJamming( aSig, shiftCount, &aSig ); 5404 return roundAndPackInt32(aSign, aSig, status); 5405 5406 } 5407 5408 /*---------------------------------------------------------------------------- 5409 | Returns the result of converting the extended double-precision floating- 5410 | point value `a' to the 32-bit two's complement integer format. The 5411 | conversion is performed according to the IEC/IEEE Standard for Binary 5412 | Floating-Point Arithmetic, except that the conversion is always rounded 5413 | toward zero. If `a' is a NaN, the largest positive integer is returned. 5414 | Otherwise, if the conversion overflows, the largest integer with the same 5415 | sign as `a' is returned. 
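|
| For example (illustrative): converting 1.9 yields 1 and converting
| -1.9 yields -1, the inexact flag being raised in both cases because
| nonzero fraction bits are discarded.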
5416 *----------------------------------------------------------------------------*/ 5417 5418 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) 5419 { 5420 bool aSign; 5421 int32_t aExp, shiftCount; 5422 uint64_t aSig, savedASig; 5423 int32_t z; 5424 5425 if (floatx80_invalid_encoding(a)) { 5426 float_raise(float_flag_invalid, status); 5427 return 1 << 31; 5428 } 5429 aSig = extractFloatx80Frac( a ); 5430 aExp = extractFloatx80Exp( a ); 5431 aSign = extractFloatx80Sign( a ); 5432 if ( 0x401E < aExp ) { 5433 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; 5434 goto invalid; 5435 } 5436 else if ( aExp < 0x3FFF ) { 5437 if (aExp || aSig) { 5438 float_raise(float_flag_inexact, status); 5439 } 5440 return 0; 5441 } 5442 shiftCount = 0x403E - aExp; 5443 savedASig = aSig; 5444 aSig >>= shiftCount; 5445 z = aSig; 5446 if ( aSign ) z = - z; 5447 if ( ( z < 0 ) ^ aSign ) { 5448 invalid: 5449 float_raise(float_flag_invalid, status); 5450 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; 5451 } 5452 if ( ( aSig<<shiftCount ) != savedASig ) { 5453 float_raise(float_flag_inexact, status); 5454 } 5455 return z; 5456 5457 } 5458 5459 /*---------------------------------------------------------------------------- 5460 | Returns the result of converting the extended double-precision floating- 5461 | point value `a' to the 64-bit two's complement integer format. The 5462 | conversion is performed according to the IEC/IEEE Standard for Binary 5463 | Floating-Point Arithmetic---which means in particular that the conversion 5464 | is rounded according to the current rounding mode. If `a' is a NaN, 5465 | the largest positive integer is returned. Otherwise, if the conversion 5466 | overflows, the largest integer with the same sign as `a' is returned. 5467 *----------------------------------------------------------------------------*/ 5468 5469 int64_t floatx80_to_int64(floatx80 a, float_status *status) 5470 { 5471 bool aSign; 5472 int32_t aExp, shiftCount; 5473 uint64_t aSig, aSigExtra; 5474 5475 if (floatx80_invalid_encoding(a)) { 5476 float_raise(float_flag_invalid, status); 5477 return 1ULL << 63; 5478 } 5479 aSig = extractFloatx80Frac( a ); 5480 aExp = extractFloatx80Exp( a ); 5481 aSign = extractFloatx80Sign( a ); 5482 shiftCount = 0x403E - aExp; 5483 if ( shiftCount <= 0 ) { 5484 if ( shiftCount ) { 5485 float_raise(float_flag_invalid, status); 5486 if (!aSign || floatx80_is_any_nan(a)) { 5487 return INT64_MAX; 5488 } 5489 return INT64_MIN; 5490 } 5491 aSigExtra = 0; 5492 } 5493 else { 5494 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 5495 } 5496 return roundAndPackInt64(aSign, aSig, aSigExtra, status); 5497 5498 } 5499 5500 /*---------------------------------------------------------------------------- 5501 | Returns the result of converting the extended double-precision floating- 5502 | point value `a' to the 64-bit two's complement integer format. The 5503 | conversion is performed according to the IEC/IEEE Standard for Binary 5504 | Floating-Point Arithmetic, except that the conversion is always rounded 5505 | toward zero. If `a' is a NaN, the largest positive integer is returned. 5506 | Otherwise, if the conversion overflows, the largest integer with the same 5507 | sign as `a' is returned. 
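|
| The check on a.high below singles out the one in-range value whose
| exponent reaches 0x403E: -2^63, encoded with the sign bit set,
| exponent 0x403E and significand 0x8000000000000000 (so a.high ==
| 0xC03E with no bits left below the integer bit).  Exactly that
| encoding converts to INT64_MIN without raising invalid.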
5508 *----------------------------------------------------------------------------*/ 5509 5510 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) 5511 { 5512 bool aSign; 5513 int32_t aExp, shiftCount; 5514 uint64_t aSig; 5515 int64_t z; 5516 5517 if (floatx80_invalid_encoding(a)) { 5518 float_raise(float_flag_invalid, status); 5519 return 1ULL << 63; 5520 } 5521 aSig = extractFloatx80Frac( a ); 5522 aExp = extractFloatx80Exp( a ); 5523 aSign = extractFloatx80Sign( a ); 5524 shiftCount = aExp - 0x403E; 5525 if ( 0 <= shiftCount ) { 5526 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF); 5527 if ( ( a.high != 0xC03E ) || aSig ) { 5528 float_raise(float_flag_invalid, status); 5529 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { 5530 return INT64_MAX; 5531 } 5532 } 5533 return INT64_MIN; 5534 } 5535 else if ( aExp < 0x3FFF ) { 5536 if (aExp | aSig) { 5537 float_raise(float_flag_inexact, status); 5538 } 5539 return 0; 5540 } 5541 z = aSig>>( - shiftCount ); 5542 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { 5543 float_raise(float_flag_inexact, status); 5544 } 5545 if ( aSign ) z = - z; 5546 return z; 5547 5548 } 5549 5550 /*---------------------------------------------------------------------------- 5551 | Returns the result of converting the extended double-precision floating- 5552 | point value `a' to the single-precision floating-point format. The 5553 | conversion is performed according to the IEC/IEEE Standard for Binary 5554 | Floating-Point Arithmetic. 5555 *----------------------------------------------------------------------------*/ 5556 5557 float32 floatx80_to_float32(floatx80 a, float_status *status) 5558 { 5559 bool aSign; 5560 int32_t aExp; 5561 uint64_t aSig; 5562 5563 if (floatx80_invalid_encoding(a)) { 5564 float_raise(float_flag_invalid, status); 5565 return float32_default_nan(status); 5566 } 5567 aSig = extractFloatx80Frac( a ); 5568 aExp = extractFloatx80Exp( a ); 5569 aSign = extractFloatx80Sign( a ); 5570 if ( aExp == 0x7FFF ) { 5571 if ( (uint64_t) ( aSig<<1 ) ) { 5572 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), 5573 status); 5574 return float32_silence_nan(res, status); 5575 } 5576 return packFloat32( aSign, 0xFF, 0 ); 5577 } 5578 shift64RightJamming( aSig, 33, &aSig ); 5579 if ( aExp || aSig ) aExp -= 0x3F81; 5580 return roundAndPackFloat32(aSign, aExp, aSig, status); 5581 5582 } 5583 5584 /*---------------------------------------------------------------------------- 5585 | Returns the result of converting the extended double-precision floating- 5586 | point value `a' to the double-precision floating-point format. The 5587 | conversion is performed according to the IEC/IEEE Standard for Binary 5588 | Floating-Point Arithmetic. 
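|
| The rebias constant 0x3C01 below is (0x3FFF - 0x3FF) + 1: the
| difference between the extended and double-precision exponent biases,
| plus one because roundAndPackFloat64() expects `zExp' to be one less
| than the true exponent.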
5589 *----------------------------------------------------------------------------*/ 5590 5591 float64 floatx80_to_float64(floatx80 a, float_status *status) 5592 { 5593 bool aSign; 5594 int32_t aExp; 5595 uint64_t aSig, zSig; 5596 5597 if (floatx80_invalid_encoding(a)) { 5598 float_raise(float_flag_invalid, status); 5599 return float64_default_nan(status); 5600 } 5601 aSig = extractFloatx80Frac( a ); 5602 aExp = extractFloatx80Exp( a ); 5603 aSign = extractFloatx80Sign( a ); 5604 if ( aExp == 0x7FFF ) { 5605 if ( (uint64_t) ( aSig<<1 ) ) { 5606 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), 5607 status); 5608 return float64_silence_nan(res, status); 5609 } 5610 return packFloat64( aSign, 0x7FF, 0 ); 5611 } 5612 shift64RightJamming( aSig, 1, &zSig ); 5613 if ( aExp || aSig ) aExp -= 0x3C01; 5614 return roundAndPackFloat64(aSign, aExp, zSig, status); 5615 5616 } 5617 5618 /*---------------------------------------------------------------------------- 5619 | Returns the result of converting the extended double-precision floating- 5620 | point value `a' to the quadruple-precision floating-point format. The 5621 | conversion is performed according to the IEC/IEEE Standard for Binary 5622 | Floating-Point Arithmetic. 5623 *----------------------------------------------------------------------------*/ 5624 5625 float128 floatx80_to_float128(floatx80 a, float_status *status) 5626 { 5627 bool aSign; 5628 int aExp; 5629 uint64_t aSig, zSig0, zSig1; 5630 5631 if (floatx80_invalid_encoding(a)) { 5632 float_raise(float_flag_invalid, status); 5633 return float128_default_nan(status); 5634 } 5635 aSig = extractFloatx80Frac( a ); 5636 aExp = extractFloatx80Exp( a ); 5637 aSign = extractFloatx80Sign( a ); 5638 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { 5639 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), 5640 status); 5641 return float128_silence_nan(res, status); 5642 } 5643 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 5644 return packFloat128( aSign, aExp, zSig0, zSig1 ); 5645 5646 } 5647 5648 /*---------------------------------------------------------------------------- 5649 | Rounds the extended double-precision floating-point value `a' 5650 | to the precision provided by floatx80_rounding_precision and returns the 5651 | result as an extended double-precision floating-point value. 5652 | The operation is performed according to the IEC/IEEE Standard for Binary 5653 | Floating-Point Arithmetic. 5654 *----------------------------------------------------------------------------*/ 5655 5656 floatx80 floatx80_round(floatx80 a, float_status *status) 5657 { 5658 return roundAndPackFloatx80(status->floatx80_rounding_precision, 5659 extractFloatx80Sign(a), 5660 extractFloatx80Exp(a), 5661 extractFloatx80Frac(a), 0, status); 5662 } 5663 5664 /*---------------------------------------------------------------------------- 5665 | Rounds the extended double-precision floating-point value `a' to an integer, 5666 | and returns the result as an extended double-precision floating-point 5667 | value. The operation is performed according to the IEC/IEEE Standard for 5668 | Binary Floating-Point Arithmetic.
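|
| For an exponent of 0x3FFF + n with 0 <= n < 63, the integer part
| occupies the top n + 1 significand bits, so the code below isolates
| the units bit with lastBitMask = 1 << (0x403E - aExp) and rounds on
| the bits beneath it.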

/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended double-precision floating-point
| value.  The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    floatx80 z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        if (    ( aExp == 0 )
             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
            return a;
        }
        float_raise(float_flag_inexact, status);
        aSign = extractFloatx80Sign( a );
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            if ( ( aExp == 0x3FFE )
                 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
                return
                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_ties_away:
            if (aExp == 0x3FFE) {
                return packFloatx80(aSign, 0x3FFF,
                                    UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
                : packFloatx80( 0, 0, 0 );
        case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));

        case float_round_to_zero:
            break;
        default:
            g_assert_not_reached();
        }
        return packFloatx80( aSign, 0, 0 );
    }
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    switch (status->float_rounding_mode) {
    case float_round_nearest_even:
        z.low += lastBitMask>>1;
        if ((z.low & roundBitsMask) == 0) {
            z.low &= ~lastBitMask;
        }
        break;
    case float_round_ties_away:
        z.low += lastBitMask >> 1;
        break;
    case float_round_to_zero:
        break;
    case float_round_up:
        if (!extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    case float_round_down:
        if (extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    default:
        abort();
    }
    z.low &= ~ roundBitsMask;
    if ( z.low == 0 ) {
        ++z.high;
        z.low = UINT64_C(0x8000000000000000);
    }
    if (z.low != a.low) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
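
/*
 * Editor's note: illustrative sketch, not upstream code.  For a value
 * with biased exponent 0x4000 (i.e. in [2.0, 4.0)), the mask logic above
 * sets lastBitMask = 1 << (0x403E - 0x4000) = 1 << 62, so bit 62 of the
 * significand is the units bit and the 62 bits below it are the fraction
 * to be discarded.  The function name and 'env_status' are hypothetical;
 * 'env_status' is assumed to select float_round_nearest_even.
 */
#if 0
static floatx80 example_ties_to_even(float_status *env_status)
{
    /* 2.5 = 1.25 * 2^1: biased exponent 0x4000, significand 0xA000...0. */
    floatx80 two_point_five =
        packFloatx80(0, 0x4000, UINT64_C(0xA000000000000000));

    /* Nearest-even rounding returns 2.0 and raises float_flag_inexact. */
    return floatx80_round_to_int(two_point_five, env_status);
}
#endif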

/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the extended double-
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
| negated before being returned.  `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result. */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
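
/*
 * Editor's note: illustrative sketch, not upstream code.  The "extra"
 * word zSig1 produced by shift64ExtraRightJamming() above holds the bits
 * shifted out of the significand during operand alignment; any bits
 * pushed off the bottom of that extra word are OR-ed ("jammed") into its
 * least significant bit, so rounding can still tell that something
 * nonzero was discarded.  The function name is hypothetical.
 */
#if 0
static void example_sticky_alignment(void)
{
    uint64_t sig, extra;

    /* Shift 0x8000000000000001 right by 3: the three discarded low bits
     * (binary 001) reappear as the top bits of the extra word. */
    shift64ExtraRightJamming(UINT64_C(0x8000000000000001), 0, 3,
                             &sig, &extra);
    /* Now sig == 0x1000000000000000 and extra == 0x2000000000000000. */
}
#endif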

/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the extended
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
| difference is negated before being returned.  `zSign' is ignored if the
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the result of adding the extended double-precision floating-point
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign == bSign ) {
        return addFloatx80Sigs(a, b, aSign, status);
    }
    else {
        return subFloatx80Sigs(a, b, aSign, status);
    }
}
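
/*
 * Editor's note: illustrative sketch, not upstream code.  floatx80_add()
 * and floatx80_sub() work on sign-magnitude operands: when the operand
 * signs agree the magnitudes are added, otherwise they are subtracted,
 * so both entry points funnel into the same two helpers.  The function
 * name and 'env_status' are hypothetical.
 */
#if 0
static floatx80 example_add_dispatches_to_sub(float_status *env_status)
{
    /* (+1.5) + (-0.5): the signs differ, so floatx80_add() routes the
     * operation through subFloatx80Sigs() and yields +1.0. */
    floatx80 a = packFloatx80(0, 0x3FFF, UINT64_C(0xC000000000000000));
    floatx80 b = packFloatx80(1, 0x3FFE, UINT64_C(0x8000000000000000));

    return floatx80_add(a, b, env_status);
}
#endif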

/*----------------------------------------------------------------------------
| Returns the result of subtracting the extended double-precision floating-
| point values `a' and `b'.  The operation is performed according to the
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign == bSign ) {
        return subFloatx80Sigs(a, b, aSign, status);
    }
    else {
        return addFloatx80Sigs(a, b, aSign, status);
    }
}

/*----------------------------------------------------------------------------
| Returns the result of multiplying the extended double-precision floating-
| point values `a' and `b'.  The operation is performed according to the
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
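
/*
 * Editor's note: illustrative sketch, not upstream code.  The exponent
 * arithmetic above uses 0x3FFE rather than the bias 0x3FFF because the
 * 128-bit product of two significands in [2^63, 2^64) lands in
 * [2^126, 2^128): when bit 127 is already set, the off-by-one in the
 * bias removal absorbs the extra doubling; otherwise the product is
 * shifted left once and zExp decremented.  The function name and
 * 'env_status' are hypothetical.
 */
#if 0
static floatx80 example_mul_exponents(float_status *env_status)
{
    /* 1.5 * 1.5: zExp starts at 0x3FFF + 0x3FFF - 0x3FFE = 0x4000, and
     * the significand product 0xC000...^2 already has bit 127 set, so
     * the result is 1.125 * 2^1 = 2.25 with no normalization shift. */
    floatx80 x = packFloatx80(0, 0x3FFF, UINT64_C(0xC000000000000000));

    return floatx80_mul(x, x, env_status);
}
#endif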

/*----------------------------------------------------------------------------
| Returns the result of dividing the extended double-precision floating-point
| value `a' by the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
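
/*
 * Editor's note: illustrative sketch, not upstream code.
 * estimateDiv128To64() may return up to 2 more than the true truncated
 * quotient, so the loops above subtract the product back out and, while
 * the partial remainder is negative, decrement the quotient digit and
 * restore by re-adding the divisor.  The same pattern in miniature
 * (names hypothetical; assumes den != 0, num/den fits in 32 bits, and
 * the intermediate difference fits in int64_t):
 */
#if 0
static uint32_t example_restoring_correction(uint64_t num, uint32_t den)
{
    /* Deliberately over-estimate by 2, as the estimate may be. */
    uint32_t q = (uint32_t)(num / den) + 2;
    int64_t rem = (int64_t)(num - (uint64_t)q * den);

    while (rem < 0) {       /* at most two iterations here */
        --q;
        rem += den;
    }
    return q;               /* the true truncated quotient */
}
#endif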

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
| the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
| the absolute value of the integer quotient.
*----------------------------------------------------------------------------*/

floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}
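
/*
 * Editor's note: illustrative sketch, not upstream code.  The low
 * quotient bits reported through '*quotient' are exactly what an x87
 * FPREM implementation needs for the C0/C3/C1 condition bits.  The
 * function name, out-parameters and 'env_status' are hypothetical.
 */
#if 0
static floatx80 example_fprem_condition_bits(floatx80 st0, floatx80 st1,
                                             float_status *env_status,
                                             int *c0, int *c1, int *c3)
{
    uint64_t quotient;
    floatx80 r = floatx80_modrem(st0, st1, true, &quotient, env_status);

    /* FPREM reports the three low bits of the truncated quotient:
     * C0 = Q2, C3 = Q1, C1 = Q0. */
    *c0 = (quotient >> 2) & 1;
    *c3 = (quotient >> 1) & 1;
    *c1 = quotient & 1;
    return r;
}
#endif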

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, false, &quotient, status);
}

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b', with the quotient truncated
| toward zero.
*----------------------------------------------------------------------------*/

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, true, &quotient, status);
}
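
/*
 * Editor's note: illustrative sketch, not upstream code.  The two
 * wrappers differ only in how the quotient is rounded: floatx80_rem()
 * rounds it to nearest-even (IEEE remainder), floatx80_mod() truncates
 * it toward zero, so the results can differ by one multiple of 'b'.
 * The function name and 'env_status' are hypothetical.
 */
#if 0
static void example_rem_vs_mod(float_status *env_status)
{
    /* a = 7.0 = 1.75 * 2^2, b = 2.0: the exact quotient is 3.5. */
    floatx80 a = packFloatx80(0, 0x4001, UINT64_C(0xE000000000000000));
    floatx80 b = packFloatx80(0, 0x4000, UINT64_C(0x8000000000000000));

    floatx80 r = floatx80_rem(a, b, env_status);  /* 7 - 4*2 = -1.0 */
    floatx80 m = floatx80_mod(a, b, env_status);  /* 7 - 3*2 = +1.0 */
    (void)r;
    (void)m;
}
#endif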

/*----------------------------------------------------------------------------
| Returns the square root of the extended double-precision floating-point
| value `a'.  The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
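
/*
 * Editor's note: illustrative sketch, not upstream code.  The result
 * exponent above is ((aExp - 0x3FFF) >> 1) + 0x3FFF, i.e. the unbiased
 * exponent is halved (sqrt(m * 2^2k) = sqrt(m) * 2^k); odd exponents are
 * folded into the significand by the extra right shift.  The function
 * name and 'env_status' are hypothetical.
 */
#if 0
static floatx80 example_exact_sqrt(float_status *env_status)
{
    /* 4.0 = 1.0 * 2^2: zExp = ((0x4001 - 0x3FFF) >> 1) + 0x3FFF = 0x4000,
     * and the iteration terminates with a zero remainder, so the result
     * should be exactly 2.0 with no inexact flag raised. */
    floatx80 four = packFloatx80(0, 0x4001, UINT64_C(0x8000000000000000));

    return floatx80_sqrt(four, env_status);
}
#endif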

/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the extended double-precision floating-point format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    if ( expDiff < -1 ) return a;
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1,
        (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}

/*----------------------------------------------------------------------------
| Returns the square root of the quadruple-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
}
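
/*
 * Editor's note: illustrative sketch, not upstream code.  When the root
 * is irrational, the sticky bits folded into zSig1 above guarantee the
 * inexact flag.  make_float128() is the constructor from fpu/softfloat.h;
 * the function name and 'env_status' are hypothetical.
 */
#if 0
static float128 example_inexact_sqrt(float_status *env_status)
{
    /* 2.0 in quadruple precision: biased exponent 0x4000, fraction 0. */
    float128 two = make_float128(UINT64_C(0x4000000000000000), 0);

    /* sqrt(2) is irrational, so float_flag_inexact must be raised. */
    return float128_sqrt(two, env_status);
}
#endif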

static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
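
/*
 * Editor's note: illustrative sketch, not upstream code.  Two details of
 * the comparison above are easy to miss: +0 and -0 compare equal despite
 * their differing sign bits, and pseudo-denormals (biased exponent 0,
 * explicit integer bit set) are normalized first so they compare equal
 * to their normalized counterparts.  The function name and 'env_status'
 * are hypothetical.
 */
#if 0
static void example_signed_zeros_compare_equal(float_status *env_status)
{
    floatx80 pzero = packFloatx80(0, 0, 0);
    floatx80 mzero = packFloatx80(1, 0, 0);

    g_assert(floatx80_compare(pzero, mzero, env_status)
             == float_relation_equal);
}
#endif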

FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_compare_internal(a, b, 0, status);
}

FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
                                     float_status *status)
{
    return floatx80_compare_internal(a, b, 1, status);
}

static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}

FloatRelation float128_compare(float128 a, float128 b, float_status *status)
{
    return float128_compare_internal(a, b, 0, status);
}

FloatRelation float128_compare_quiet(float128 a, float128 b,
                                     float_status *status)
{
    return float128_compare_internal(a, b, 1, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN(a, a, status);
        }
        return a;
    }
    if (aExp != 0) {
        aSig0 |= UINT64_C(0x0001000000000000);
    } else if (aSig0 == 0 && aSig1 == 0) {
        return a;
    } else {
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1,
                                          status);
}
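
/*
 * Editor's note: illustrative sketch, not upstream code.  The scalbn
 * helpers scale by a power of two exactly (unless the result overflows
 * or underflows, in which case it is rounded and flagged as usual);
 * clamping n to +/-0x10000 is safe because any |n| that large already
 * saturates every representable exponent.  The function name and
 * 'env_status' are hypothetical.
 */
#if 0
static floatx80 example_fscale_like(floatx80 st0, float_status *env_status)
{
    /* st0 * 2^12, as the x87 FSCALE would compute with ST(1) = 12. */
    return floatx80_scalbn(st0, 12, env_status);
}
#endif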

static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}