1 /* 2 * QEMU float support 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file after December 1st 2014 will be 14 * taken to be licensed under the Softfloat-2a license unless specifically 15 * indicated otherwise. 16 */ 17 18 /* 19 =============================================================================== 20 This C source file is part of the SoftFloat IEC/IEEE Floating-point 21 Arithmetic Package, Release 2a. 22 23 Written by John R. Hauser. This work was made possible in part by the 24 International Computer Science Institute, located at Suite 600, 1947 Center 25 Street, Berkeley, California 94704. Funding was partially provided by the 26 National Science Foundation under grant MIP-9311980. The original version 27 of this code was written as part of a project to build a fixed-point vector 28 processor in collaboration with the University of California at Berkeley, 29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 31 arithmetic/SoftFloat.html'. 32 33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
38 39 Derivative works are acceptable, even for commercial purposes, so long as 40 (1) they include prominent notice that the work is derivative, and (2) they 41 include prominent notice akin to these four paragraphs for those parts of 42 this code that are retained. 43 44 =============================================================================== 45 */ 46 47 /* BSD licensing: 48 * Copyright (c) 2006, Fabrice Bellard 49 * All rights reserved. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions are met: 53 * 54 * 1. Redistributions of source code must retain the above copyright notice, 55 * this list of conditions and the following disclaimer. 56 * 57 * 2. Redistributions in binary form must reproduce the above copyright notice, 58 * this list of conditions and the following disclaimer in the documentation 59 * and/or other materials provided with the distribution. 60 * 61 * 3. Neither the name of the copyright holder nor the names of its contributors 62 * may be used to endorse or promote products derived from this software without 63 * specific prior written permission. 64 * 65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 75 * THE POSSIBILITY OF SUCH DAMAGE. 76 */ 77 78 /* Portions of this work are licensed under the terms of the GNU GPL, 79 * version 2 or later. See the COPYING file in the top-level directory. 80 */ 81 82 /* softfloat (and in particular the code in softfloat-specialize.h) is 83 * target-dependent and needs the TARGET_* macros. 84 */ 85 #include "qemu/osdep.h" 86 #include <math.h> 87 #include "qemu/bitops.h" 88 #include "fpu/softfloat.h" 89 90 /* We only need stdlib for abort() */ 91 92 /*---------------------------------------------------------------------------- 93 | Primitive arithmetic functions, including multi-word arithmetic, and 94 | division and square root approximations. (Can be specialized to target if 95 | desired.) 96 *----------------------------------------------------------------------------*/ 97 #include "fpu/softfloat-macros.h" 98 99 /* 100 * Hardfloat 101 * 102 * Fast emulation of guest FP instructions is challenging for two reasons. 103 * First, FP instruction semantics are similar but not identical, particularly 104 * when handling NaNs. Second, emulating at reasonable speed the guest FP 105 * exception flags is not trivial: reading the host's flags register with a 106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp], 107 * and trapping on every FP exception is not fast nor pleasant to work with. 108 * 109 * We address these challenges by leveraging the host FPU for a subset of the 110 * operations. 
To do this we expand on the idea presented in this paper: 111 * 112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a 113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615. 114 * 115 * The idea is thus to leverage the host FPU to (1) compute FP operations 116 * and (2) identify whether FP exceptions occurred while avoiding 117 * expensive exception flag register accesses. 118 * 119 * An important optimization shown in the paper is that given that exception 120 * flags are rarely cleared by the guest, we can avoid recomputing some flags. 121 * This is particularly useful for the inexact flag, which is very frequently 122 * raised in floating-point workloads. 123 * 124 * We optimize the code further by deferring to soft-fp whenever FP exception 125 * detection might get hairy. Two examples: (1) when at least one operand is 126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result 127 * and the result is < the minimum normal. 
 */

/*
 * Flush a denormal input operand to a zero of the same sign, raising the
 * input-denormal flag.  No flush_inputs_to_zero check is done here; the
 * GEN_INPUT_FLUSH{1,2,3} wrappers below perform that gate.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

/* Flush one input, but only when the status asks for input flushing. */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

/* As above, for two inputs. */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

/* As above, for three inputs. */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* Default: softfloat primitives for all condition checks. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

/*
 * Return true if the hardfloat (host FPU) fast path may be attempted:
 * hardfloat is compiled in, the inexact flag is already set (so it need not
 * be recomputed -- see the hardfloat comment above), and the rounding mode
 * is round-to-nearest-even.
 */
static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g.
 * float32_is_zero_or_normal) for most condition checks, or native ones
 * (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

/*
 * Soft/host views of the same bits: 's' is the softfloat representation,
 * 'h' the host FP type handed to the host FPU.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Caller-supplied predicates gating the hardfloat path (pre/post checks). */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat fallback and host-FPU implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

/* Infinity check; flavor selected by QEMU_HARDFLOAT_USE_ISINF (see above). */
static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

/*
 * Generic 2-input float32 operation: try the host FPU, falling back to the
 * softfloat implementation ('soft') whenever exception detection would be
 * unreliable.  'pre' screens the (possibly flushed) inputs before the hard
 * op; 'post' decides whether a tiny result must be redone in softfloat.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* An infinite result from the hard op is treated as overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: recompute in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/* As float32_gen2, for float64. */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* An infinite result from the hard op is treated as overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: recompute in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint32_t extractFloat32Frac(float32 a)
{
    return float32_val(a) & 0x007FFFFF;
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat32Exp(float32 a)
{
    return (float32_val(a) >> 23) & 0xFF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat32Sign(float32 a)
{
    return float32_val(a) >> 31;
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint64_t extractFloat64Frac(float64 a)
{
    return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat64Exp(float64 a)
{
    return (float64_val(a) >> 52) & 0x7FF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat64Sign(float64 a)
{
    return float64_val(a) >> 63;
}

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Convert a FloatClass value into a single bit for use in a class mask. */
#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero = float_cmask(float_class_zero),
    float_cmask_normal = float_cmask(float_class_normal),
    float_cmask_inf = float_cmask(float_class_inf),
    float_cmask_qnan = float_cmask(float_class_qnan),
    float_cmask_snan = float_cmask(float_class_snan),

    /* Common combinations: "zero or infinity", and "any kind of NaN". */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
};

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    /* Relies on qnan and snan being the last members of FloatClass. */
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name.
         */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation. In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
/* The "& 63" masks reduce the negative shift counts modulo the word size. */
#define FLOAT_PARAMS(E, F)                           \
    .exp_size = E,                                   \
    .exp_bias = ((1 << E) - 1) >> 1,                 \
    .exp_max = (1 << E) - 1,                         \
    .frac_size = F,                                  \
    .frac_shift = (-F - 1) & 63,                     \
    .frac_lsb = 1ull << ((-F - 1) & 63),             \
    .frac_lsbm1 = 1ull << ((-F - 2) & 63),           \
    .round_mask = (1ull << ((-F - 1) & 63)) - 1,     \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    /* Only the high word holds sign/exponent/fraction-high bits. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

/*
 * Forward declarations of the per-size (parts64/parts128/parts256)
 * implementations, plus "parts_*" wrapper macros that dispatch to the
 * variant matching the pointer type of the first argument, selected at
 * compile time via QEMU_GENERIC.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

/*
 * As with PARTS_GENERIC_*, dispatch the frac64_*, frac128_* or frac256_*
 * fraction-field helper according to the pointer type of P.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* R = A + B; return carry-out of the fraction addition. */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

/* R = A + C for a 64-bit immediate C; return carry-out. */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

/* Set the fraction to all-ones (e.g. for the largest finite number). */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

/* Unsigned comparison of fractions: -1, 0, or 1 for A <, ==, > B. */
static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

/*
 * A /= B, jamming any remainder into the lsb as a sticky bit.
 * Return true if the exponent must be decremented by one.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* Same pre-shift trick as frac64_div, on a 128-bit dividend. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

/* True if the fraction is zero. */
static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

/* Widening multiply: R (double-width) = A * B. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

/* Two's-complement negation of the fraction. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

/*
 * Shift the fraction left until its msb is set; return the shift count
 * (the full width when the fraction is zero).
 */
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Rotate whole 64-bit limbs down to the first non-zero one. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Finish with a sub-64-bit left shift across the limbs. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

/* Left shift of the fraction by C bits; bits shifted out are lost. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

/* Right shift of the fraction by C bits; bits shifted out are lost. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

/*
 * Right shift by C, jamming ("sticky-or") any bits shifted out into
 * the lsb of the result so inexactness is not lost by rounding.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Entire fraction shifted out: collapse to a sticky bit. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, \
C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

/* R = A - B; return borrow-out of the fraction subtraction. */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

/* Narrow a double-width fraction, jamming discarded bits into the lsb. */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

/* Widen a fraction to double width, zero-filling the low bits. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

/*
 * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};

/*
 * Instantiate the size-specific parts implementations by textual
 * inclusion: N is the fraction width, W the double-width used for
 * intermediate products.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW

/*
 * Pack/unpack routines with a specific FloatFmt.
 */

/* Unpack F with an explicit format description (used for alt-HP too). */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/* Round P according to PARAMS and repack it as a raw float16. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}

/*
 * Addition and subtraction
 */

static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}

/* Soft-float fallback for the hardfloat fast path below. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return
float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}

/* Host-FPU fast paths used when can_use_fpu() holds. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}

/*
 * Post-condition for hardfloat add/sub/mul: reject the fast result when
 * both inputs were zero, since the sign of 0+0 / 0*0 depends on the
 * rounding mode and must go through the soft path.
 */
static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    }
    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}

static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    } else {
        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
    }
}

static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}

static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}

static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}

/*
 * Multiplication
 */

float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}

bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return
float128_round_pack_canonical(pr, status);
}

/*
 * Fused multiply-add
 */

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}

/* Testing knob: force the soft path even when the host FPU is usable. */
static bool force_soft_fma;

float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: fall back with the original operands. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}

float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /* Possible underflow: fall back with the original operands. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}

bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Division
 */

float16 float16_div(float16 a, float16
b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat division: unpack both operands, divide in parts, repack. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

/* Host-FPU fast paths used by the gen2 dispatchers below. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}

/*
 * Pre-condition for taking the hardfloat path: the dividend must be
 * zero or normal and the divisor normal (no NaN/Inf/subnormal inputs).
 */
static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}

/*
 * Post-condition on the hardfloat result ('a' is the result here; 'b' is
 * unused but the signature is fixed by the gen2 callback interface).
 * A zero result is rejected so the softfloat path can set flags exactly.
 */
static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}

float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}

bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_div(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */

/* Adjust unpacked parts for the ARM alternative half-precision format. */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format. Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format. Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}

/* Same-width conversion only needs to canonicalize NaNs. */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)

/* Narrowing 128 -> 64 bit parts: truncate-and-jam normal fractions. */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}

/* Widening 64 -> 128 bit parts is always exact for the fraction. */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* 'ieee' selects IEEE half-precision vs the ARM alternative format. */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

/* Slow path for float32 -> float64 (NaN/Inf/subnormal inputs). */
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.
         */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        /* NaN, Inf or subnormal: take the softfloat path. */
        return soft_float32_to_float64(a, s);
    }
}

float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* bfloat16 conversions go through the generic parts code. */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

/* 128-bit conversions use the dedicated narrow/widen helpers. */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

/*
 * Round to integral value
 */

float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p,
                       s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}

/*
 * Floating-point to signed integer conversions
 */

/* Each _scalbn variant converts a * 2**scale, saturating to the bounds. */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

/* Convenience wrappers: current rounding mode, no scaling. */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Convenience wrappers: force round-toward-zero (C-style truncation). */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t
bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

/*
 * Floating-point to unsigned integer conversions
 */

/* Each _scalbn variant converts a * 2**scale, saturating at [0, max]. */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

/* Convenience wrappers: current rounding mode, no scaling. */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Convenience wrappers: force round-toward-zero (C-style truncation). */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}
2973 2974 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) 2975 { 2976 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2977 } 2978 2979 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s) 2980 { 2981 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s); 2982 } 2983 2984 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s) 2985 { 2986 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s); 2987 } 2988 2989 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) 2990 { 2991 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 2992 } 2993 2994 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) 2995 { 2996 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 2997 } 2998 2999 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) 3000 { 3001 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 3002 } 3003 3004 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) 3005 { 3006 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); 3007 } 3008 3009 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) 3010 { 3011 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); 3012 } 3013 3014 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s) 3015 { 3016 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); 3017 } 3018 3019 /* 3020 * Signed integer to floating-point conversions 3021 */ 3022 3023 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) 3024 { 3025 FloatParts64 p; 3026 3027 parts_sint_to_float(&p, a, scale, status); 3028 return float16_round_pack_canonical(&p, status); 3029 } 3030 3031 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) 3032 { 3033 return int64_to_float16_scalbn(a, scale, status); 3034 } 3035 3036 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) 3037 { 3038 
return int64_to_float16_scalbn(a, scale, status); 3039 } 3040 3041 float16 int64_to_float16(int64_t a, float_status *status) 3042 { 3043 return int64_to_float16_scalbn(a, 0, status); 3044 } 3045 3046 float16 int32_to_float16(int32_t a, float_status *status) 3047 { 3048 return int64_to_float16_scalbn(a, 0, status); 3049 } 3050 3051 float16 int16_to_float16(int16_t a, float_status *status) 3052 { 3053 return int64_to_float16_scalbn(a, 0, status); 3054 } 3055 3056 float16 int8_to_float16(int8_t a, float_status *status) 3057 { 3058 return int64_to_float16_scalbn(a, 0, status); 3059 } 3060 3061 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) 3062 { 3063 FloatParts64 p; 3064 3065 parts64_sint_to_float(&p, a, scale, status); 3066 return float32_round_pack_canonical(&p, status); 3067 } 3068 3069 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) 3070 { 3071 return int64_to_float32_scalbn(a, scale, status); 3072 } 3073 3074 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) 3075 { 3076 return int64_to_float32_scalbn(a, scale, status); 3077 } 3078 3079 float32 int64_to_float32(int64_t a, float_status *status) 3080 { 3081 return int64_to_float32_scalbn(a, 0, status); 3082 } 3083 3084 float32 int32_to_float32(int32_t a, float_status *status) 3085 { 3086 return int64_to_float32_scalbn(a, 0, status); 3087 } 3088 3089 float32 int16_to_float32(int16_t a, float_status *status) 3090 { 3091 return int64_to_float32_scalbn(a, 0, status); 3092 } 3093 3094 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status) 3095 { 3096 FloatParts64 p; 3097 3098 parts_sint_to_float(&p, a, scale, status); 3099 return float64_round_pack_canonical(&p, status); 3100 } 3101 3102 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status) 3103 { 3104 return int64_to_float64_scalbn(a, scale, status); 3105 } 3106 3107 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status) 3108 
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

/* Signed integer -> bfloat16; narrower inputs widen to int64. */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}

/*
 * Unsigned Integer to floating-point conversions
 */

/* Convert a * 2**scale to float16; narrower inputs widen to uint64. */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

/*
 * Minimum and maximum
 */

static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}

/*
 * Expand the public entry points (e.g. float32_maxnum) from the
 * static per-format helpers above, one per minmax flag combination.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0) \
    MINMAX_1(type, maxnum, minmax_isnum) \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
    MINMAX_1(type, min, minmax_ismin) \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2

/*
 * Floating point compare
 */

static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hard-float fast path: use the host FPU's quiet comparison macros for
 * ordered operands; unordered inputs fall through to the softfloat
 * comparison, which also raises the appropriate flags.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}

static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/* Hard-float fast path; see float32_hs_compare above. */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}

/*
 * Scale by 2**N
 */

float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

/*
 * Square Root
 */

float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}

float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    /*
     * The host sqrt is used only for non-negative zero or normal inputs;
     * NaN, infinity, negative and denormal inputs all take the softfloat
     * path, which raises the proper flags.
     */
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    /* Same guard as float32_sqrt: hardfloat only for >= 0 zero/normal. */
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}

bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}

/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/

float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Shift the canonical fraction down into the format's field width. */
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/

float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    /* Align to the canonical fraction position, silence, and realign. */
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/

static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    /* Raw exp == 0 with a nonzero fraction is a denormal input. */
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}

float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}

/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
| input.  If `zSign' is 1, the input is negated before being converted to an
| integer.  Bit 63 of `absZ' must be zero.
Ordinarily, the fixed-point input
| is simply rounded to an integer, with the inexact exception raised if the
| input cannot be represented exactly as an integer.  However, if the fixed-
| point input is too large, the invalid exception is raised and the largest
| positive or negative integer is returned.
*----------------------------------------------------------------------------*/

static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result bit (bit 7) would be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    /* roundBits are the 7 fraction bits discarded below the binary point. */
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        /* Exact tie: clear the low bit to round to even. */
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range, or sign flipped by the negation: invalid. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}

/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer.  However, if the fixed-point input is too large, the invalid
| exception is raised and the largest positive or negative integer is
| returned.
*----------------------------------------------------------------------------*/

static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the fraction word is >= 1/2 (its top bit set). */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Carry out of the 64-bit integer part: result cannot fit. */
        if ( absZ0 == 0 ) goto overflow;
        if (!(absZ1 << 1) && roundNearestEven) {
            /* Exact tie: round to even. */
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}

/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;

}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input.  Ordinarily, the abstract
| value is simply rounded and packed into the single-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal single-
| precision floating-point number.
|     The input significand `zSig' has its binary point between bits 30
| and 29, which is 7 bits to the left of the usual location.  This shifted
| significand must be normalized or smaller.  If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding.  In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow to infinity only if rounding moves away from zero;
             * otherwise return the maximal finite value (all frac bits set).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input.  This routine is just like
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
4142 *----------------------------------------------------------------------------*/ 4143 4144 static float32 4145 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, 4146 float_status *status) 4147 { 4148 int8_t shiftCount; 4149 4150 shiftCount = clz32(zSig) - 1; 4151 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, 4152 status); 4153 4154 } 4155 4156 /*---------------------------------------------------------------------------- 4157 | Normalizes the subnormal double-precision floating-point value represented 4158 | by the denormalized significand `aSig'. The normalized exponent and 4159 | significand are stored at the locations pointed to by `zExpPtr' and 4160 | `zSigPtr', respectively. 4161 *----------------------------------------------------------------------------*/ 4162 4163 static void 4164 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr) 4165 { 4166 int8_t shiftCount; 4167 4168 shiftCount = clz64(aSig) - 11; 4169 *zSigPtr = aSig<<shiftCount; 4170 *zExpPtr = 1 - shiftCount; 4171 4172 } 4173 4174 /*---------------------------------------------------------------------------- 4175 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 4176 | double-precision floating-point value, returning the result. After being 4177 | shifted into the proper positions, the three fields are simply added 4178 | together to form the result. This means that any integer portion of `zSig' 4179 | will be added into the exponent. Since a properly normalized significand 4180 | will have an integer portion equal to 1, the `zExp' input should be 1 less 4181 | than the desired result exponent whenever `zSig' is a complete, normalized 4182 | significand. 
4183 *----------------------------------------------------------------------------*/ 4184 4185 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig) 4186 { 4187 4188 return make_float64( 4189 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig); 4190 4191 } 4192 4193 /*---------------------------------------------------------------------------- 4194 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4195 | and significand `zSig', and returns the proper double-precision floating- 4196 | point value corresponding to the abstract input. Ordinarily, the abstract 4197 | value is simply rounded and packed into the double-precision format, with 4198 | the inexact exception raised if the abstract input cannot be represented 4199 | exactly. However, if the abstract value is too large, the overflow and 4200 | inexact exceptions are raised and an infinity or maximal finite value is 4201 | returned. If the abstract value is too small, the input value is rounded to 4202 | a subnormal number, and the underflow and inexact exceptions are raised if 4203 | the abstract input cannot be represented exactly as a subnormal double- 4204 | precision floating-point number. 4205 | The input significand `zSig' has its binary point between bits 62 4206 | and 61, which is 10 bits to the left of the usual location. This shifted 4207 | significand must be normalized or smaller. If `zSig' is not normalized, 4208 | `zExp' must be 0; in that case, the result returned is a subnormal number, 4209 | and it must not require rounding. In the usual case that `zSig' is 4210 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 4211 | The handling of underflow and overflow follows the IEC/IEEE Standard for 4212 | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/

static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* `roundIncrement' is what we add to the 10 guard bits before
     * truncating them; 0x200 is half an ULP (round-to-nearest), 0x3ff
     * rounds away from zero in the chosen direction.  */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round to odd: only bump if the result bit (bit 10) is even.  */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* The unsigned compare catches both too-large (zExp >= 0x7FD) and
     * negative (subnormal) exponents in one test.  */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: return infinity, or the maximal finite value when
             * the rounding direction cannot reach infinity.  */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess: either detected before rounding, or the value
             * would not round up into the smallest normal number.  */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    /* Add the increment, drop the 10 guard bits, then clear the low bit
     * on an exact tie to implement round-to-nearest-even.  */
    zSig = ( zSig + roundIncrement )>>10;
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input.  This routine is just like
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/

static float64
 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                              float_status *status)
{
    int8_t shiftCount;

    /* Bring the leading set bit up to bit 62 (the position
     * roundAndPackFloat64 expects) and adjust the exponent to match.  */
    shiftCount = clz64(zSig) - 1;
    return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
                               status);

}

/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
4317 *----------------------------------------------------------------------------*/ 4318 4319 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, 4320 uint64_t *zSigPtr) 4321 { 4322 int8_t shiftCount; 4323 4324 shiftCount = clz64(aSig); 4325 *zSigPtr = aSig<<shiftCount; 4326 *zExpPtr = 1 - shiftCount; 4327 } 4328 4329 /*---------------------------------------------------------------------------- 4330 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4331 | and extended significand formed by the concatenation of `zSig0' and `zSig1', 4332 | and returns the proper extended double-precision floating-point value 4333 | corresponding to the abstract input. Ordinarily, the abstract value is 4334 | rounded and packed into the extended double-precision format, with the 4335 | inexact exception raised if the abstract input cannot be represented 4336 | exactly. However, if the abstract value is too large, the overflow and 4337 | inexact exceptions are raised and an infinity or maximal finite value is 4338 | returned. If the abstract value is too small, the input value is rounded to 4339 | a subnormal number, and the underflow and inexact exceptions are raised if 4340 | the abstract input cannot be represented exactly as a subnormal extended 4341 | double-precision floating-point number. 4342 | If `roundingPrecision' is 32 or 64, the result is rounded to the same 4343 | number of bits as single or double precision, respectively. Otherwise, the 4344 | result is rounded to the full precision of the extended double-precision 4345 | format. 4346 | The input significand must be normalized or smaller. If the input 4347 | significand is not normalized, `zExp' must be 0; in that case, the result 4348 | returned is a subnormal number, and it must not require rounding. The 4349 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary 4350 | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/

floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /* Reduced precision: pick the increment (half ULP at the target
     * precision) and the mask of bits that will be discarded.  */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold any low-half bits into the sticky bit of zSig0.  */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Unsigned compare catches both overflow and zExp <= 0 at once.  */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* A carry into the integer bit means the value rounded up to
             * the smallest normal number.  */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                /* Exact tie: also clear the result's low bit (even).  */
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        /* Carry out of the significand: renormalize to 1.0 x 2^(zExp+1).  */
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full 80-bit precision: zSig1 holds the round and sticky bits.  */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directions that cannot reach infinity saturate at the
             * largest finite value instead.  */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Re-derive `increment': the round/sticky bits in zSig1 just
             * changed in the denormalizing shift above.  */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out: renormalize and bump the exponent.  */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/

floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    /* If the high half is empty, promote the low half first.  */
    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);

}

/*----------------------------------------------------------------------------
| Returns the least-significant 64 fraction bits of the quadruple-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint64_t extractFloat128Frac1( float128 a )
{

    return a.low;

}

/*----------------------------------------------------------------------------
| Returns the most-significant 48 fraction bits of the quadruple-precision
| floating-point value `a'.
4585 *----------------------------------------------------------------------------*/ 4586 4587 static inline uint64_t extractFloat128Frac0( float128 a ) 4588 { 4589 4590 return a.high & UINT64_C(0x0000FFFFFFFFFFFF); 4591 4592 } 4593 4594 /*---------------------------------------------------------------------------- 4595 | Returns the exponent bits of the quadruple-precision floating-point value 4596 | `a'. 4597 *----------------------------------------------------------------------------*/ 4598 4599 static inline int32_t extractFloat128Exp( float128 a ) 4600 { 4601 4602 return ( a.high>>48 ) & 0x7FFF; 4603 4604 } 4605 4606 /*---------------------------------------------------------------------------- 4607 | Returns the sign bit of the quadruple-precision floating-point value `a'. 4608 *----------------------------------------------------------------------------*/ 4609 4610 static inline bool extractFloat128Sign(float128 a) 4611 { 4612 return a.high >> 63; 4613 } 4614 4615 /*---------------------------------------------------------------------------- 4616 | Normalizes the subnormal quadruple-precision floating-point value 4617 | represented by the denormalized significand formed by the concatenation of 4618 | `aSig0' and `aSig1'. The normalized exponent is stored at the location 4619 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized 4620 | significand are stored at the location pointed to by `zSig0Ptr', and the 4621 | least significant 64 bits of the normalized significand are stored at the 4622 | location pointed to by `zSig1Ptr'. 
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* Only the low 64 bits are set; position aSig1's leading bit at
         * significand bit 112 (bit 48 of the high word).  */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            /* Leading bit is already above the target: split aSig1 across
             * both halves.  The `& 63' keeps the left shift in range when
             * shiftCount is negative.  */
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        /* Account for the 64-bit promotion of aSig1 into the high half.  */
        *zExpPtr = - shiftCount - 63;
    }
    else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }

}

/*----------------------------------------------------------------------------
| Packs the sign `zSign', the exponent `zExp', and the significand formed
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
| floating-point value, returning the result.  After being shifted into the
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
| added together to form the most significant 32 bits of the result.  This
| means that any integer portion of `zSig0' will be added into the exponent.
| Since a properly normalized significand will have an integer portion equal
| to 1, the `zExp' input should be 1 less than the desired result exponent
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
| significand.
*----------------------------------------------------------------------------*/

static inline float128
 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
{
    float128 z;

    z.low = zSig1;
    /* Addition (not OR) lets a significand carry propagate into the
     * exponent field, which callers rely on.  */
    z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
    return z;
}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0', `zSig1',
| and `zSig2', and returns the proper quadruple-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| simply rounded and packed into the quadruple-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal quadruple-
| precision floating-point number.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  In the
| usual case that the input significand is normalized, `zExp' must be 1 less
| than the ``true'' floating-point exponent.  The handling of underflow and
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* zSig2 holds the round and sticky bits; decide whether to round up.  */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Unsigned compare catches overflow and negative exponents together.  */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directions that cannot reach infinity (and round-to-odd)
             * saturate at the largest finite value.  */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Re-derive `increment': zSig1/zSig2 just changed in the
             * denormalizing shift above.  */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie under nearest-even: clear the low bit.  */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand formed by the concatenation of `zSig0' and `zSig1', and
| returns the proper quadruple-precision floating-point
value corresponding 4816 | to the abstract input. This routine is just like `roundAndPackFloat128' 4817 | except that the input significand has fewer bits and does not have to be 4818 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 4819 | point exponent. 4820 *----------------------------------------------------------------------------*/ 4821 4822 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, 4823 uint64_t zSig0, uint64_t zSig1, 4824 float_status *status) 4825 { 4826 int8_t shiftCount; 4827 uint64_t zSig2; 4828 4829 if ( zSig0 == 0 ) { 4830 zSig0 = zSig1; 4831 zSig1 = 0; 4832 zExp -= 64; 4833 } 4834 shiftCount = clz64(zSig0) - 15; 4835 if ( 0 <= shiftCount ) { 4836 zSig2 = 0; 4837 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 4838 } 4839 else { 4840 shift128ExtraRightJamming( 4841 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 4842 } 4843 zExp -= shiftCount; 4844 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); 4845 4846 } 4847 4848 4849 /*---------------------------------------------------------------------------- 4850 | Returns the result of converting the 32-bit two's complement integer `a' 4851 | to the extended double-precision floating-point format. The conversion 4852 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 4853 | Arithmetic. 4854 *----------------------------------------------------------------------------*/ 4855 4856 floatx80 int32_to_floatx80(int32_t a, float_status *status) 4857 { 4858 bool zSign; 4859 uint32_t absA; 4860 int8_t shiftCount; 4861 uint64_t zSig; 4862 4863 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 4864 zSign = ( a < 0 ); 4865 absA = zSign ? 
- a : a; 4866 shiftCount = clz32(absA) + 32; 4867 zSig = absA; 4868 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 4869 4870 } 4871 4872 /*---------------------------------------------------------------------------- 4873 | Returns the result of converting the 64-bit two's complement integer `a' 4874 | to the extended double-precision floating-point format. The conversion 4875 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 4876 | Arithmetic. 4877 *----------------------------------------------------------------------------*/ 4878 4879 floatx80 int64_to_floatx80(int64_t a, float_status *status) 4880 { 4881 bool zSign; 4882 uint64_t absA; 4883 int8_t shiftCount; 4884 4885 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 4886 zSign = ( a < 0 ); 4887 absA = zSign ? - a : a; 4888 shiftCount = clz64(absA); 4889 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); 4890 4891 } 4892 4893 /*---------------------------------------------------------------------------- 4894 | Returns the result of converting the single-precision floating-point value 4895 | `a' to the extended double-precision floating-point format. The conversion 4896 | is performed according to the IEC/IEEE Standard for Binary Floating-Point 4897 | Arithmetic. 
*----------------------------------------------------------------------------*/

floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN in: convert the payload and return it quieted.  */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit, rebias the exponent (0x3F80 =
     * 0x3FFF - 0x7F), and left-justify the 24-bit significand in 64
     * bits; the conversion is exact so no rounding is required.  */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}

/*----------------------------------------------------------------------------
| Returns the remainder of the single-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;

    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* Special cases: NaN operands propagate; Inf % x and x % 0 are
     * invalid; x % Inf == x.  */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: a single 64/32 division suffices.  */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| < |b|/2: the remainder is just a.  */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent gap: reduce iteratively, 62 quotient bits per
         * pass, using an under-estimated quotient (q64 - 2) so the
         * partial remainder stays non-negative.  */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Final restoring steps: step past zero, then pick whichever of the
     * last two partial remainders is nearer (ties go to even q), which
     * yields the round-to-nearest remainder required by IEEE 754.  */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}



/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1.  -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2.  -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/

/* Taylor-series coefficients 1/n! for n = 1..15, stored as float64 bit
 * patterns; used by float32_exp2 below to evaluate e**x. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};

float32 float32_exp2(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    a = float32_squash_input_denormal(a, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFF) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* 2**-inf == +0, 2**+inf == +inf. */
        return (aSign) ? float32_zero : a;
    }
    if (aExp == 0) {
        /* 2**(+-0) == 1 exactly; no inexact flag. */
        if (aSig == 0) return float32_one;
    }

    /* Every remaining finite input yields an inexact result. */
    float_raise(float_flag_inexact, status);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    x = float32_to_float64(a, status);
    /* 2**a == e**(a * ln 2); evaluate e**x below by Taylor series. */
    x = float64_mul(x, float64_ln2, status);

    /* r = 1 + sum(x**n / n!) for n = 1..15; xn tracks x**(i+1). */
    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i], status);
        r = float64_add(r, f, status);

        xn = float64_mul(xn, x, status);
    }

    return float64_to_float32(r, status);
}

/*----------------------------------------------------------------------------
| Returns the binary log of the single-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* log2(+-0) = -inf. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative value is invalid: return the default NaN. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* log2(+inf) = +inf. */
        return a;
    }

    /* Integer part of the result is the unbiased exponent; the loop
     * below generates the fraction bits. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /* Digit-by-digit: square the significand each iteration; a carry
     * into bit 24 means the next fraction bit of log2 is 1. */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}

/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the extended double-precision floating-point format.  The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float64_to_floatx80(float64 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        if (aSig) {
            floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Rebias the exponent and shift the significand so the explicit
     * integer bit of the x80 format lands in bit 63. */
    return
        packFloatx80(
            aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);

}

/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    if ( aExp == 0x7FF ) {
        /* rem(inf, x) and rem(NaN, x) are invalid / NaN-propagating. */
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        /* rem(x, inf) == x for finite x. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* rem(x, 0) is invalid. */
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: the remainder is a itself. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce the exponent difference 62 bits at a time using an
     * estimated quotient, biased low by 2 so it never overshoots. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Final subtract loop; then pick whichever of the last two partial
     * remainders is nearest (ties to even quotient) per IEEE remainder. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}

/*----------------------------------------------------------------------------
| Returns the binary log of the double-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+-0) = -inf. */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* Negative input: invalid operation. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+inf) = +inf. */
        return a;
    }

    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Digit-by-digit as in float32_log2, but squaring via a 128-bit
     * product; a carry into bit 53 sets the next fraction bit. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}

/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic---which means in particular that the conversion
| is rounded according to the current rounding mode.  If `a' is a NaN, the
| largest positive integer is returned.  Otherwise, if the conversion
| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/

int32_t floatx80_to_int32(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        /* NOTE(review): `1 << 31` is formally UB in ISO C (left shift
         * into the sign bit); relies on the implementation's two's
         * complement behavior to yield INT32_MIN. */
        return 1 << 31;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    /* Force NaNs positive so overflow saturates to INT32_MAX. */
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
    shiftCount = 0x4037 - aExp;
    if ( shiftCount <= 0 ) shiftCount = 1;
    shift64RightJamming( aSig, shiftCount, &aSig );
    return roundAndPackInt32(aSign, aSig, status);

}

/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
| Otherwise, if the conversion overflows, the largest integer with the same
| sign as `a' is returned.
*----------------------------------------------------------------------------*/

int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig, savedASig;
    int32_t z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        /* NOTE(review): `1 << 31` is formally UB in ISO C; relies on
         * implementation behavior to produce INT32_MIN. */
        return 1 << 31;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( 0x401E < aExp ) {
        /* Magnitude >= 2**31: overflow (NaNs forced positive first). */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FFF ) {
        /* Magnitude < 1: truncates to 0; inexact unless exactly zero. */
        if (aExp || aSig) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    shiftCount = 0x403E - aExp;
    savedASig = aSig;
    aSig >>= shiftCount;
    z = aSig;
    if ( aSign ) z = - z;
    /* Sign flipped on negation => value didn't fit in int32. */
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise(float_flag_invalid, status);
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
    }
    /* Any bits shifted out mean the truncation was inexact. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}

/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 64-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic---which means in particular that the conversion
| is rounded according to the current rounding mode.  If `a' is a NaN,
| the largest positive integer is returned.  Otherwise, if the conversion
| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/

int64_t floatx80_to_int64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig, aSigExtra;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = 0x403E - aExp;
    if ( shiftCount <= 0 ) {
        if ( shiftCount ) {
            /* Exponent too large: overflow (NaNs saturate positive). */
            float_raise(float_flag_invalid, status);
            if (!aSign || floatx80_is_any_nan(a)) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        /* Exactly 64 significant integer bits: no fraction remains. */
        aSigExtra = 0;
    }
    else {
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
    }
    return roundAndPackInt64(aSign, aSig, aSigExtra, status);

}

/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 64-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
| Otherwise, if the conversion overflows, the largest integer with the same
| sign as `a' is returned.
*----------------------------------------------------------------------------*/

int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;
    int64_t z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = aExp - 0x403E;
    if ( 0 <= shiftCount ) {
        aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
        /* Only -2**63 (sign 1, exp 0xC03E, frac 0x8000...) fits; every
         * other value this large overflows. */
        if ( ( a.high != 0xC03E ) || aSig ) {
            float_raise(float_flag_invalid, status);
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
                return INT64_MAX;
            }
        }
        return INT64_MIN;
    }
    else if ( aExp < 0x3FFF ) {
        /* Magnitude < 1 truncates to 0; inexact unless exactly zero. */
        if (aExp | aSig) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    z = aSig>>( - shiftCount );
    /* Discarded low bits => inexact truncation. */
    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
        float_raise(float_flag_inexact, status);
    }
    if ( aSign ) z = - z;
    return z;

}

/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the single-precision floating-point format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
5517 *----------------------------------------------------------------------------*/ 5518 5519 float32 floatx80_to_float32(floatx80 a, float_status *status) 5520 { 5521 bool aSign; 5522 int32_t aExp; 5523 uint64_t aSig; 5524 5525 if (floatx80_invalid_encoding(a)) { 5526 float_raise(float_flag_invalid, status); 5527 return float32_default_nan(status); 5528 } 5529 aSig = extractFloatx80Frac( a ); 5530 aExp = extractFloatx80Exp( a ); 5531 aSign = extractFloatx80Sign( a ); 5532 if ( aExp == 0x7FFF ) { 5533 if ( (uint64_t) ( aSig<<1 ) ) { 5534 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), 5535 status); 5536 return float32_silence_nan(res, status); 5537 } 5538 return packFloat32( aSign, 0xFF, 0 ); 5539 } 5540 shift64RightJamming( aSig, 33, &aSig ); 5541 if ( aExp || aSig ) aExp -= 0x3F81; 5542 return roundAndPackFloat32(aSign, aExp, aSig, status); 5543 5544 } 5545 5546 /*---------------------------------------------------------------------------- 5547 | Returns the result of converting the extended double-precision floating- 5548 | point value `a' to the double-precision floating-point format. The 5549 | conversion is performed according to the IEC/IEEE Standard for Binary 5550 | Floating-Point Arithmetic. 
5551 *----------------------------------------------------------------------------*/ 5552 5553 float64 floatx80_to_float64(floatx80 a, float_status *status) 5554 { 5555 bool aSign; 5556 int32_t aExp; 5557 uint64_t aSig, zSig; 5558 5559 if (floatx80_invalid_encoding(a)) { 5560 float_raise(float_flag_invalid, status); 5561 return float64_default_nan(status); 5562 } 5563 aSig = extractFloatx80Frac( a ); 5564 aExp = extractFloatx80Exp( a ); 5565 aSign = extractFloatx80Sign( a ); 5566 if ( aExp == 0x7FFF ) { 5567 if ( (uint64_t) ( aSig<<1 ) ) { 5568 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), 5569 status); 5570 return float64_silence_nan(res, status); 5571 } 5572 return packFloat64( aSign, 0x7FF, 0 ); 5573 } 5574 shift64RightJamming( aSig, 1, &zSig ); 5575 if ( aExp || aSig ) aExp -= 0x3C01; 5576 return roundAndPackFloat64(aSign, aExp, zSig, status); 5577 5578 } 5579 5580 /*---------------------------------------------------------------------------- 5581 | Returns the result of converting the extended double-precision floating- 5582 | point value `a' to the quadruple-precision floating-point format. The 5583 | conversion is performed according to the IEC/IEEE Standard for Binary 5584 | Floating-Point Arithmetic. 
5585 *----------------------------------------------------------------------------*/ 5586 5587 float128 floatx80_to_float128(floatx80 a, float_status *status) 5588 { 5589 bool aSign; 5590 int aExp; 5591 uint64_t aSig, zSig0, zSig1; 5592 5593 if (floatx80_invalid_encoding(a)) { 5594 float_raise(float_flag_invalid, status); 5595 return float128_default_nan(status); 5596 } 5597 aSig = extractFloatx80Frac( a ); 5598 aExp = extractFloatx80Exp( a ); 5599 aSign = extractFloatx80Sign( a ); 5600 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { 5601 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), 5602 status); 5603 return float128_silence_nan(res, status); 5604 } 5605 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 5606 return packFloat128( aSign, aExp, zSig0, zSig1 ); 5607 5608 } 5609 5610 /*---------------------------------------------------------------------------- 5611 | Rounds the extended double-precision floating-point value `a' 5612 | to the precision provided by floatx80_rounding_precision and returns the 5613 | result as an extended double-precision floating-point value. 5614 | The operation is performed according to the IEC/IEEE Standard for Binary 5615 | Floating-Point Arithmetic. 5616 *----------------------------------------------------------------------------*/ 5617 5618 floatx80 floatx80_round(floatx80 a, float_status *status) 5619 { 5620 return roundAndPackFloatx80(status->floatx80_rounding_precision, 5621 extractFloatx80Sign(a), 5622 extractFloatx80Exp(a), 5623 extractFloatx80Frac(a), 0, status); 5624 } 5625 5626 /*---------------------------------------------------------------------------- 5627 | Rounds the extended double-precision floating-point value `a' to an integer, 5628 | and returns the result as an extended quadruple-precision floating-point 5629 | value. The operation is performed according to the IEC/IEEE Standard for 5630 | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    floatx80 z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* Already integral (or inf/NaN): nothing to round. */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        /* |a| < 1: result is 0 or +-1 depending on the rounding mode. */
        if (    ( aExp == 0 )
             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
            return a;
        }
        float_raise(float_flag_inexact, status);
        aSign = extractFloatx80Sign( a );
        switch (status->float_rounding_mode) {
         case float_round_nearest_even:
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_ties_away:
            if (aExp == 0x3FFE) {
                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));

        case float_round_to_zero:
            break;
        default:
            g_assert_not_reached();
        }
        return packFloatx80( aSign, 0, 0 );
    }
    /* General case: mask off the fraction bits below the units place. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    switch (status->float_rounding_mode) {
    case float_round_nearest_even:
        z.low += lastBitMask>>1;
        /* Exactly halfway: clear the last bit to round to even. */
        if ((z.low & roundBitsMask) == 0) {
            z.low &= ~lastBitMask;
        }
        break;
    case float_round_ties_away:
        z.low += lastBitMask >> 1;
        break;
    case float_round_to_zero:
        break;
    case float_round_up:
        if (!extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    case float_round_down:
        if (extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    default:
        abort();
    }
    z.low &= ~ roundBitsMask;
    /* Significand carried all the way out: renormalize by bumping the
     * exponent and restoring the explicit integer bit. */
    if ( z.low == 0 ) {
        ++z.high;
        z.low = UINT64_C(0x8000000000000000);
    }
    if (z.low != a.low) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}

/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the extended double-
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
| negated before being returned.  `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* Subnormal b has an effective exponent of 1, not 0. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* MSB set means no carry out of the 64-bit significand. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out: shift right one, re-set the integer bit and bump exp. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the extended
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
| difference is negated before being returned.  `zSign' is ignored if the
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* inf - inf is invalid. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero whose sign depends on rounding mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    /* Larger magnitude is b, so the result's sign flips. */
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}

/*---------------------------------------------------------------------------- 5879 | Returns the result of adding the extended double-precision floating-point 5880 | values `a' and `b'. The operation is performed according to the IEC/IEEE 5881 | Standard for Binary Floating-Point Arithmetic. 5882 *----------------------------------------------------------------------------*/ 5883 5884 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) 5885 { 5886 bool aSign, bSign; 5887 5888 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { 5889 float_raise(float_flag_invalid, status); 5890 return floatx80_default_nan(status); 5891 } 5892 aSign = extractFloatx80Sign( a ); 5893 bSign = extractFloatx80Sign( b ); 5894 if ( aSign == bSign ) { 5895 return addFloatx80Sigs(a, b, aSign, status); 5896 } 5897 else { 5898 return subFloatx80Sigs(a, b, aSign, status); 5899 } 5900 5901 } 5902 5903 /*---------------------------------------------------------------------------- 5904 | Returns the result of subtracting the extended double-precision floating- 5905 | point values `a' and `b'. The operation is performed according to the 5906 | IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/

floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    /* Equal signs subtract magnitudes; opposite signs add them. */
    if ( aSign == bSign ) {
        return subFloatx80Sigs(a, b, aSign, status);
    }
    else {
        return addFloatx80Sigs(a, b, aSign, status);
    }

}

/*----------------------------------------------------------------------------
| Returns the result of multiplying the extended double-precision floating-
| point values `a' and `b'.  The operation is performed according to the
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* inf * 0 is invalid. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * inf is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* Product of two normalized significands may need one left shift
     * to restore the integer bit in bit 63. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the result of dividing the extended double-precision floating-point
| value `a' by the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* inf / inf is invalid. */
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / inf == 0. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; x / 0 raises divide-by-zero. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Pre-shift so the estimated quotient below cannot overflow. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits via estimate, corrected by remainder. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; only correct them when they are small
     * enough to affect rounding, and fold any nonzero remainder into
     * the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
| the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
| the absolute value of the integer quotient.
*----------------------------------------------------------------------------*/

floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Invalid extended-precision encodings never yield a numeric result.  */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; Inf rem anything is invalid.  */
        if ( (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        /* finite rem Inf = the finite operand, unchanged.  */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with divisor zero is an invalid operation.  */
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|: truncating mode, or |a| < |b|/2, yields quotient 0
           and remainder 'a'.  */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /* Produce 62 quotient bits per iteration until the exponents align.  */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;   /* under-estimate: remainder stays >= 0 */
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial quotient, then correct it upward while one more
           (shifted) divisor still fits in the remainder.  */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* Round-to-nearest quotient: switch to the smaller-magnitude
           remainder, breaking ties toward an even quotient.  */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.
The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    /* IEEE-style remainder: quotient rounded to nearest, ties to even.  */
    return floatx80_modrem(a, b, false, &quotient, status);
}

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b', with the quotient truncated
| toward zero.
*----------------------------------------------------------------------------*/

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    /* Truncated-quotient remainder (see the comment above).  */
    return floatx80_modrem(a, b, true, &quotient, status);
}

/*----------------------------------------------------------------------------
| Returns the square root of the extended double-precision floating-point
| value `a'.  The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Invalid extended-precision encodings never yield a numeric result.  */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        /* sqrt(+Inf) = +Inf; sqrt(-Inf) is invalid.  */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative value other than -0 is invalid (-0 passes
           through unchanged).  */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the unbiased exponent; the extra shift by (aExp & 1) keeps the
       significand's radix point consistent for odd exponents.  */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    /* Refine the 32-bit root estimate to 64 bits, then decrement until the
       partial remainder is non-negative.  */
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low-order root bits; the exact correction is only needed when the
       estimate is close to a rounding boundary.  */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any sticky (nonzero) remainder into the last result bit.  */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}

/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the extended double-precision floating-point format.
The 6305 | conversion is performed according to the IEC/IEEE Standard for Binary 6306 | Floating-Point Arithmetic. 6307 *----------------------------------------------------------------------------*/ 6308 6309 floatx80 float128_to_floatx80(float128 a, float_status *status) 6310 { 6311 bool aSign; 6312 int32_t aExp; 6313 uint64_t aSig0, aSig1; 6314 6315 aSig1 = extractFloat128Frac1( a ); 6316 aSig0 = extractFloat128Frac0( a ); 6317 aExp = extractFloat128Exp( a ); 6318 aSign = extractFloat128Sign( a ); 6319 if ( aExp == 0x7FFF ) { 6320 if ( aSig0 | aSig1 ) { 6321 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status), 6322 status); 6323 return floatx80_silence_nan(res, status); 6324 } 6325 return packFloatx80(aSign, floatx80_infinity_high, 6326 floatx80_infinity_low); 6327 } 6328 if ( aExp == 0 ) { 6329 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); 6330 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 6331 } 6332 else { 6333 aSig0 |= UINT64_C(0x0001000000000000); 6334 } 6335 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); 6336 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); 6337 6338 } 6339 6340 /*---------------------------------------------------------------------------- 6341 | Returns the remainder of the quadruple-precision floating-point value `a' 6342 | with respect to the corresponding value `b'. The operation is performed 6343 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; Inf rem anything is invalid.  */
        if ( ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* finite rem Inf = the finite operand, unchanged.  */
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* Remainder with divisor zero is an invalid operation.  */
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b|/2: remainder is 'a' itself.  */
    if ( expDiff < -1 ) return a;
    /* Make the implicit integer bits explicit and left-justify both
       significands (one bit less for 'a' when its exponent is smaller).  */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Produce 61 quotient bits per iteration until the exponents align.  */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;   /* under-estimate: remainder stays >= 0 */
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial quotient, scaled down to the remaining exponent
           difference.  */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Step the quotient until the partial remainder goes negative, then
       choose between the last two candidate remainders: pick the one nearer
       to zero, breaking ties toward an even quotient.  */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if ( ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    /* Negative remainder: take the absolute value and flip the sign.  */
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}

/*----------------------------------------------------------------------------
| Compares the extended double-precision values `a' and `b'.  When 'is_quiet'
| is true, only signaling NaNs (or invalid encodings) raise the invalid flag;
| otherwise any NaN operand raises it.
*----------------------------------------------------------------------------*/

static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Either operand NaN: unordered.  */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Opposite signs: the non-negative operand is the larger.  */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison. */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes; the result is inverted when
               both operands are negative.  */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}

FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
{
    /* Signaling compare: any NaN operand raises the invalid flag.  */
    return floatx80_compare_internal(a, b, 0, status);
}

FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
                                     float_status *status)
{
    /* Quiet compare: only signaling NaNs raise the invalid flag.  */
    return floatx80_compare_internal(a, b, 1, status);
}

/*----------------------------------------------------------------------------
| Returns `a' scaled by 2**n (exponent adjustment), rounded and normalized.
*----------------------------------------------------------------------------*/

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        /* Scaling an infinity leaves it unchanged.  */
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        aExp++;
    }

    /* Clamp n so that aExp + n cannot overflow; magnitudes beyond this
       range are far outside the representable exponent anyway.  */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}

/*----------------------------------------------------------------------------
| Runs at program startup: probes the host's fma() and disables the hardfloat
| fast path if the result is wrong.
*----------------------------------------------------------------------------*/

static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}