xref: /openbmc/qemu/fpu/softfloat.c (revision ceebc129)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate per-type input flush-to-zero helpers.
 *
 * The __nocheck variant unconditionally replaces a denormal (subnormal)
 * input with a zero of the same sign and raises the input_denormal
 * exception flag.  It does NOT consult s->flush_inputs_to_zero; callers
 * are expected to have checked that already.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * One-input flush helper: a no-op unless the status requests that
 * denormal inputs be flushed to zero, in which case it defers to the
 * corresponding __nocheck helper.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/*
 * Two-input flush helper: checks s->flush_inputs_to_zero once, then
 * flushes both operands via the __nocheck helper.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/*
 * Three-input flush helper: checks s->flush_inputs_to_zero once, then
 * flushes all three operands via the __nocheck helper.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
/*
 * Unions giving access to the same bits either as the softfloat type
 * (.s, for softfloat primitives) or as the host's native float type
 * (.h, for running the operation on the host FPU).
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicate over two inputs, e.g. "both operands are zero or normal". */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host ("hard") implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic 2-input float32 operation: try the host FPU first and fall
 * back to softfloat whenever flag detection would be unreliable.
 *
 * @hard: host-FPU implementation of the operation
 * @soft: softfloat fallback implementation
 * @pre:  predicate checked before the hard op; must return true for the
 *        hard path to be valid (e.g. both inputs zero-or-normal)
 * @post: predicate checked after the hard op when the result magnitude
 *        is <= FLT_MIN; returning true forces a softfloat recomputation
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat requires inexact already set and round-to-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* pre() rejected inf inputs, so an inf result means overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat so
         * that underflow/denormal flags are computed correctly. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic 2-input float64 operation; see float32_gen2 for the contract
 * of @hard, @soft, @pre and @post.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat requires inexact already set and round-to-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* pre() rejected inf inputs, so an inf result means overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat so
         * that underflow/denormal flags are computed correctly. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
458 /*
459  * Classify a floating point number. Everything above float_class_qnan
460  * is a NaN so cls >= float_class_qnan is any NaN.
461  */
462 
/* Packed so a FloatClass fits in one byte inside FloatPartsN. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Map a FloatClass value to a single bit, for building class masks. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Common combinations: infinity-or-zero, and any kind of NaN. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
494 
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
497 {
498     return unlikely(c >= float_class_qnan);
499 }
500 
501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
502 {
503     return c == float_class_snan;
504 }
505 
506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507 {
508     return c == float_class_qnan;
509 }
510 
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* 128-bit fraction split across two words, most significant first. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* 256-bit fraction in four words, most significant first. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
558 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
/* All bit positions below are taken modulo 64 via "& 63". */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1

/* 5-bit exponent, 10-bit fraction: binary16. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* As binary16, but with ARM Alternative Half Precision handling. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* 8-bit exponent, 7-bit fraction: bfloat16. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* 8-bit exponent, 23-bit fraction: binary32. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* 11-bit exponent, 52-bit fraction: binary64. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* 15-bit exponent, 112-bit fraction: binary128. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
621 
622 /* Unpack a float to parts, but do not canonicalize.  */
623 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
624 {
625     const int f_size = fmt->frac_size;
626     const int e_size = fmt->exp_size;
627 
628     *r = (FloatParts64) {
629         .cls = float_class_unclassified,
630         .sign = extract64(raw, f_size + e_size, 1),
631         .exp = extract64(raw, f_size, e_size),
632         .frac = extract64(raw, 0, f_size)
633     };
634 }
635 
/* Per-format wrappers around unpack_raw64. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
655 
656 static void float128_unpack_raw(FloatParts128 *p, float128 f)
657 {
658     const int f_size = float128_params.frac_size - 64;
659     const int e_size = float128_params.exp_size;
660 
661     *p = (FloatParts128) {
662         .cls = float_class_unclassified,
663         .sign = extract64(f.high, f_size + e_size, 1),
664         .exp = extract64(f.high, f_size, e_size),
665         .frac_hi = extract64(f.high, 0, f_size),
666         .frac_lo = f.low,
667     };
668 }
669 
670 /* Pack a float from parts, but do not canonicalize.  */
671 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
672 {
673     const int f_size = fmt->frac_size;
674     const int e_size = fmt->exp_size;
675     uint64_t ret;
676 
677     ret = (uint64_t)p->sign << (f_size + e_size);
678     ret = deposit64(ret, f_size, e_size, p->exp);
679     ret = deposit64(ret, 0, f_size, p->frac);
680     return ret;
681 }
682 
/* Per-format wrappers around pack_raw64. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
702 
703 static float128 float128_pack_raw(const FloatParts128 *p)
704 {
705     const int f_size = float128_params.frac_size - 64;
706     const int e_size = float128_params.exp_size;
707     uint64_t hi;
708 
709     hi = (uint64_t)p->sign << (f_size + e_size);
710     hi = deposit64(hi, f_size, e_size, p->exp);
711     hi = deposit64(hi, 0, f_size, p->frac_hi);
712     return make_float128(hi, p->frac_lo);
713 }
714 
715 /*----------------------------------------------------------------------------
716 | Functions and definitions to determine:  (1) whether tininess for underflow
717 | is detected before or after rounding by default, (2) what (if anything)
718 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
719 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720 | are propagated from function inputs to output.  These details are target-
721 | specific.
722 *----------------------------------------------------------------------------*/
723 #include "softfloat-specialize.c.inc"
724 
725 #define PARTS_GENERIC_64_128(NAME, P) \
726     QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
727 
728 #define PARTS_GENERIC_64_128_256(NAME, P) \
729     QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
730                  (FloatParts128 *, parts128_##NAME), parts64_##NAME)
731 
732 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
733 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
734 
735 static void parts64_return_nan(FloatParts64 *a, float_status *s);
736 static void parts128_return_nan(FloatParts128 *a, float_status *s);
737 
738 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
739 
740 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
741                                       float_status *s);
742 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
743                                         float_status *s);
744 
745 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
746 
747 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
748                                              FloatParts64 *c, float_status *s,
749                                              int ab_mask, int abc_mask);
750 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
751                                                FloatParts128 *b,
752                                                FloatParts128 *c,
753                                                float_status *s,
754                                                int ab_mask, int abc_mask);
755 
756 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
757     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
758 
/*
 * Forward declarations for the FloatParts{64,128,256} primitives that are
 * instantiated from softfloat-parts.c.inc, together with size-dispatching
 * macros.  Each parts_* macro uses PARTS_GENERIC_64_128[_256] to select
 * the 64-, 128- or (where provided) 256-bit implementation based on the
 * pointer type of its first argument.
 */

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

/* Dispatch a frac_* helper by the pointer type of argument P. */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
895 
896 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
897 {
898     return uadd64_overflow(a->frac, b->frac, &r->frac);
899 }
900 
901 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
902 {
903     bool c = 0;
904     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
905     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
906     return c;
907 }
908 
909 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
910 {
911     bool c = 0;
912     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
913     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
914     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
915     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
916     return c;
917 }
918 
919 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
920 
921 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
922 {
923     return uadd64_overflow(a->frac, c, &r->frac);
924 }
925 
926 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
927 {
928     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
929     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
930 }
931 
932 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
933 
934 static void frac64_allones(FloatParts64 *a)
935 {
936     a->frac = -1;
937 }
938 
939 static void frac128_allones(FloatParts128 *a)
940 {
941     a->frac_hi = a->frac_lo = -1;
942 }
943 
944 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
945 
946 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
947 {
948     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
949 }
950 
951 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
952 {
953     uint64_t ta = a->frac_hi, tb = b->frac_hi;
954     if (ta == tb) {
955         ta = a->frac_lo, tb = b->frac_lo;
956         if (ta == tb) {
957             return 0;
958         }
959     }
960     return ta < tb ? -1 : 1;
961 }
962 
963 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
964 
965 static void frac64_clear(FloatParts64 *a)
966 {
967     a->frac = 0;
968 }
969 
970 static void frac128_clear(FloatParts128 *a)
971 {
972     a->frac_hi = a->frac_lo = 0;
973 }
974 
975 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
976 
/*
 * Divide A.frac by B.frac in place, producing an N-bit quotient with the
 * lsb jammed (set) when the remainder is non-zero, so that inexactness
 * survives for later rounding.  Returns true when A < B on entry, in
 * which case the caller must decrement the result exponent.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce the an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

/*
 * 128-bit analogue of frac64_div: same contract, using a two-step
 * estimate-and-correct long division built from 128/64 estimates.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* As in frac64_div: if A < B, keep A as-is and decrement exponent. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1057 
1058 static bool frac64_eqz(FloatParts64 *a)
1059 {
1060     return a->frac == 0;
1061 }
1062 
1063 static bool frac128_eqz(FloatParts128 *a)
1064 {
1065     return (a->frac_hi | a->frac_lo) == 0;
1066 }
1067 
1068 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1069 
/* Widening multiply: full 64x64 -> 128-bit product of the fractions. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

/* Widening multiply: full 128x128 -> 256-bit product of the fractions. */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1082 
1083 static void frac64_neg(FloatParts64 *a)
1084 {
1085     a->frac = -a->frac;
1086 }
1087 
1088 static void frac128_neg(FloatParts128 *a)
1089 {
1090     bool c = 0;
1091     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1092     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1093 }
1094 
1095 static void frac256_neg(FloatParts256 *a)
1096 {
1097     bool c = 0;
1098     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1099     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1100     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1101     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1102 }
1103 
1104 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1105 
1106 static int frac64_normalize(FloatParts64 *a)
1107 {
1108     if (a->frac) {
1109         int shift = clz64(a->frac);
1110         a->frac <<= shift;
1111         return shift;
1112     }
1113     return 64;
1114 }
1115 
1116 static int frac128_normalize(FloatParts128 *a)
1117 {
1118     if (a->frac_hi) {
1119         int shl = clz64(a->frac_hi);
1120         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1121         a->frac_lo <<= shl;
1122         return shl;
1123     } else if (a->frac_lo) {
1124         int shl = clz64(a->frac_lo);
1125         a->frac_hi = a->frac_lo << shl;
1126         a->frac_lo = 0;
1127         return shl + 64;
1128     }
1129     return 128;
1130 }
1131 
/*
 * Left-justify the 256-bit fraction so that its msb is set; return the
 * number of bit positions shifted.  An all-zero fraction returns 256.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized. */
            return 0;
        }
        ret = shl;
    } else {
        /* Shift whole words first, recording 64 bits per word skipped. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Shift the remaining 1..63 bits across all four words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1180 
1181 static void frac64_shl(FloatParts64 *a, int c)
1182 {
1183     a->frac <<= c;
1184 }
1185 
1186 static void frac128_shl(FloatParts128 *a, int c)
1187 {
1188     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1189 
1190     if (c & 64) {
1191         a0 = a1, a1 = 0;
1192     }
1193 
1194     c &= 63;
1195     if (c) {
1196         a0 = shl_double(a0, a1, c);
1197         a1 = a1 << c;
1198     }
1199 
1200     a->frac_hi = a0;
1201     a->frac_lo = a1;
1202 }
1203 
1204 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1205 
1206 static void frac64_shr(FloatParts64 *a, int c)
1207 {
1208     a->frac >>= c;
1209 }
1210 
1211 static void frac128_shr(FloatParts128 *a, int c)
1212 {
1213     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1214 
1215     if (c & 64) {
1216         a1 = a0, a0 = 0;
1217     }
1218 
1219     c &= 63;
1220     if (c) {
1221         a1 = shr_double(a0, a1, c);
1222         a0 = a0 >> c;
1223     }
1224 
1225     a->frac_hi = a0;
1226     a->frac_lo = a1;
1227 }
1228 
1229 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1230 
1231 static void frac64_shrjam(FloatParts64 *a, int c)
1232 {
1233     uint64_t a0 = a->frac;
1234 
1235     if (likely(c != 0)) {
1236         if (likely(c < 64)) {
1237             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1238         } else {
1239             a0 = a0 != 0;
1240         }
1241         a->frac = a0;
1242     }
1243 }
1244 
/*
 * Shift right by C, ORing any bits shifted out into the least significant
 * bit of the result (the "sticky" bit).  C == 0 is a no-op; C >= width
 * collapses the entire fraction into the sticky bit.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the old low word is entirely sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Shift the remaining 1..63 bits, collecting shifted-out bits. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

/* 256-bit analogue of frac128_shrjam. */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts of 128 and/or 64 bits; shifted-out words
         * accumulate into the sticky bit. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1320 
1321 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1322 {
1323     return usub64_overflow(a->frac, b->frac, &r->frac);
1324 }
1325 
1326 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1327 {
1328     bool c = 0;
1329     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1330     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1331     return c;
1332 }
1333 
1334 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1335 {
1336     bool c = 0;
1337     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1338     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1339     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1340     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1341     return c;
1342 }
1343 
1344 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1345 
1346 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1347 {
1348     r->frac = a->frac_hi | (a->frac_lo != 0);
1349 }
1350 
1351 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1352 {
1353     r->frac_hi = a->frac_hi;
1354     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1355 }
1356 
1357 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1358 
1359 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1360 {
1361     r->frac_hi = a->frac;
1362     r->frac_lo = 0;
1363 }
1364 
1365 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1366 {
1367     r->frac_hi = a->frac_hi;
1368     r->frac_hm = a->frac_lo;
1369     r->frac_lm = 0;
1370     r->frac_lo = 0;
1371 }
1372 
1373 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1374 
/*
 * Instantiate the generic parts implementations for each width.
 * N is the width of the FloatPartsN being compiled; W names the wider
 * FloatPartsW used for double-width intermediates.  The 256-bit
 * instantiation provides only the add/sub helpers.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1404 
1405 /*
1406  * Pack/unpack routines with a specific FloatFmt.
1407  */
1408 
/* Unpack a float16 into canonical parts using caller-supplied format. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* Unpack a float16 into canonical parts with the standard format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack a bfloat16 into canonical parts. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/* Round canonical parts and repack as float16, per caller-supplied format. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

/* Round canonical parts and repack as float16 with the standard format. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

/* Round canonical parts and repack as bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

/* Unpack a float32 into canonical parts. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round canonical parts and repack as float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack a float64 into canonical parts. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round canonical parts and repack as float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

/* Unpack a float128 into canonical parts. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

/* Round canonical parts and repack as float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1491 
1492 /*
1493  * Addition and subtraction
1494  */
1495 
/*
 * Common implementation for float16 add and sub: unpack both operands,
 * perform the parts-level operation, round and repack the result.
 */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1517 
/* Softfloat path for float32 add/sub, used when hardfloat cannot apply. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}

/* Softfloat path for float64 add/sub, used when hardfloat cannot apply. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1561 
/* Host-FPU fast paths: plain native arithmetic on float/double. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1581 
1582 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1583 {
1584     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1585         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1586     }
1587     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1588 }
1589 
1590 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1591 {
1592     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1593         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1594     } else {
1595         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1596     }
1597 }
1598 
/*
 * Dispatch float32 add/sub between the given hardfloat and softfloat
 * implementations via float32_gen2, guarded by the zero-or-normal input
 * check (f32_is_zon2) and the post-check above.
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1636 
/* Common implementation for bfloat16 add and sub. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}

/* Common implementation for float128 add and sub. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
1680 
1681 /*
1682  * Multiplication
1683  */
1684 
/* float16 multiply: unpack, multiply at the parts level, repack. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat path for float32 multiply. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat path for float64 multiply. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
1719 
/* Host-FPU fast paths for multiplication. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1729 
/* Public float32 multiply: hardfloat fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

/* Public float64 multiply: hardfloat fast path with softfloat fallback. */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1743 
/* bfloat16 multiply via the parts-level implementation. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

/* float128 multiply via the parts-level implementation. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
1767 
1768 /*
1769  * Fused multiply-add
1770  */
1771 
/* float16 fused multiply-add: a * b + c with behavior modifiers in flags. */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat path for float32 fused multiply-add. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat path for float64 fused multiply-add. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}
1812 
/* When set, the hardfloat fast path for muladd is skipped (see below). */
static bool force_soft_fma;
1814 
/*
 * float32 fused multiply-add with a hardfloat fast path: try to compute
 * the result with the host's fmaf(), falling back to soft_f32_muladd()
 * whenever the inputs, the flags, or the result are outside what the
 * host computation can handle faithfully.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Bail out unless the status word permits using the host FPU. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The halve-result flag has no fast-path handling here. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    /* All three operands must be zero or normal for the fast path. */
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Compute the sign of the (zero) product, including negation. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        /* Host addition applies the IEEE signed-zero rules for us. */
        ur.h = up.h + uc.h;
    } else {
        /* Keep the unnegated inputs in case we must retry in softfloat. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /*
             * The result may be tiny (below, or rounded up to, the
             * smallest normal); redo it in softfloat so the proper
             * underflow/tininess handling applies.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1885 
1886 float64 QEMU_FLATTEN
1887 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1888 {
1889     union_float64 ua, ub, uc, ur;
1890 
1891     ua.s = xa;
1892     ub.s = xb;
1893     uc.s = xc;
1894 
1895     if (unlikely(!can_use_fpu(s))) {
1896         goto soft;
1897     }
1898     if (unlikely(flags & float_muladd_halve_result)) {
1899         goto soft;
1900     }
1901 
1902     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1903     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1904         goto soft;
1905     }
1906 
1907     if (unlikely(force_soft_fma)) {
1908         goto soft;
1909     }
1910 
1911     /*
1912      * When (a || b) == 0, there's no need to check for under/over flow,
1913      * since we know the addend is (normal || 0) and the product is 0.
1914      */
1915     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1916         union_float64 up;
1917         bool prod_sign;
1918 
1919         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1920         prod_sign ^= !!(flags & float_muladd_negate_product);
1921         up.s = float64_set_sign(float64_zero, prod_sign);
1922 
1923         if (flags & float_muladd_negate_c) {
1924             uc.h = -uc.h;
1925         }
1926         ur.h = up.h + uc.h;
1927     } else {
1928         union_float64 ua_orig = ua;
1929         union_float64 uc_orig = uc;
1930 
1931         if (flags & float_muladd_negate_product) {
1932             ua.h = -ua.h;
1933         }
1934         if (flags & float_muladd_negate_c) {
1935             uc.h = -uc.h;
1936         }
1937 
1938         ur.h = fma(ua.h, ub.h, uc.h);
1939 
1940         if (unlikely(f64_is_inf(ur))) {
1941             float_raise(float_flag_overflow, s);
1942         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1943             ua = ua_orig;
1944             uc = uc_orig;
1945             goto soft;
1946         }
1947     }
1948     if (flags & float_muladd_negate_result) {
1949         return float64_chs(ur.s);
1950     }
1951     return ur.s;
1952 
1953  soft:
1954     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1955 }
1956 
1957 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1958                                       int flags, float_status *status)
1959 {
1960     FloatParts64 pa, pb, pc, *pr;
1961 
1962     bfloat16_unpack_canonical(&pa, a, status);
1963     bfloat16_unpack_canonical(&pb, b, status);
1964     bfloat16_unpack_canonical(&pc, c, status);
1965     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1966 
1967     return bfloat16_round_pack_canonical(pr, status);
1968 }
1969 
1970 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1971                                       int flags, float_status *status)
1972 {
1973     FloatParts128 pa, pb, pc, *pr;
1974 
1975     float128_unpack_canonical(&pa, a, status);
1976     float128_unpack_canonical(&pb, b, status);
1977     float128_unpack_canonical(&pc, c, status);
1978     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1979 
1980     return float128_round_pack_canonical(pr, status);
1981 }
1982 
1983 /*
1984  * Division
1985  */
1986 
1987 float16 float16_div(float16 a, float16 b, float_status *status)
1988 {
1989     FloatParts64 pa, pb, *pr;
1990 
1991     float16_unpack_canonical(&pa, a, status);
1992     float16_unpack_canonical(&pb, b, status);
1993     pr = parts_div(&pa, &pb, status);
1994 
1995     return float16_round_pack_canonical(pr, status);
1996 }
1997 
1998 static float32 QEMU_SOFTFLOAT_ATTR
1999 soft_f32_div(float32 a, float32 b, float_status *status)
2000 {
2001     FloatParts64 pa, pb, *pr;
2002 
2003     float32_unpack_canonical(&pa, a, status);
2004     float32_unpack_canonical(&pb, b, status);
2005     pr = parts_div(&pa, &pb, status);
2006 
2007     return float32_round_pack_canonical(pr, status);
2008 }
2009 
2010 static float64 QEMU_SOFTFLOAT_ATTR
2011 soft_f64_div(float64 a, float64 b, float_status *status)
2012 {
2013     FloatParts64 pa, pb, *pr;
2014 
2015     float64_unpack_canonical(&pa, a, status);
2016     float64_unpack_canonical(&pb, b, status);
2017     pr = parts_div(&pa, &pb, status);
2018 
2019     return float64_round_pack_canonical(pr, status);
2020 }
2021 
/* Single-precision division on the host FPU (hardfloat fast path). */
static float hard_f32_div(float numer, float denom)
{
    return numer / denom;
}
2026 
/* Double-precision division on the host FPU (hardfloat fast path). */
static double hard_f64_div(double numer, double denom)
{
    return numer / denom;
}
2031 
2032 static bool f32_div_pre(union_float32 a, union_float32 b)
2033 {
2034     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2035         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2036                fpclassify(b.h) == FP_NORMAL;
2037     }
2038     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2039 }
2040 
2041 static bool f64_div_pre(union_float64 a, union_float64 b)
2042 {
2043     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2044         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2045                fpclassify(b.h) == FP_NORMAL;
2046     }
2047     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2048 }
2049 
2050 static bool f32_div_post(union_float32 a, union_float32 b)
2051 {
2052     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2053         return fpclassify(a.h) != FP_ZERO;
2054     }
2055     return !float32_is_zero(a.s);
2056 }
2057 
2058 static bool f64_div_post(union_float64 a, union_float64 b)
2059 {
2060     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2061         return fpclassify(a.h) != FP_ZERO;
2062     }
2063     return !float64_is_zero(a.s);
2064 }
2065 
/*
 * float32 division: the float32_gen2 dispatcher (defined elsewhere)
 * tries hard_f32_div on the host FPU when f32_div_pre accepts the
 * operands and f32_div_post accepts the result, otherwise uses
 * soft_f32_div.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
2072 
/*
 * float64 division: the float64_gen2 dispatcher (defined elsewhere)
 * tries hard_f64_div on the host FPU when f64_div_pre accepts the
 * operands and f64_div_post accepts the result, otherwise uses
 * soft_f64_div.
 */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2079 
2080 bfloat16 QEMU_FLATTEN
2081 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2082 {
2083     FloatParts64 pa, pb, *pr;
2084 
2085     bfloat16_unpack_canonical(&pa, a, status);
2086     bfloat16_unpack_canonical(&pb, b, status);
2087     pr = parts_div(&pa, &pb, status);
2088 
2089     return bfloat16_round_pack_canonical(pr, status);
2090 }
2091 
2092 float128 QEMU_FLATTEN
2093 float128_div(float128 a, float128 b, float_status *status)
2094 {
2095     FloatParts128 pa, pb, *pr;
2096 
2097     float128_unpack_canonical(&pa, a, status);
2098     float128_unpack_canonical(&pb, b, status);
2099     pr = parts_div(&pa, &pb, status);
2100 
2101     return float128_round_pack_canonical(pr, status);
2102 }
2103 
2104 /*
2105  * Float to Float conversions
2106  *
2107  * Returns the result of converting one float format to another. The
2108  * conversion is performed according to the IEC/IEEE Standard for
2109  * Binary Floating-Point Arithmetic.
2110  *
2111  * Usually this only needs to take care of raising invalid exceptions
2112  * and handling the conversion on NaNs.
2113  */
2114 
/*
 * Squash unpacked parts into ARM Alternative Half Precision (AHP)
 * semantics for the float -> float16(!ieee) conversions: AHP has no
 * NaN or Inf encodings, so NaNs collapse to a signed zero and Inf
 * collapses to the maximum normal, both raising Invalid.  Normal and
 * zero values pass through unchanged.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        /* All fraction bits set: the largest representable significand. */
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2148 
/*
 * Same-parts-width float-to-float conversion: only NaNs need any work
 * (canonicalization via parts_return_nan); all other classes are
 * already in canonical parts form.
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Dispatch to the 64- or 128-bit variant based on the parts type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2165 
/*
 * Narrowing float-to-float conversion: 128-bit parts in @b down to
 * 64-bit parts in @a.  Class, sign and exponent copy across; a normal
 * significand is narrowed by frac_truncjam (defined elsewhere; by its
 * name, truncate with the discarded bits jammed into the low bit so
 * later rounding still sees them), while a NaN keeps only the high 64
 * payload bits before canonicalization.  Zero/Inf leave a->frac unset,
 * which is fine since it is unused for those classes.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2181 
/*
 * Widening float-to-float conversion: 64-bit parts in @b up to 128-bit
 * parts in @a.  frac_widen extends the significand; NaNs are then
 * canonicalized for the wider format.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2194 
/*
 * float16 <-> float32/float64 conversions.  The @ieee flag selects
 * between standard IEEE half precision and ARM Alternative Half
 * Precision (AHP, no Inf/NaN encodings); the AHP direction uses
 * float16_params_ahp and, on narrowing, parts_float_to_ahp.
 */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}
2230 
2231 static float64 QEMU_SOFTFLOAT_ATTR
2232 soft_float32_to_float64(float32 a, float_status *s)
2233 {
2234     FloatParts64 p;
2235 
2236     float32_unpack_canonical(&p, a, s);
2237     parts_float_to_float(&p, s);
2238     return float64_round_pack_canonical(&p, s);
2239 }
2240 
2241 float64 float32_to_float64(float32 a, float_status *s)
2242 {
2243     if (likely(float32_is_normal(a))) {
2244         /* Widening conversion can never produce inexact results.  */
2245         union_float32 uf;
2246         union_float64 ud;
2247         uf.s = a;
2248         ud.h = uf.h;
2249         return ud.s;
2250     } else if (float32_is_zero(a)) {
2251         return float64_set_sign(float64_zero, float32_is_neg(a));
2252     } else {
2253         return soft_float32_to_float64(a, s);
2254     }
2255 }
2256 
/*
 * Remaining 16/32/64-bit same-parts-width conversions: unpack to
 * canonical parts, fix up NaNs via parts_float_to_float (or squash to
 * AHP semantics for float64 -> float16 with !ieee), then round and
 * pack into the destination format.
 */
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2317 
/*
 * Conversions to and from float128 change the parts width: narrowing
 * goes through parts_float_to_float_narrow (128 -> 64 bit parts),
 * widening through parts_float_to_float_widen (64 -> 128 bit parts).
 */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2357 
2358 /*
2359  * Round to integral value
2360  */
2361 
/*
 * Round a value to an integral value in the same format, honouring the
 * rounding mode currently selected in @s (the literal 0 is the scale
 * argument of parts_round_to_int — no scaling; confirm against its
 * definition).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2406 
2407 /*
2408  * Floating-point to signed integer conversions
2409  */
2410 
/*
 * The *_scalbn variants convert value * 2**scale to a signed integer,
 * rounding with @rmode.  The INT*_MIN/MAX arguments give
 * parts_float_to_sint (defined elsewhere) the representable range of
 * the destination type; out-of-range and NaN handling lives there.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

/* Static: the float128 scalbn variants have no external users here. */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2545 
/*
 * Convenience wrappers: convert to a signed integer using the rounding
 * mode currently selected in the float_status, with no scaling.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
2605 
/*
 * Convenience wrappers: truncating (round-toward-zero) conversion to a
 * signed integer, ignoring the rounding mode in the float_status.
 */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2660 
/*
 * bfloat16 signed-integer conversion wrappers: status rounding mode or
 * round-toward-zero, no scaling.
 */
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2690 
2691 /*
2692  * Floating-point to unsigned integer conversions
2693  */
2694 
/*
 * The *_scalbn variants convert value * 2**scale to an unsigned
 * integer, rounding with @rmode.  Only the UINT*_MAX upper bound is
 * passed; negative/NaN/out-of-range handling lives in
 * parts_float_to_uint (defined elsewhere).
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

/* Static: the float128 scalbn variants have no external users here. */
static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2829 
/*
 * Public to-unsigned wrappers using the rounding mode currently set
 * in the float_status argument, with no scaling.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/*
 * *_round_to_zero variants force truncation, ignoring the rounding
 * mode held in the float_status argument.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2974 
2975 /*
2976  * Signed integer to floating-point conversions
2977  */
2978 
2979 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2980 {
2981     FloatParts64 p;
2982 
2983     parts_sint_to_float(&p, a, scale, status);
2984     return float16_round_pack_canonical(&p, status);
2985 }
2986 
/*
 * Narrower signed inputs widen losslessly to int64_t, so all the
 * int->float16 entry points funnel through int64_to_float16_scalbn().
 */
float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3016 
3017 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3018 {
3019     FloatParts64 p;
3020 
3021     parts64_sint_to_float(&p, a, scale, status);
3022     return float32_round_pack_canonical(&p, status);
3023 }
3024 
/* Narrower signed inputs widen losslessly; reuse the int64 path. */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3049 
3050 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3051 {
3052     FloatParts64 p;
3053 
3054     parts_sint_to_float(&p, a, scale, status);
3055     return float64_round_pack_canonical(&p, status);
3056 }
3057 
/* Narrower signed inputs widen losslessly; reuse the int64 path. */
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3082 
3083 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3084 {
3085     FloatParts64 p;
3086 
3087     parts_sint_to_float(&p, a, scale, status);
3088     return bfloat16_round_pack_canonical(&p, status);
3089 }
3090 
/* Narrower signed inputs widen losslessly; reuse the int64 path. */
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3115 
3116 float128 int64_to_float128(int64_t a, float_status *status)
3117 {
3118     FloatParts128 p;
3119 
3120     parts_sint_to_float(&p, a, 0, status);
3121     return float128_round_pack_canonical(&p, status);
3122 }
3123 
/* Widening int32 -> int64 is exact; defer to the int64 conversion. */
float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3128 
3129 /*
 * Unsigned integer to floating-point conversions
3131  */
3132 
3133 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3134 {
3135     FloatParts64 p;
3136 
3137     parts_uint_to_float(&p, a, scale, status);
3138     return float16_round_pack_canonical(&p, status);
3139 }
3140 
/* Narrower unsigned inputs widen losslessly; reuse the uint64 path. */
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3170 
3171 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3172 {
3173     FloatParts64 p;
3174 
3175     parts_uint_to_float(&p, a, scale, status);
3176     return float32_round_pack_canonical(&p, status);
3177 }
3178 
/* Narrower unsigned inputs widen losslessly; reuse the uint64 path. */
float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3203 
3204 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3205 {
3206     FloatParts64 p;
3207 
3208     parts_uint_to_float(&p, a, scale, status);
3209     return float64_round_pack_canonical(&p, status);
3210 }
3211 
/* Narrower unsigned inputs widen losslessly; reuse the uint64 path. */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3236 
3237 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3238 {
3239     FloatParts64 p;
3240 
3241     parts_uint_to_float(&p, a, scale, status);
3242     return bfloat16_round_pack_canonical(&p, status);
3243 }
3244 
/* Narrower unsigned inputs widen losslessly; reuse the uint64 path. */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3269 
3270 float128 uint64_to_float128(uint64_t a, float_status *status)
3271 {
3272     FloatParts128 p;
3273 
3274     parts_uint_to_float(&p, a, 0, status);
3275     return float128_round_pack_canonical(&p, status);
3276 }
3277 
3278 /*
3279  * Minimum and maximum
3280  */
3281 
/*
 * Common shape for every format: unpack both operands to canonical
 * parts, let parts_minmax() select a result according to the FLAGS
 * bits (see the MINMAX_2 expansion below for the combinations used),
 * then round and repack the chosen value.
 */
static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}
3338 
/*
 * Expand the public min/max entry points.  MINMAX_1 defines one
 * wrapper that forwards to <type>_minmax() with a fixed flag
 * combination; MINMAX_2 emits the whole family (max, maxnum,
 * maxnummag, min, minnum, minnummag) for one format.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                      \
    MINMAX_1(type, maxnum, minmax_isnum)                        \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
    MINMAX_1(type, min, minmax_ismin)                           \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
3359 
3360 /* Floating point compare */
3361 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3362                                     float_status *s)
3363 {
3364     if (is_nan(a.cls) || is_nan(b.cls)) {
3365         if (!is_quiet ||
3366             a.cls == float_class_snan ||
3367             b.cls == float_class_snan) {
3368             float_raise(float_flag_invalid, s);
3369         }
3370         return float_relation_unordered;
3371     }
3372 
3373     if (a.cls == float_class_zero) {
3374         if (b.cls == float_class_zero) {
3375             return float_relation_equal;
3376         }
3377         return b.sign ? float_relation_greater : float_relation_less;
3378     } else if (b.cls == float_class_zero) {
3379         return a.sign ? float_relation_less : float_relation_greater;
3380     }
3381 
3382     /* The only really important thing about infinity is its sign. If
3383      * both are infinities the sign marks the smallest of the two.
3384      */
3385     if (a.cls == float_class_inf) {
3386         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3387             return float_relation_equal;
3388         }
3389         return a.sign ? float_relation_less : float_relation_greater;
3390     } else if (b.cls == float_class_inf) {
3391         return b.sign ? float_relation_greater : float_relation_less;
3392     }
3393 
3394     if (a.sign != b.sign) {
3395         return a.sign ? float_relation_less : float_relation_greater;
3396     }
3397 
3398     if (a.exp == b.exp) {
3399         if (a.frac == b.frac) {
3400             return float_relation_equal;
3401         }
3402         if (a.sign) {
3403             return a.frac > b.frac ?
3404                 float_relation_less : float_relation_greater;
3405         } else {
3406             return a.frac > b.frac ?
3407                 float_relation_greater : float_relation_less;
3408         }
3409     } else {
3410         if (a.sign) {
3411             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3412         } else {
3413             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3414         }
3415     }
3416 }
3417 
/*
 * Instantiate a softfloat comparison helper for one binary width:
 * unpack both operands and defer to compare_floats().  ATTR selects
 * the inlining attribute for the generated function.
 */
#define COMPARE(name, attr, sz)                                         \
static int attr                                                         \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3433 
/* Signaling compare: any NaN operand raises invalid. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3443 
/*
 * Hardfloat fast path for float32 comparison: use the host's
 * type-generic comparison macros for the ordered cases, and fall back
 * to the softfloat path for the unordered case (so the correct flags
 * get raised) or when hardfloat is compiled out.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /*
     * NOTE(review): presumably flushes denormal inputs in place per
     * the settings in *s, so the soft fallback below must use the
     * (possibly modified) ua.s/ub.s -- confirm against
     * float32_input_flush2.
     */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3472 
/* Signaling compare: any NaN operand raises invalid. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3482 
/*
 * Hardfloat fast path for float64 comparison; mirrors f32_compare
 * above: ordered cases are decided with the host's comparison macros,
 * the unordered case and the no-hardfloat build fall back to softfloat.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /*
     * NOTE(review): presumably flushes denormal inputs in place per
     * *s; the soft fallback intentionally uses the flushed values.
     */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3511 
/* Signaling compare: any NaN operand raises invalid. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3521 
/* bfloat16 has no hardfloat path; compare via canonical parts. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return compare_floats(pa, pb, is_quiet, s);
}

/* Signaling compare: any NaN operand raises invalid. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3541 
3542 /* Multiply A by 2 raised to the power N.  */
3543 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3544 {
3545     if (unlikely(is_nan(a.cls))) {
3546         parts_return_nan(&a, s);
3547     }
3548     if (a.cls == float_class_normal) {
3549         /* The largest float type (even though not supported by FloatParts64)
3550          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3551          * still allows rounding to infinity, without allowing overflow
3552          * within the int32_t that backs FloatParts64.exp.
3553          */
3554         n = MIN(MAX(n, -0x10000), 0x10000);
3555         a.exp += n;
3556     }
3557     return a;
3558 }
3559 
/*
 * Per-format scalbn wrappers: unpack, adjust the exponent via
 * scalbn_decomposed(), round and repack.
 */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3595 
3596 /*
3597  * Square Root
3598  *
3599  * The old softfloat code did an approximation step before zeroing in
 * on the final result. However, for simplicity, we just compute the
3601  * square root by iterating down from the implicit bit to enough extra
3602  * bits to ensure we get a correctly rounded result.
3603  *
3604  * This does mean however the calculation is slower than before,
3605  * especially for 64 bit floats.
3606  */
3607 
/*
 * Core square root on a decomposed value.  Special cases (NaN, zero,
 * negative, +inf) are resolved up front; for normals the significand
 * root is computed one bit at a time, with any leftover remainder
 * folded into a sticky bit so the caller's rounding is correct.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* sqrt of a negative non-zero value is invalid. */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3667 
/* Softfloat sqrt entry points: unpack, compute, round and repack. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3696 
/*
 * Hardfloat fast path for float32 sqrt: use the host's sqrtf() when
 * the input is a non-negative zero or normal and the status settings
 * permit it; otherwise fall back to the softfloat implementation.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* NOTE(review): presumably flushes a denormal input per *s. */
    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host value. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3723 
/*
 * Hardfloat fast path for float64 sqrt; mirrors float32_sqrt above,
 * using the host's sqrt() for non-negative zero/normal inputs.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* NOTE(review): presumably flushes a denormal input per *s. */
    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host value. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3750 
/* bfloat16 has no hardfloat path; always use the softfloat core. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&pr, status);
}
3759 
3760 /*----------------------------------------------------------------------------
3761 | The pattern for a default generated NaN.
3762 *----------------------------------------------------------------------------*/
3763 
/*
 * Build the default NaN in canonical parts form, then shift the
 * fraction down into the format's storage position before packing
 * (frac_shr handles the 128-bit fraction for float128).
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3808 
3809 /*----------------------------------------------------------------------------
3810 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3811 *----------------------------------------------------------------------------*/
3812 
/*
 * Unpack the raw value, align the fraction up to the canonical
 * position, quiet the NaN via parts_silence_nan(), then shift back
 * down and repack (frac_shl/frac_shr for the 128-bit fraction).
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3867 
3868 /*----------------------------------------------------------------------------
3869 | If `a' is denormal and we are in flush-to-zero mode then set the
3870 | input-denormal exception and return zero. Otherwise just return the value.
3871 *----------------------------------------------------------------------------*/
3872 
3873 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3874 {
3875     if (p.exp == 0 && p.frac != 0) {
3876         float_raise(float_flag_input_denormal, status);
3877         return true;
3878     }
3879 
3880     return false;
3881 }
3882 
3883 float16 float16_squash_input_denormal(float16 a, float_status *status)
3884 {
3885     if (status->flush_inputs_to_zero) {
3886         FloatParts64 p;
3887 
3888         float16_unpack_raw(&p, a);
3889         if (parts_squash_denormal(p, status)) {
3890             return float16_set_sign(float16_zero, p.sign);
3891         }
3892     }
3893     return a;
3894 }
3895 
3896 float32 float32_squash_input_denormal(float32 a, float_status *status)
3897 {
3898     if (status->flush_inputs_to_zero) {
3899         FloatParts64 p;
3900 
3901         float32_unpack_raw(&p, a);
3902         if (parts_squash_denormal(p, status)) {
3903             return float32_set_sign(float32_zero, p.sign);
3904         }
3905     }
3906     return a;
3907 }
3908 
3909 float64 float64_squash_input_denormal(float64 a, float_status *status)
3910 {
3911     if (status->flush_inputs_to_zero) {
3912         FloatParts64 p;
3913 
3914         float64_unpack_raw(&p, a);
3915         if (parts_squash_denormal(p, status)) {
3916             return float64_set_sign(float64_zero, p.sign);
3917         }
3918     }
3919     return a;
3920 }
3921 
3922 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3923 {
3924     if (status->flush_inputs_to_zero) {
3925         FloatParts64 p;
3926 
3927         bfloat16_unpack_raw(&p, a);
3928         if (parts_squash_denormal(p, status)) {
3929             return bfloat16_set_sign(bfloat16_zero, p.sign);
3930         }
3931     }
3932     return a;
3933 }
3934 
3935 /*----------------------------------------------------------------------------
3936 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3937 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3938 | input.  If `zSign' is 1, the input is negated before being converted to an
3939 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3940 | is simply rounded to an integer, with the inexact exception raised if the
3941 | input cannot be represented exactly as an integer.  However, if the fixed-
3942 | point input is too large, the invalid exception is raised and the largest
3943 | positive or negative integer is returned.
3944 *----------------------------------------------------------------------------*/
3945 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the value added below the binary point (bits 6..0) so
     * that the right shift by 7 below implements the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away from zero only if that leaves bit 7 (the LSB of
         * the result) set, i.e. make the result odd.  */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    /* Fraction bits discarded by the shift; non-zero means inexact.  */
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to round to even.  */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Overflow if the magnitude exceeds 32 bits, or if negation
     * produced the wrong sign (value does not fit in int32_t).  */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3993 
3994 /*----------------------------------------------------------------------------
3995 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3996 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3997 | and returns the properly rounded 64-bit integer corresponding to the input.
3998 | If `zSign' is 1, the input is negated before being converted to an integer.
3999 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4000 | the inexact exception raised if the input cannot be represented exactly as
4001 | an integer.  However, if the fixed-point input is too large, the invalid
4002 | exception is raised and the largest positive or negative integer is
4003 | returned.
4004 *----------------------------------------------------------------------------*/
4005 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * absZ1 holds the fraction below the binary point; decide whether
     * the integer part absZ0 must be incremented for this mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the fraction is >= 1/2 (top bit set).  */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Increment only if that makes the result odd.  */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap-around of the 64-bit magnitude means the result cannot
         * fit in an int64_t.  */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie (fraction == 1/2) under nearest-even: clear the
         * LSB to round to even.  */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign mismatch after negation means the value overflowed.  */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4055 
4056 /*----------------------------------------------------------------------------
4057 | Normalizes the subnormal single-precision floating-point value represented
4058 | by the denormalized significand `aSig'.  The normalized exponent and
4059 | significand are stored at the locations pointed to by `zExpPtr' and
4060 | `zSigPtr', respectively.
4061 *----------------------------------------------------------------------------*/
4062 
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Shift the leading 1 up to bit 23; clz32 counts from bit 31 and
     * the float32 significand occupies the low 24 bits.  */
    int8_t shift = clz32(aSig) - 8;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4073 
4074 /*----------------------------------------------------------------------------
4075 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4076 | and significand `zSig', and returns the proper single-precision floating-
4077 | point value corresponding to the abstract input.  Ordinarily, the abstract
4078 | value is simply rounded and packed into the single-precision format, with
4079 | the inexact exception raised if the abstract input cannot be represented
4080 | exactly.  However, if the abstract value is too large, the overflow and
4081 | inexact exceptions are raised and an infinity or maximal finite value is
4082 | returned.  If the abstract value is too small, the input value is rounded to
4083 | a subnormal number, and the underflow and inexact exceptions are raised if
4084 | the abstract input cannot be represented exactly as a subnormal single-
4085 | precision floating-point number.
4086 |     The input significand `zSig' has its binary point between bits 30
4087 | and 29, which is 7 bits to the left of the usual location.  This shifted
4088 | significand must be normalized or smaller.  If `zSig' is not normalized,
4089 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4090 | and it must not require rounding.  In the usual case that `zSig' is
4091 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4092 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4093 | Binary Floating-Point Arithmetic.
4094 *----------------------------------------------------------------------------*/
4095 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the value added below the binary point (bits 6..0) so
     * that the right shift by 7 below implements the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only if the result's LSB (bit 7) would be set.  */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* The unsigned comparison catches both overflow (zExp large) and
     * underflow (zExp negative, seen as a huge unsigned value).  */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: round-to-odd and increment-free modes yield
             * the maximal finite value instead of infinity.  */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* -!overflow_to_inf is 0 (infinity) or all-ones fraction
             * (largest finite magnitude).  */
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tininess: either detected before rounding, or the value
             * stays below the smallest normal even after rounding.  */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            /* Denormalize, jamming shifted-out bits into the LSB so
             * inexactness is not lost.  */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to round to even.  */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4172 
4173 /*----------------------------------------------------------------------------
4174 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4175 | and significand `zSig', and returns the proper single-precision floating-
4176 | point value corresponding to the abstract input.  This routine is just like
4177 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4178 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4179 | floating-point exponent.
4180 *----------------------------------------------------------------------------*/
4181 
4182 static float32
4183  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4184                               float_status *status)
4185 {
4186     int8_t shiftCount;
4187 
4188     shiftCount = clz32(zSig) - 1;
4189     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4190                                status);
4191 
4192 }
4193 
4194 /*----------------------------------------------------------------------------
4195 | Normalizes the subnormal double-precision floating-point value represented
4196 | by the denormalized significand `aSig'.  The normalized exponent and
4197 | significand are stored at the locations pointed to by `zExpPtr' and
4198 | `zSigPtr', respectively.
4199 *----------------------------------------------------------------------------*/
4200 
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Shift the leading 1 up to bit 52; clz64 counts from bit 63 and
     * the float64 significand occupies the low 53 bits.  */
    int8_t shift = clz64(aSig) - 11;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4211 
4212 /*----------------------------------------------------------------------------
4213 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4214 | double-precision floating-point value, returning the result.  After being
4215 | shifted into the proper positions, the three fields are simply added
4216 | together to form the result.  This means that any integer portion of `zSig'
4217 | will be added into the exponent.  Since a properly normalized significand
4218 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4219 | than the desired result exponent whenever `zSig' is a complete, normalized
4220 | significand.
4221 *----------------------------------------------------------------------------*/
4222 
4223 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4224 {
4225 
4226     return make_float64(
4227         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4228 
4229 }
4230 
4231 /*----------------------------------------------------------------------------
4232 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4233 | and significand `zSig', and returns the proper double-precision floating-
4234 | point value corresponding to the abstract input.  Ordinarily, the abstract
4235 | value is simply rounded and packed into the double-precision format, with
4236 | the inexact exception raised if the abstract input cannot be represented
4237 | exactly.  However, if the abstract value is too large, the overflow and
4238 | inexact exceptions are raised and an infinity or maximal finite value is
4239 | returned.  If the abstract value is too small, the input value is rounded to
4240 | a subnormal number, and the underflow and inexact exceptions are raised if
4241 | the abstract input cannot be represented exactly as a subnormal double-
4242 | precision floating-point number.
4243 |     The input significand `zSig' has its binary point between bits 62
4244 | and 61, which is 10 bits to the left of the usual location.  This shifted
4245 | significand must be normalized or smaller.  If `zSig' is not normalized,
4246 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4247 | and it must not require rounding.  In the usual case that `zSig' is
4248 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4249 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4250 | Binary Floating-Point Arithmetic.
4251 *----------------------------------------------------------------------------*/
4252 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the value added below the binary point (bits 9..0) so
     * that the right shift by 10 below implements the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round away only if the result's LSB (bit 10) would be set.  */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* The unsigned comparison catches both overflow (zExp large) and
     * underflow (zExp negative, seen as a huge unsigned value).  */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: round-to-odd and increment-free modes yield
             * the maximal finite value instead of infinity.  */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* -(!overflow_to_inf) is 0 (infinity) or all-ones fraction
             * (largest finite magnitude).  */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess: either detected before rounding, or the value
             * stays below the smallest normal even after rounding.  */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Denormalize, jamming shifted-out bits into the LSB so
             * inexactness is not lost.  */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact tie under nearest-even: clear the LSB to round to even.  */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4328 
4329 /*----------------------------------------------------------------------------
4330 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4331 | and significand `zSig', and returns the proper double-precision floating-
4332 | point value corresponding to the abstract input.  This routine is just like
4333 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4334 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4335 | floating-point exponent.
4336 *----------------------------------------------------------------------------*/
4337 
4338 static float64
4339  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4340                               float_status *status)
4341 {
4342     int8_t shiftCount;
4343 
4344     shiftCount = clz64(zSig) - 1;
4345     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4346                                status);
4347 
4348 }
4349 
4350 /*----------------------------------------------------------------------------
4351 | Normalizes the subnormal extended double-precision floating-point value
4352 | represented by the denormalized significand `aSig'.  The normalized exponent
4353 | and significand are stored at the locations pointed to by `zExpPtr' and
4354 | `zSigPtr', respectively.
4355 *----------------------------------------------------------------------------*/
4356 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* The floatx80 significand has an explicit integer bit at bit 63,
     * so normalization shifts the leading 1 all the way up.  */
    int8_t shift = clz64(aSig);

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4366 
4367 /*----------------------------------------------------------------------------
4368 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4369 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4370 | and returns the proper extended double-precision floating-point value
4371 | corresponding to the abstract input.  Ordinarily, the abstract value is
4372 | rounded and packed into the extended double-precision format, with the
4373 | inexact exception raised if the abstract input cannot be represented
4374 | exactly.  However, if the abstract value is too large, the overflow and
4375 | inexact exceptions are raised and an infinity or maximal finite value is
4376 | returned.  If the abstract value is too small, the input value is rounded to
4377 | a subnormal number, and the underflow and inexact exceptions are raised if
4378 | the abstract input cannot be represented exactly as a subnormal extended
4379 | double-precision floating-point number.
4380 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4381 | number of bits as single or double precision, respectively.  Otherwise, the
4382 | result is rounded to the full precision of the extended double-precision
4383 | format.
4384 |     The input significand must be normalized or smaller.  If the input
4385 | significand is not normalized, `zExp' must be 0; in that case, the result
4386 | returned is a subnormal number, and it must not require rounding.  The
4387 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4388 | Floating-Point Arithmetic.
4389 *----------------------------------------------------------------------------*/
4390 
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Reduced-precision path: for 32/64-bit rounding precision the
     * low bits of zSig0 are rounded away using an increment/mask pair
     * sized for that precision.  Full 80-bit precision is handled at
     * the precision80 label below.
     */
    if ( roundingPrecision == 80 ) goto precision80;
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Jam any low-word bits into zSig0's LSB so inexactness survives.  */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Unsigned trick: catches zExp <= 0 as well as zExp >= 0x7FFE.  */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /* Tininess: before rounding, or still tiny after the
             * increment (no carry out of bit 63).  */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding carried into the integer bit: back to normal.  */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Exact tie under nearest-even: also clear the result LSB.  */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of bit 63: renormalize to 1.0 * 2^(zExp+1).  */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Exact tie under nearest-even: also clear the result LSB.  */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full-precision path: zSig1 holds the bits below the result's
     * LSB; decide whether zSig0 must be incremented.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the discarded part is >= 1/2 ulp.  */
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that never round away from zero in this direction
             * return the largest finite value instead of infinity.  */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize across both words, jamming lost bits.  */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed above.  */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: round to even.  */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry set the integer bit: result is normal again.  */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* Carry out of bit 63: renormalize to 1.0 * 2^(zExp+1).  */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie under nearest-even: round to even.  */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4578 
4579 /*----------------------------------------------------------------------------
4580 | Takes an abstract floating-point value having sign `zSign', exponent
4581 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4582 | and returns the proper extended double-precision floating-point value
4583 | corresponding to the abstract input.  This routine is just like
4584 | `roundAndPackFloatx80' except that the input significand does not have to be
4585 | normalized.
4586 *----------------------------------------------------------------------------*/
4587 
4588 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4589                                        bool zSign, int32_t zExp,
4590                                        uint64_t zSig0, uint64_t zSig1,
4591                                        float_status *status)
4592 {
4593     int8_t shiftCount;
4594 
4595     if ( zSig0 == 0 ) {
4596         zSig0 = zSig1;
4597         zSig1 = 0;
4598         zExp -= 64;
4599     }
4600     shiftCount = clz64(zSig0);
4601     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4602     zExp -= shiftCount;
4603     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4604                                 zSig0, zSig1, status);
4605 
4606 }
4607 
4608 /*----------------------------------------------------------------------------
4609 | Returns the least-significant 64 fraction bits of the quadruple-precision
4610 | floating-point value `a'.
4611 *----------------------------------------------------------------------------*/
4612 
4613 static inline uint64_t extractFloat128Frac1( float128 a )
4614 {
4615 
4616     return a.low;
4617 
4618 }
4619 
4620 /*----------------------------------------------------------------------------
4621 | Returns the most-significant 48 fraction bits of the quadruple-precision
4622 | floating-point value `a'.
4623 *----------------------------------------------------------------------------*/
4624 
4625 static inline uint64_t extractFloat128Frac0( float128 a )
4626 {
4627 
4628     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4629 
4630 }
4631 
4632 /*----------------------------------------------------------------------------
4633 | Returns the exponent bits of the quadruple-precision floating-point value
4634 | `a'.
4635 *----------------------------------------------------------------------------*/
4636 
4637 static inline int32_t extractFloat128Exp( float128 a )
4638 {
4639 
4640     return ( a.high>>48 ) & 0x7FFF;
4641 
4642 }
4643 
4644 /*----------------------------------------------------------------------------
4645 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4646 *----------------------------------------------------------------------------*/
4647 
4648 static inline bool extractFloat128Sign(float128 a)
4649 {
4650     return a.high >> 63;
4651 }
4652 
4653 /*----------------------------------------------------------------------------
4654 | Normalizes the subnormal quadruple-precision floating-point value
4655 | represented by the denormalized significand formed by the concatenation of
4656 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4657 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4658 | significand are stored at the location pointed to by `zSig0Ptr', and the
4659 | least significant 64 bits of the normalized significand are stored at the
4660 | location pointed to by `zSig1Ptr'.
4661 *----------------------------------------------------------------------------*/
4662 
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr, uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* The high word is non-zero: bring its leading 1 up to bit 48. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }
    /* Only the low word is populated; it supplies every significand bit. */
    shift = clz64(aSig1) - 15;
    if (shift < 0) {
        /* Leading 1 lies above bit 48 of the low word: split across words. */
        *zSig0Ptr = aSig1 >> -shift;
        *zSig1Ptr = aSig1 << (shift & 63);
    } else {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -shift - 63;
}
4693 
4694 /*----------------------------------------------------------------------------
4695 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4696 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4697 | floating-point value, returning the result.  After being shifted into the
4698 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4699 | added together to form the most significant 32 bits of the result.  This
4700 | means that any integer portion of `zSig0' will be added into the exponent.
4701 | Since a properly normalized significand will have an integer portion equal
4702 | to 1, the `zExp' input should be 1 less than the desired result exponent
4703 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4704 | significand.
4705 *----------------------------------------------------------------------------*/
4706 
4707 static inline float128
4708 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4709 {
4710     float128 z;
4711 
4712     z.low = zSig1;
4713     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4714     return z;
4715 }
4716 
4717 /*----------------------------------------------------------------------------
4718 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4719 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4720 | and `zSig2', and returns the proper quadruple-precision floating-point value
4721 | corresponding to the abstract input.  Ordinarily, the abstract value is
4722 | simply rounded and packed into the quadruple-precision format, with the
4723 | inexact exception raised if the abstract input cannot be represented
4724 | exactly.  However, if the abstract value is too large, the overflow and
4725 | inexact exceptions are raised and an infinity or maximal finite value is
4726 | returned.  If the abstract value is too small, the input value is rounded to
4727 | a subnormal number, and the underflow and inexact exceptions are raised if
4728 | the abstract input cannot be represented exactly as a subnormal quadruple-
4729 | precision floating-point number.
4730 |     The input significand must be normalized or smaller.  If the input
4731 | significand is not normalized, `zExp' must be 0; in that case, the result
4732 | returned is a subnormal number, and it must not require rounding.  In the
4733 | usual case that the input significand is normalized, `zExp' must be 1 less
4734 | than the ``true'' floating-point exponent.  The handling of underflow and
4735 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4736 *----------------------------------------------------------------------------*/
4737 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Decide whether zSig0:zSig1 must be incremented, from the rounding
     * mode and the guard/sticky bits carried in zSig2.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the round bit (MSB of zSig2) is set. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round-to-odd: bump only if the LSB is already even. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /*
     * Extreme exponents: the unsigned compare also routes negative zExp
     * (underflow) through this branch.
     */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow, including the maximal value rounding up into it. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that do not round away return the largest finite value. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /*
             * Tininess: detected before rounding, or after rounding unless
             * the value would round up to the smallest normal.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize: shift right, jamming lost bits into zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The shift changed zSig1/zSig2, so recompute increment. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: clear the LSB when discarded bits were exactly half. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must be packed with a zero exponent. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4849 
4850 /*----------------------------------------------------------------------------
4851 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4852 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4853 | returns the proper quadruple-precision floating-point value corresponding
4854 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4855 | except that the input significand has fewer bits and does not have to be
4856 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4857 | point exponent.
4858 *----------------------------------------------------------------------------*/
4859 
4860 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4861                                               uint64_t zSig0, uint64_t zSig1,
4862                                               float_status *status)
4863 {
4864     int8_t shiftCount;
4865     uint64_t zSig2;
4866 
4867     if ( zSig0 == 0 ) {
4868         zSig0 = zSig1;
4869         zSig1 = 0;
4870         zExp -= 64;
4871     }
4872     shiftCount = clz64(zSig0) - 15;
4873     if ( 0 <= shiftCount ) {
4874         zSig2 = 0;
4875         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4876     }
4877     else {
4878         shift128ExtraRightJamming(
4879             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4880     }
4881     zExp -= shiftCount;
4882     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4883 
4884 }
4885 
4886 
4887 /*----------------------------------------------------------------------------
4888 | Returns the result of converting the 32-bit two's complement integer `a'
4889 | to the extended double-precision floating-point format.  The conversion
4890 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4891 | Arithmetic.
4892 *----------------------------------------------------------------------------*/
4893 
4894 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4895 {
4896     bool zSign;
4897     uint32_t absA;
4898     int8_t shiftCount;
4899     uint64_t zSig;
4900 
4901     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4902     zSign = ( a < 0 );
4903     absA = zSign ? - a : a;
4904     shiftCount = clz32(absA) + 32;
4905     zSig = absA;
4906     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4907 
4908 }
4909 
4910 /*----------------------------------------------------------------------------
4911 | Returns the result of converting the 64-bit two's complement integer `a'
4912 | to the extended double-precision floating-point format.  The conversion
4913 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4914 | Arithmetic.
4915 *----------------------------------------------------------------------------*/
4916 
4917 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4918 {
4919     bool zSign;
4920     uint64_t absA;
4921     int8_t shiftCount;
4922 
4923     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4924     zSign = ( a < 0 );
4925     absA = zSign ? - a : a;
4926     shiftCount = clz64(absA);
4927     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4928 
4929 }
4930 
4931 /*----------------------------------------------------------------------------
4932 | Returns the result of converting the single-precision floating-point value
4933 | `a' to the extended double-precision floating-point format.  The conversion
4934 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4935 | Arithmetic.
4936 *----------------------------------------------------------------------------*/
4937 
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* NaN: convert through the common format, silencing signaling NaNs. */
        if (aSig) {
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        /* Subnormal: normalize so the packing below is uniform. */
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit and rebias from float32 to floatx80. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
4966 
4967 /*----------------------------------------------------------------------------
4968 | Returns the remainder of the single-precision floating-point value `a'
4969 | with respect to the corresponding value `b'.  The operation is performed
4970 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4971 *----------------------------------------------------------------------------*/
4972 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* NaN operands and rem(Inf, x) are invalid. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* rem(x, Inf) is x itself. */
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: a single 32-bit division step. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| far smaller than |b|: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce 62 bits of quotient per pass. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Bias the estimate low so the partial remainder stays >= 0. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past the true remainder, then pick the nearer of the last two. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    /* On a tie (sigMean == 0), choose the remainder with even quotient. */
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5068 
5069 
5070 
5071 /*----------------------------------------------------------------------------
5072 | Returns the binary exponential of the single-precision floating-point value
5073 | `a'. The operation is performed according to the IEC/IEEE Standard for
5074 | Binary Floating-Point Arithmetic.
5075 |
5076 | Uses the following identities:
5077 |
5078 | 1. -------------------------------------------------------------------------
5079 |      x    x*ln(2)
5080 |     2  = e
5081 |
5082 | 2. -------------------------------------------------------------------------
5083 |                      2     3     4     5           n
5084 |      x        x     x     x     x     x           x
5085 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5086 |               1!    2!    3!    4!    5!          n!
5087 *----------------------------------------------------------------------------*/
5088 
/* Taylor-series coefficients for e^x: the entry labelled n holds 1/n!. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5107 
float32 float32_exp2(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    a = float32_squash_input_denormal(a, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFF) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* 2^-Inf = 0, 2^+Inf = +Inf. */
        return (aSign) ? float32_zero : a;
    }
    /* 2^(+/-0) is exactly 1. */
    if (aExp == 0) {
        if (aSig == 0) return float32_one;
    }

    float_raise(float_flag_inexact, status);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* 2^a = e^(a * ln 2); evaluate e^x by its Taylor series (see table). */
    x = float32_to_float64(a, status);
    x = float64_mul(x, float64_ln2, status);

    /* r accumulates 1 + x/1! + x^2/2! + ...; xn tracks the power x^(i+1). */
    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i], status);
        r = float64_add(r, f, status);

        xn = float64_mul(xn, x, status);
    }

    return float64_to_float32(r, status);
}
5152 
5153 /*----------------------------------------------------------------------------
5154 | Returns the binary log of the single-precision floating-point value `a'.
5155 | The operation is performed according to the IEC/IEEE Standard for Binary
5156 | Floating-Point Arithmetic.
5157 *----------------------------------------------------------------------------*/
5158 float32 float32_log2(float32 a, float_status *status)
5159 {
5160     bool aSign, zSign;
5161     int aExp;
5162     uint32_t aSig, zSig, i;
5163 
5164     a = float32_squash_input_denormal(a, status);
5165     aSig = extractFloat32Frac( a );
5166     aExp = extractFloat32Exp( a );
5167     aSign = extractFloat32Sign( a );
5168 
5169     if ( aExp == 0 ) {
5170         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5171         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5172     }
5173     if ( aSign ) {
5174         float_raise(float_flag_invalid, status);
5175         return float32_default_nan(status);
5176     }
5177     if ( aExp == 0xFF ) {
5178         if (aSig) {
5179             return propagateFloat32NaN(a, float32_zero, status);
5180         }
5181         return a;
5182     }
5183 
5184     aExp -= 0x7F;
5185     aSig |= 0x00800000;
5186     zSign = aExp < 0;
5187     zSig = aExp << 23;
5188 
5189     for (i = 1 << 22; i > 0; i >>= 1) {
5190         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5191         if ( aSig & 0x01000000 ) {
5192             aSig >>= 1;
5193             zSig |= i;
5194         }
5195     }
5196 
5197     if ( zSign )
5198         zSig = -zSig;
5199 
5200     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5201 }
5202 
5203 /*----------------------------------------------------------------------------
5204 | Returns the result of converting the double-precision floating-point value
5205 | `a' to the extended double-precision floating-point format.  The conversion
5206 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5207 | Arithmetic.
5208 *----------------------------------------------------------------------------*/
5209 
floatx80 float64_to_floatx80(float64 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        /* NaN: convert through the common format, silencing signaling NaNs. */
        if (aSig) {
            floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        /* Subnormal: normalize so the packing below is uniform. */
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit and rebias from float64 to floatx80. */
    return
        packFloatx80(
            aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);

}
5239 
5240 /*----------------------------------------------------------------------------
5241 | Returns the remainder of the double-precision floating-point value `a'
5242 | with respect to the corresponding value `b'.  The operation is performed
5243 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5244 *----------------------------------------------------------------------------*/
5245 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* NaN operands and rem(Inf, x) are invalid. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* rem(x, Inf) is x itself. */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bit explicit and left-justify both significands. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| far smaller than |b|: a is already the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    expDiff -= 64;
    /* Reduce roughly 62 quotient bits per iteration. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        /* Bias the estimate low so the partial remainder stays >= 0. */
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past the true remainder, then pick the nearer of the last two. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    /* On a tie (sigMean == 0), choose the remainder with even quotient. */
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5327 
5328 /*----------------------------------------------------------------------------
5329 | Returns the binary log of the double-precision floating-point value `a'.
5330 | The operation is performed according to the IEC/IEEE Standard for Binary
5331 | Floating-Point Arithmetic.
5332 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) is -infinity. */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+Inf) = +Inf. */
        return a;
    }

    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    /* Integer part of the result in 12.52 fixed point. */
    zSig = (uint64_t)aExp << 52;
    /* Produce fraction bits one at a time by squaring the significand. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5376 
5377 /*----------------------------------------------------------------------------
5378 | Returns the result of converting the extended double-precision floating-
5379 | point value `a' to the 32-bit two's complement integer format.  The
5380 | conversion is performed according to the IEC/IEEE Standard for Binary
5381 | Floating-Point Arithmetic---which means in particular that the conversion
5382 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5383 | largest positive integer is returned.  Otherwise, if the conversion
5384 | overflows, the largest integer with the same sign as `a' is returned.
5385 *----------------------------------------------------------------------------*/
5386 
5387 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5388 {
5389     bool aSign;
5390     int32_t aExp, shiftCount;
5391     uint64_t aSig;
5392 
5393     if (floatx80_invalid_encoding(a)) {
5394         float_raise(float_flag_invalid, status);
5395         return 1 << 31;
5396     }
5397     aSig = extractFloatx80Frac( a );
5398     aExp = extractFloatx80Exp( a );
5399     aSign = extractFloatx80Sign( a );
5400     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5401     shiftCount = 0x4037 - aExp;
5402     if ( shiftCount <= 0 ) shiftCount = 1;
5403     shift64RightJamming( aSig, shiftCount, &aSig );
5404     return roundAndPackInt32(aSign, aSig, status);
5405 
5406 }
5407 
5408 /*----------------------------------------------------------------------------
5409 | Returns the result of converting the extended double-precision floating-
5410 | point value `a' to the 32-bit two's complement integer format.  The
5411 | conversion is performed according to the IEC/IEEE Standard for Binary
5412 | Floating-Point Arithmetic, except that the conversion is always rounded
5413 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5414 | Otherwise, if the conversion overflows, the largest integer with the same
5415 | sign as `a' is returned.
5416 *----------------------------------------------------------------------------*/
5417 
5418 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5419 {
5420     bool aSign;
5421     int32_t aExp, shiftCount;
5422     uint64_t aSig, savedASig;
5423     int32_t z;
5424 
5425     if (floatx80_invalid_encoding(a)) {
5426         float_raise(float_flag_invalid, status);
5427         return 1 << 31;
5428     }
5429     aSig = extractFloatx80Frac( a );
5430     aExp = extractFloatx80Exp( a );
5431     aSign = extractFloatx80Sign( a );
5432     if ( 0x401E < aExp ) {
5433         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5434         goto invalid;
5435     }
5436     else if ( aExp < 0x3FFF ) {
5437         if (aExp || aSig) {
5438             float_raise(float_flag_inexact, status);
5439         }
5440         return 0;
5441     }
5442     shiftCount = 0x403E - aExp;
5443     savedASig = aSig;
5444     aSig >>= shiftCount;
5445     z = aSig;
5446     if ( aSign ) z = - z;
5447     if ( ( z < 0 ) ^ aSign ) {
5448  invalid:
5449         float_raise(float_flag_invalid, status);
5450         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5451     }
5452     if ( ( aSig<<shiftCount ) != savedASig ) {
5453         float_raise(float_flag_inexact, status);
5454     }
5455     return z;
5456 
5457 }
5458 
5459 /*----------------------------------------------------------------------------
5460 | Returns the result of converting the extended double-precision floating-
5461 | point value `a' to the 64-bit two's complement integer format.  The
5462 | conversion is performed according to the IEC/IEEE Standard for Binary
5463 | Floating-Point Arithmetic---which means in particular that the conversion
5464 | is rounded according to the current rounding mode.  If `a' is a NaN,
5465 | the largest positive integer is returned.  Otherwise, if the conversion
5466 | overflows, the largest integer with the same sign as `a' is returned.
5467 *----------------------------------------------------------------------------*/
5468 
int64_t floatx80_to_int64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig, aSigExtra;

    /* Invalid encodings convert to 0x8000000000000000 and raise invalid. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    /* 0x403E = 0x3FFF (exponent bias) + 63: distance to move the 64-bit
     * significand right so its integer part is fully aligned. */
    shiftCount = 0x403E - aExp;
    if ( shiftCount <= 0 ) {
        if ( shiftCount ) {
            /* |a| >= 2^64, Inf, or NaN: definite overflow.  NaN and
             * positive overflow saturate to INT64_MAX. */
            float_raise(float_flag_invalid, status);
            if (!aSign || floatx80_is_any_nan(a)) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        /* 2^63 <= |a| < 2^64: already aligned, no bits shifted out;
         * roundAndPackInt64 detects any residual overflow. */
        aSigExtra = 0;
    }
    else {
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
    }
    return roundAndPackInt64(aSign, aSig, aSigExtra, status);

}
5499 
5500 /*----------------------------------------------------------------------------
5501 | Returns the result of converting the extended double-precision floating-
5502 | point value `a' to the 64-bit two's complement integer format.  The
5503 | conversion is performed according to the IEC/IEEE Standard for Binary
5504 | Floating-Point Arithmetic, except that the conversion is always rounded
5505 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5506 | Otherwise, if the conversion overflows, the largest integer with the same
5507 | sign as `a' is returned.
5508 *----------------------------------------------------------------------------*/
5509 
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;
    int64_t z;

    /* Invalid encodings convert to 0x8000000000000000 and raise invalid. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    /* shiftCount >= 0 means |a| >= 2^63 (0x403E = bias + 63). */
    shiftCount = aExp - 0x403E;
    if ( 0 <= shiftCount ) {
        aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
        /* a.high == 0xC03E with no fraction bits below the integer bit
         * is exactly -2^63, the only magnitude >= 2^63 that fits. */
        if ( ( a.high != 0xC03E ) || aSig ) {
            float_raise(float_flag_invalid, status);
            /* Positive overflow, or a NaN (max exponent with non-zero
             * fraction): largest positive integer. */
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
                return INT64_MAX;
            }
        }
        return INT64_MIN;
    }
    else if ( aExp < 0x3FFF ) {
        /* |a| < 1 truncates to zero; inexact unless a is exactly zero. */
        if (aExp | aSig) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    z = aSig>>( - shiftCount );
    /* Any bits shifted out mean truncation discarded a fraction. */
    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
        float_raise(float_flag_inexact, status);
    }
    if ( aSign ) z = - z;
    return z;

}
5549 
5550 /*----------------------------------------------------------------------------
5551 | Returns the result of converting the extended double-precision floating-
5552 | point value `a' to the single-precision floating-point format.  The
5553 | conversion is performed according to the IEC/IEEE Standard for Binary
5554 | Floating-Point Arithmetic.
5555 *----------------------------------------------------------------------------*/
5556 
float32 floatx80_to_float32(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 ) ) {
            /* NaN: convert the payload, then quieten the result. */
            float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
                                             status);
            return float32_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* Narrow the 64-bit significand, folding shifted-out bits into the
     * sticky bit so rounding still sees them. */
    shift64RightJamming( aSig, 33, &aSig );
    /* Rebias the exponent for float32; the zero case (aExp and aSig
     * both 0) must keep exponent 0. */
    if ( aExp || aSig ) aExp -= 0x3F81;
    return roundAndPackFloat32(aSign, aExp, aSig, status);

}
5583 
5584 /*----------------------------------------------------------------------------
5585 | Returns the result of converting the extended double-precision floating-
5586 | point value `a' to the double-precision floating-point format.  The
5587 | conversion is performed according to the IEC/IEEE Standard for Binary
5588 | Floating-Point Arithmetic.
5589 *----------------------------------------------------------------------------*/
5590 
float64 floatx80_to_float64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig, zSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 ) ) {
            /* NaN: convert the payload, then quieten the result. */
            float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
                                             status);
            return float64_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloat64( aSign, 0x7FF, 0 );
    }
    /* Shift right one, folding the lost bit into the sticky bit. */
    shift64RightJamming( aSig, 1, &zSig );
    /* Rebias the exponent for float64; the zero case (aExp and aSig
     * both 0) must keep exponent 0. */
    if ( aExp || aSig ) aExp -= 0x3C01;
    return roundAndPackFloat64(aSign, aExp, zSig, status);

}
5617 
5618 /*----------------------------------------------------------------------------
5619 | Returns the result of converting the extended double-precision floating-
5620 | point value `a' to the quadruple-precision floating-point format.  The
5621 | conversion is performed according to the IEC/IEEE Standard for Binary
5622 | Floating-Point Arithmetic.
5623 *----------------------------------------------------------------------------*/
5624 
float128 floatx80_to_float128(floatx80 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
        /* NaN: convert the payload, then quieten the result. */
        float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
                                           status);
        return float128_silence_nan(res, status);
    }
    /* Drop the explicit integer bit (aSig<<1) and position the remaining
     * fraction bits at the top of float128's wider fraction field; the
     * widening is exact, so no rounding is needed. */
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
    return packFloat128( aSign, aExp, zSig0, zSig1 );

}
5647 
5648 /*----------------------------------------------------------------------------
5649 | Rounds the extended double-precision floating-point value `a'
5650 | to the precision provided by floatx80_rounding_precision and returns the
5651 | result as an extended double-precision floating-point value.
5652 | The operation is performed according to the IEC/IEEE Standard for Binary
5653 | Floating-Point Arithmetic.
5654 *----------------------------------------------------------------------------*/
5655 
5656 floatx80 floatx80_round(floatx80 a, float_status *status)
5657 {
5658     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5659                                 extractFloatx80Sign(a),
5660                                 extractFloatx80Exp(a),
5661                                 extractFloatx80Frac(a), 0, status);
5662 }
5663 
5664 /*----------------------------------------------------------------------------
5665 | Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended double-precision floating-point
5667 | value.  The operation is performed according to the IEC/IEEE Standard for
5668 | Binary Floating-Point Arithmetic.
5669 *----------------------------------------------------------------------------*/
5670 
5671 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5672 {
5673     bool aSign;
5674     int32_t aExp;
5675     uint64_t lastBitMask, roundBitsMask;
5676     floatx80 z;
5677 
5678     if (floatx80_invalid_encoding(a)) {
5679         float_raise(float_flag_invalid, status);
5680         return floatx80_default_nan(status);
5681     }
5682     aExp = extractFloatx80Exp( a );
5683     if ( 0x403E <= aExp ) {
5684         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5685             return propagateFloatx80NaN(a, a, status);
5686         }
5687         return a;
5688     }
5689     if ( aExp < 0x3FFF ) {
5690         if (    ( aExp == 0 )
5691              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5692             return a;
5693         }
5694         float_raise(float_flag_inexact, status);
5695         aSign = extractFloatx80Sign( a );
5696         switch (status->float_rounding_mode) {
5697          case float_round_nearest_even:
5698             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5699                ) {
5700                 return
5701                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5702             }
5703             break;
5704         case float_round_ties_away:
5705             if (aExp == 0x3FFE) {
5706                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5707             }
5708             break;
5709          case float_round_down:
5710             return
5711                   aSign ?
5712                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5713                 : packFloatx80( 0, 0, 0 );
5714          case float_round_up:
5715             return
5716                   aSign ? packFloatx80( 1, 0, 0 )
5717                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5718 
5719         case float_round_to_zero:
5720             break;
5721         default:
5722             g_assert_not_reached();
5723         }
5724         return packFloatx80( aSign, 0, 0 );
5725     }
5726     lastBitMask = 1;
5727     lastBitMask <<= 0x403E - aExp;
5728     roundBitsMask = lastBitMask - 1;
5729     z = a;
5730     switch (status->float_rounding_mode) {
5731     case float_round_nearest_even:
5732         z.low += lastBitMask>>1;
5733         if ((z.low & roundBitsMask) == 0) {
5734             z.low &= ~lastBitMask;
5735         }
5736         break;
5737     case float_round_ties_away:
5738         z.low += lastBitMask >> 1;
5739         break;
5740     case float_round_to_zero:
5741         break;
5742     case float_round_up:
5743         if (!extractFloatx80Sign(z)) {
5744             z.low += roundBitsMask;
5745         }
5746         break;
5747     case float_round_down:
5748         if (extractFloatx80Sign(z)) {
5749             z.low += roundBitsMask;
5750         }
5751         break;
5752     default:
5753         abort();
5754     }
5755     z.low &= ~ roundBitsMask;
5756     if ( z.low == 0 ) {
5757         ++z.high;
5758         z.low = UINT64_C(0x8000000000000000);
5759     }
5760     if (z.low != a.low) {
5761         float_raise(float_flag_inexact, status);
5762     }
5763     return z;
5764 
5765 }
5766 
5767 /*----------------------------------------------------------------------------
5768 | Returns the result of adding the absolute values of the extended double-
5769 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5770 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5771 | The addition is performed according to the IEC/IEEE Standard for Binary
5772 | Floating-Point Arithmetic.
5773 *----------------------------------------------------------------------------*/
5774 
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to a's. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* A denormal's effective exponent is 1, not 0. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to b's. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: significands add directly. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* A set top bit means no carry out: the sum is already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of bit 63: shift right one (jamming the lost bit),
     * restore the explicit integer bit, and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5846 
5847 /*----------------------------------------------------------------------------
5848 | Returns the result of subtracting the absolute values of the extended
5849 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
5850 | difference is negated before being returned.  `zSign' is ignored if the
5851 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
5852 | Standard for Binary Floating-Point Arithmetic.
5853 *----------------------------------------------------------------------------*/
5854 
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Exponents are equal from here on. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - Inf is invalid. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    /* A denormal's effective exponent is 1, not 0. */
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: the zero is negative only in round-down mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - Inf: infinity with flipped sign. */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* Align a's significand to b's exponent (denormal adjust first). */
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* b dominates: result takes b's exponent and the opposite sign. */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    /* Align b's significand to a's exponent (denormal adjust first). */
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5915 
5916 /*----------------------------------------------------------------------------
5917 | Returns the result of adding the extended double-precision floating-point
5918 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
5919 | Standard for Binary Floating-Point Arithmetic.
5920 *----------------------------------------------------------------------------*/
5921 
5922 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5923 {
5924     bool aSign, bSign;
5925 
5926     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5927         float_raise(float_flag_invalid, status);
5928         return floatx80_default_nan(status);
5929     }
5930     aSign = extractFloatx80Sign( a );
5931     bSign = extractFloatx80Sign( b );
5932     if ( aSign == bSign ) {
5933         return addFloatx80Sigs(a, b, aSign, status);
5934     }
5935     else {
5936         return subFloatx80Sigs(a, b, aSign, status);
5937     }
5938 
5939 }
5940 
5941 /*----------------------------------------------------------------------------
5942 | Returns the result of subtracting the extended double-precision floating-
5943 | point values `a' and `b'.  The operation is performed according to the
5944 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5945 *----------------------------------------------------------------------------*/
5946 
5947 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5948 {
5949     bool aSign, bSign;
5950 
5951     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5952         float_raise(float_flag_invalid, status);
5953         return floatx80_default_nan(status);
5954     }
5955     aSign = extractFloatx80Sign( a );
5956     bSign = extractFloatx80Sign( b );
5957     if ( aSign == bSign ) {
5958         return subFloatx80Sigs(a, b, aSign, status);
5959     }
5960     else {
5961         return addFloatx80Sigs(a, b, aSign, status);
5962     }
5963 
5964 }
5965 
5966 /*----------------------------------------------------------------------------
5967 | Returns the result of multiplying the extended double-precision floating-
5968 | point values `a' and `b'.  The operation is performed according to the
5969 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5970 *----------------------------------------------------------------------------*/
5971 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf * 0 is invalid; Inf * anything else is Inf. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* Normalize denormal operands (zeros are handled directly). */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* If the top product bit is clear, shift left one and adjust the
     * exponent so the significand is normalized. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6027 
6028 /*----------------------------------------------------------------------------
6029 | Returns the result of dividing the extended double-precision floating-point
6030 | value `a' by the corresponding value `b'.  The operation is performed
6031 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6032 *----------------------------------------------------------------------------*/
6033 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / Inf = 0. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; any other finite / 0 raises
             * divide-by-zero and returns Inf. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Pre-shift the dividend so each quotient estimate stays below 2. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits: estimate, then correct by adding the
     * divisor back while the remainder is negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits: refine the estimate only when it is close
     * enough to affect rounding, and fold any leftover remainder into
     * the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6114 
6115 /*----------------------------------------------------------------------------
6116 | Returns the remainder of the extended double-precision floating-point value
6117 | `a' with respect to the corresponding value `b'.  The operation is performed
6118 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6119 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6120 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6121 | the absolute value of the integer quotient.
6122 *----------------------------------------------------------------------------*/
6123 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; Inf dividend is invalid. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a; /* finite % Inf is the dividend, unchanged. */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Division by zero: invalid for remainder. */
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* a's exponent is below b's: for mod, or when |a| is clearly
         * less than |b|/2, the result is a itself. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Long division, producing 62 quotient bits per iteration. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        /* Under-estimate so the partial remainder stays non-negative. */
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    /* Final partial chunk of quotient bits. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Correct the under-estimate upward while the (shifted) divisor
         * still fits into the remainder. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: choose whichever of r and r - b has the
         * smaller magnitude, breaking ties toward an even quotient. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6244 
6245 /*----------------------------------------------------------------------------
6246 | Returns the remainder of the extended double-precision floating-point value
6247 | `a' with respect to the corresponding value `b'.  The operation is performed
6248 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6249 *----------------------------------------------------------------------------*/
6250 
6251 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6252 {
6253     uint64_t quotient;
6254     return floatx80_modrem(a, b, false, &quotient, status);
6255 }
6256 
6257 /*----------------------------------------------------------------------------
6258 | Returns the remainder of the extended double-precision floating-point value
6259 | `a' with respect to the corresponding value `b', with the quotient truncated
6260 | toward zero.
6261 *----------------------------------------------------------------------------*/
6262 
6263 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6264 {
6265     uint64_t quotient;
6266     return floatx80_modrem(a, b, true, &quotient, status);
6267 }
6268 
6269 /*----------------------------------------------------------------------------
6270 | Returns the square root of the extended double-precision floating-point
6271 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6272 | for Binary Floating-Point Arithmetic.
6273 *----------------------------------------------------------------------------*/
6274 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Non-canonical extended-precision encodings are always invalid. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative number is invalid, except -0 which returns
           itself unchanged. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is about half the input's, relative to the bias. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial root estimate from the high 32 significand bits, refined by
       a 128/64 division estimate (Newton-style step). */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high word downward until the remainder a - z*z is
       non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low 64 bits of the root; only refine when the estimate is close
       enough to a rounding boundary for its error to matter. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Sticky bit: a non-zero remainder means the result is inexact. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6339 
6340 /*----------------------------------------------------------------------------
6341 | Returns the result of converting the quadruple-precision floating-point
6342 | value `a' to the extended double-precision floating-point format.  The
6343 | conversion is performed according to the IEC/IEEE Standard for Binary
6344 | Floating-Point Arithmetic.
6345 *----------------------------------------------------------------------------*/
6346 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN: convert through the common NaN format and quiet the result;
           infinity maps to the floatx80 infinity encoding. */
        if ( aSig0 | aSig1 ) {
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        aSig0 |= UINT64_C(0x0001000000000000);   /* restore implicit bit */
    }
    /* Left-align the quad significand for the 64-bit floatx80 mantissa,
       then round at the full 80-bit target precision. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6377 
6378 /*----------------------------------------------------------------------------
6379 | Returns the remainder of the quadruple-precision floating-point value `a'
6380 | with respect to the corresponding value `b'.  The operation is performed
6381 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6382 *----------------------------------------------------------------------------*/
6383 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(inf, x) is invalid. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* rem(x, inf) = x for finite x; NaN propagates. */
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid; a subnormal divisor is normalized. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| is far smaller than |b|: the remainder is a itself. */
    if ( expDiff < -1 ) return a;
    /* Restore the implicit integer bit and left-align both significands. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Long division: subtract q*b in 61-bit chunks until the exponent
       difference is small enough for one final partial step. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;   /* bias the digit estimate low */
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial quotient digit, scaled by the residual exponent
           difference. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Subtract b until the remainder goes negative; the value saved just
       before that is the non-negative remainder candidate. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* Pick whichever candidate is nearest zero, with ties going to the
       even quotient, per the IEEE remainder definition. */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6484 
6485 /*----------------------------------------------------------------------------
6486 | Returns the square root of the quadruple-precision floating-point value `a'.
6487 | The operation is performed according to the IEC/IEEE Standard for Binary
6488 | Floating-Point Arithmetic.
6489 *----------------------------------------------------------------------------*/
6490 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative number is invalid, except -0 -> -0. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is about half the input's, relative to the bias. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);   /* restore implicit bit */
    /* Initial 32-bit root estimate, refined via a 128/64 division step. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high word downward until the remainder a - z*z is
       non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low word of the root; only refine near a rounding boundary. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Sticky bit: a non-zero remainder means the result is inexact. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
6552 
/*----------------------------------------------------------------------------
| Compares the extended double-precision values `a' and `b'.  When
| `is_quiet' is false any NaN operand raises the invalid exception; when
| true only signaling NaNs do.  Returns a FloatRelation.
*----------------------------------------------------------------------------*/
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical encodings are unordered and always raise invalid. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* NaN operands compare unordered. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        /* Opposite signs: equal only if both are zero; otherwise the
           non-negative operand is the greater one. */
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes, inverting the outcome when
               both operands are negative. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6600 
6601 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6602 {
6603     return floatx80_compare_internal(a, b, 0, status);
6604 }
6605 
6606 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6607                                      float_status *status)
6608 {
6609     return floatx80_compare_internal(a, b, 1, status);
6610 }
6611 
/*----------------------------------------------------------------------------
| Compares the quadruple-precision values `a' and `b'.  When `is_quiet' is
| false any NaN operand raises the invalid exception; when true only
| signaling NaNs do.  Returns a FloatRelation.
*----------------------------------------------------------------------------*/
static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* NaN operands compare unordered. */
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        /* Opposite signs: equal only if both are zero; otherwise the
           non-negative operand is the greater one. */
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes, inverting the outcome when
               both operands are negative. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6646 
6647 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
6648 {
6649     return float128_compare_internal(a, b, 0, status);
6650 }
6651 
6652 FloatRelation float128_compare_quiet(float128 a, float128 b,
6653                                      float_status *status)
6654 {
6655     return float128_compare_internal(a, b, 1, status);
6656 }
6657 
/*----------------------------------------------------------------------------
| Returns `a' scaled by 2**n, rounded at the current floatx80 rounding
| precision.
*----------------------------------------------------------------------------*/
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    /* Non-canonical encodings are always invalid operands. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* NaN propagates; infinity is unchanged by scaling. */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return a;   /* +/-0 is unchanged by scaling */
        }
        aExp++;   /* effective exponent of a denormal */
    }

    /* Clamp n so that aExp + n cannot overflow int32_t; magnitudes beyond
       0x10000 already guarantee overflow/underflow of the result. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}
6696 
/*----------------------------------------------------------------------------
| Returns `a' scaled by 2**n, rounded according to the current rounding
| mode.
*----------------------------------------------------------------------------*/
float128 float128_scalbn(float128 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; infinity is unchanged by scaling. */
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN(a, a, status);
        }
        return a;
    }
    if (aExp != 0) {
        aSig0 |= UINT64_C(0x0001000000000000);   /* restore implicit bit */
    } else if (aSig0 == 0 && aSig1 == 0) {
        return a;   /* +/-0 is unchanged by scaling */
    } else {
        aExp++;   /* effective exponent of a subnormal */
    }

    /* Clamp n so that aExp + n cannot overflow int32_t; magnitudes beyond
       0x10000 already guarantee overflow/underflow of the result. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                         , status);

}
6732 
/*
 * Startup self-test (runs before main via the constructor attribute):
 * probe the host's fma() with a known input and, if the result is wrong,
 * set force_soft_fma so the soft-float FMA path is used instead.
 */
static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}
6753