xref: /openbmc/qemu/fpu/softfloat.c (revision 9261b245)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate a helper that flushes a denormal soft_t input to zero.
 * "NOCHECK": the status's flush_inputs_to_zero flag is NOT consulted
 * here; callers are expected to have checked it already (see the
 * GEN_INPUT_FLUSH{1,2,3} wrappers below).  On flush, the operand is
 * replaced by a zero of the same sign and input_denormal is raised.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * Generate a helper that flushes ONE denormal input to zero, but only
 * when the status requests it via flush_inputs_to_zero (checked once,
 * expected to be false in the common case).
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/*
 * As GEN_INPUT_FLUSH1, but for TWO inputs: flush_inputs_to_zero is
 * checked once, then both operands are flushed if denormal.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/*
 * As GEN_INPUT_FLUSH1, but for THREE inputs: flush_inputs_to_zero is
 * checked once, then all three operands are flushed if denormal.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value: 1 selects fpclassify on the host representation,
 * 0 selects the softfloat bit-pattern tests.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
/* Hardfloat disabled: softfloat routines are flattened into callers. */
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

/* View the same bits as either the softfloat or the host representation. */
typedef union {
    float32 s;  /* softfloat (bit-pattern) view */
    float h;    /* host-FPU view */
} union_float32;

typedef union {
    float64 s;  /* softfloat (bit-pattern) view */
    double h;   /* host-FPU view */
} union_float64;

/* Two-operand predicates; used as the pre/post checks in float{32,64}_gen2. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Run a 2-operand float32 operation on the host FPU when safe, falling
 * back to softfloat otherwise.
 *   hard/soft: host and softfloat implementations of the operation
 *   pre:  predicate on the (flushed) inputs that must hold to use the host
 *   post: predicate deciding whether a tiny result must be recomputed in
 *         softfloat (e.g. so that underflow flags come out right)
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs first so pre() sees the flushed values. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* Inf result: raise overflow (pre() is expected to have
         * rejected inf inputs — TODO confirm against each caller). */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Run a 2-operand float64 operation on the host FPU when safe, falling
 * back to softfloat otherwise.  See float32_gen2 for the roles of
 * hard/soft/pre/post.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs first so pre() sees the flushed values. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* Inf result: raise overflow (pre() is expected to have
         * rejected inf inputs — TODO confirm against each caller). */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* One-hot bit for a FloatClass value, for building class masks. */
#define float_cmask(bit)  (1u << (bit))

/* Class masks, usable to test several FloatClass values at once. */
enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
494 
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    /* Relies on qnan/snan being the highest FloatClass values. */
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
510 
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;   /* unbiased exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
536 
/* As FloatParts64, with a two-word fraction. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* As FloatParts64, with a four-word fraction. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
558 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;         /* in the shifted (uncanonical) position */
    uint64_t frac_lsbm1;       /* one bit below frac_lsb, for rounding */
    uint64_t round_mask;       /* bits below frac_lsb */
    uint64_t roundeven_mask;   /* round_mask plus frac_lsb itself */
    bool arm_althp;
} FloatFmt;
584 
/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * Shift amounts are reduced with "& 63" so the same expressions remain
 * valid for fractions wider than one 64-bit word (e.g. float128's F=112).
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
596 
/* 5-bit exponent, 10-bit fraction (half precision). */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Half precision with the ARM Alternative Half Precision modifier. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* 8-bit exponent, 7-bit fraction (bfloat16). */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* 8-bit exponent, 23-bit fraction (single precision). */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* 11-bit exponent, 52-bit fraction (double precision). */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* 15-bit exponent, 112-bit fraction (quadruple precision). */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
621 
622 /* Unpack a float to parts, but do not canonicalize.  */
623 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
624 {
625     const int f_size = fmt->frac_size;
626     const int e_size = fmt->exp_size;
627 
628     *r = (FloatParts64) {
629         .cls = float_class_unclassified,
630         .sign = extract64(raw, f_size + e_size, 1),
631         .exp = extract64(raw, f_size, e_size),
632         .frac = extract64(raw, 0, f_size)
633     };
634 }
635 
/* Per-format wrappers around unpack_raw64 (no canonicalization). */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
655 
656 static void float128_unpack_raw(FloatParts128 *p, float128 f)
657 {
658     const int f_size = float128_params.frac_size - 64;
659     const int e_size = float128_params.exp_size;
660 
661     *p = (FloatParts128) {
662         .cls = float_class_unclassified,
663         .sign = extract64(f.high, f_size + e_size, 1),
664         .exp = extract64(f.high, f_size, e_size),
665         .frac_hi = extract64(f.high, 0, f_size),
666         .frac_lo = f.low,
667     };
668 }
669 
670 /* Pack a float from parts, but do not canonicalize.  */
671 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
672 {
673     const int f_size = fmt->frac_size;
674     const int e_size = fmt->exp_size;
675     uint64_t ret;
676 
677     ret = (uint64_t)p->sign << (f_size + e_size);
678     ret = deposit64(ret, f_size, e_size, p->exp);
679     ret = deposit64(ret, 0, f_size, p->frac);
680     return ret;
681 }
682 
/* Per-format wrappers around pack_raw64 (no canonicalization). */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
702 
703 static float128 float128_pack_raw(const FloatParts128 *p)
704 {
705     const int f_size = float128_params.frac_size - 64;
706     const int e_size = float128_params.exp_size;
707     uint64_t hi;
708 
709     hi = (uint64_t)p->sign << (f_size + e_size);
710     hi = deposit64(hi, f_size, e_size, p->exp);
711     hi = deposit64(hi, 0, f_size, p->frac_hi);
712     return make_float128(hi, p->frac_lo);
713 }
714 
715 /*----------------------------------------------------------------------------
716 | Functions and definitions to determine:  (1) whether tininess for underflow
717 | is detected before or after rounding by default, (2) what (if anything)
718 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
719 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720 | are propagated from function inputs to output.  These details are target-
721 | specific.
722 *----------------------------------------------------------------------------*/
723 #include "softfloat-specialize.c.inc"
724 
/*
 * Select the parts64/parts128 (and, for the 256 variant, parts256)
 * implementation of NAME according to the pointer type of P, via
 * QEMU_GENERIC.  parts64_##NAME is the default case.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
731 
/*
 * Forward declarations of the per-size implementations, together with
 * parts_* macros that dispatch on the FloatParts type of the first
 * pointer argument (see PARTS_GENERIC_64_128 above).
 */

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
836 
837 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
838                                  int scale, float_status *s,
839                                  const FloatFmt *fmt);
840 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
841                                   int scale, float_status *s,
842                                   const FloatFmt *fmt);
843 
844 #define parts_round_to_int(A, R, C, S, F) \
845     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
846 
847 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
848                                      int scale, int64_t min, int64_t max,
849                                      float_status *s);
850 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
851                                      int scale, int64_t min, int64_t max,
852                                      float_status *s);
853 
854 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
855     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
856 
857 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
858                                       int scale, uint64_t max,
859                                       float_status *s);
860 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
861                                        int scale, uint64_t max,
862                                        float_status *s);
863 
864 #define parts_float_to_uint(P, R, Z, M, S) \
865     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
866 
867 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
868                                   int scale, float_status *s);
869 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
870                                    int scale, float_status *s);
871 
872 #define parts_sint_to_float(P, I, Z, S) \
873     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
874 
875 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
876                                   int scale, float_status *s);
877 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
878                                    int scale, float_status *s);
879 
880 #define parts_uint_to_float(P, I, Z, S) \
881     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
882 
883 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
884                                     float_status *s, int flags);
885 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
886                                       float_status *s, int flags);
887 
888 #define parts_minmax(A, B, S, F) \
889     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
890 
891 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
892                            float_status *s, bool q);
893 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
894                             float_status *s, bool q);
895 
896 #define parts_compare(A, B, S, Q) \
897     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
898 
899 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
900 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
901 
902 #define parts_scalbn(A, N, S) \
903     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
904 
905 /*
906  * Helper functions for softfloat-parts.c.inc, per-size operations.
907  */
908 
/*
 * Same dispatch scheme as PARTS_GENERIC_*: QEMU_GENERIC selects the
 * frac64_/frac128_/frac256_ variant from the pointer type of P.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
915 
916 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
917 {
918     return uadd64_overflow(a->frac, b->frac, &r->frac);
919 }
920 
921 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
922 {
923     bool c = 0;
924     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
925     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
926     return c;
927 }
928 
929 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
930 {
931     bool c = 0;
932     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
933     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
934     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
935     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
936     return c;
937 }
938 
939 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
940 
941 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
942 {
943     return uadd64_overflow(a->frac, c, &r->frac);
944 }
945 
946 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
947 {
948     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
949     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
950 }
951 
952 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
953 
954 static void frac64_allones(FloatParts64 *a)
955 {
956     a->frac = -1;
957 }
958 
959 static void frac128_allones(FloatParts128 *a)
960 {
961     a->frac_hi = a->frac_lo = -1;
962 }
963 
964 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
965 
966 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
967 {
968     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
969 }
970 
971 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
972 {
973     uint64_t ta = a->frac_hi, tb = b->frac_hi;
974     if (ta == tb) {
975         ta = a->frac_lo, tb = b->frac_lo;
976         if (ta == tb) {
977             return 0;
978         }
979     }
980     return ta < tb ? -1 : 1;
981 }
982 
983 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
984 
985 static void frac64_clear(FloatParts64 *a)
986 {
987     a->frac = 0;
988 }
989 
990 static void frac128_clear(FloatParts128 *a)
991 {
992     a->frac_hi = a->frac_lo = 0;
993 }
994 
995 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
996 
/*
 * Divide fraction a by fraction b, leaving the quotient (with sticky
 * lsb) in a.  Returns true when the caller must decrement the exponent
 * to compensate for pre-shifting the dividend.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1028 
/*
 * 128-bit counterpart of frac64_div: divide fraction a by fraction b,
 * leaving a 128-bit quotient (with sticky lsb) in a.  Returns true when
 * the caller must decrement the exponent to match the pre-shift.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* As for frac64_div: shift A right unless A < B, so the quotient
     * comes out exactly N bits wide. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1077 
1078 static bool frac64_eqz(FloatParts64 *a)
1079 {
1080     return a->frac == 0;
1081 }
1082 
1083 static bool frac128_eqz(FloatParts128 *a)
1084 {
1085     return (a->frac_hi | a->frac_lo) == 0;
1086 }
1087 
1088 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1089 
/* Widening multiply: r (double width) = a * b. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1102 
1103 static void frac64_neg(FloatParts64 *a)
1104 {
1105     a->frac = -a->frac;
1106 }
1107 
1108 static void frac128_neg(FloatParts128 *a)
1109 {
1110     bool c = 0;
1111     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1112     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1113 }
1114 
1115 static void frac256_neg(FloatParts256 *a)
1116 {
1117     bool c = 0;
1118     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1119     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1120     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1121     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1122 }
1123 
1124 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1125 
1126 static int frac64_normalize(FloatParts64 *a)
1127 {
1128     if (a->frac) {
1129         int shift = clz64(a->frac);
1130         a->frac <<= shift;
1131         return shift;
1132     }
1133     return 64;
1134 }
1135 
/*
 * Shift the 128-bit fraction left until its msb is set; return the
 * shift count (128 when the fraction was zero).
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        /* High word empty: the low word becomes the high word. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1151 
/*
 * Shift the 256-bit fraction left until its msb is set; return the
 * shift count (256 when the fraction was zero).
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Promote the highest non-zero word to a0, counting 64 per word. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Final sub-word shift across all four words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1200 
1201 static void frac64_shl(FloatParts64 *a, int c)
1202 {
1203     a->frac <<= c;
1204 }
1205 
1206 static void frac128_shl(FloatParts128 *a, int c)
1207 {
1208     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1209 
1210     if (c & 64) {
1211         a0 = a1, a1 = 0;
1212     }
1213 
1214     c &= 63;
1215     if (c) {
1216         a0 = shl_double(a0, a1, c);
1217         a1 = a1 << c;
1218     }
1219 
1220     a->frac_hi = a0;
1221     a->frac_lo = a1;
1222 }
1223 
1224 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1225 
1226 static void frac64_shr(FloatParts64 *a, int c)
1227 {
1228     a->frac >>= c;
1229 }
1230 
1231 static void frac128_shr(FloatParts128 *a, int c)
1232 {
1233     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234 
1235     if (c & 64) {
1236         a1 = a0, a0 = 0;
1237     }
1238 
1239     c &= 63;
1240     if (c) {
1241         a1 = shr_double(a0, a1, c);
1242         a0 = a0 >> c;
1243     }
1244 
1245     a->frac_hi = a0;
1246     a->frac_lo = a1;
1247 }
1248 
1249 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1250 
/*
 * Shift the fraction right by c bits, folding any bits shifted out
 * into the least significant bit ("jamming" the sticky bit), so
 * later inexact decisions see the lost bits.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything shifted out: only the sticky bit survives. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word step: the low word becomes sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything shifted out: only the sticky bit survives. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word steps of 128 and 64 bits; shifted-out words
         * accumulate into sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything shifted out: only the sticky bit survives. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1340 
1341 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1342 {
1343     return usub64_overflow(a->frac, b->frac, &r->frac);
1344 }
1345 
1346 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1347 {
1348     bool c = 0;
1349     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1350     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1351     return c;
1352 }
1353 
1354 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1355 {
1356     bool c = 0;
1357     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1358     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1359     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1360     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1361     return c;
1362 }
1363 
1364 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1365 
1366 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1367 {
1368     r->frac = a->frac_hi | (a->frac_lo != 0);
1369 }
1370 
1371 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1372 {
1373     r->frac_hi = a->frac_hi;
1374     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1375 }
1376 
1377 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1378 
1379 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1380 {
1381     r->frac_hi = a->frac;
1382     r->frac_lo = 0;
1383 }
1384 
1385 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1386 {
1387     r->frac_hi = a->frac_hi;
1388     r->frac_hm = a->frac_lo;
1389     r->frac_lm = 0;
1390     r->frac_lo = 0;
1391 }
1392 
1393 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1394 
1395 /*
1396  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1397  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1398  * and thus MIT licenced.
1399  */
1400 static const uint16_t rsqrt_tab[128] = {
1401     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1402     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1403     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1404     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1405     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1406     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1407     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1408     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1409     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1410     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1411     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1412     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1413     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1414     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1415     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1416     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1417 };
1418 
/*
 * Instantiate the generic templates for each fraction width.  N is the
 * working width and W the widened width used by the multiply paths;
 * partsN()/FloatPartsN/FloatPartsW paste them into the names used
 * inside the included files.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N            256

/* At 256 bits only the add/sub helpers are instantiated. */
#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1448 
1449 /*
1450  * Pack/unpack routines with a specific FloatFmt.
1451  */
1452 
/* Unpack a float16 to canonical parts using caller-supplied format
 * parameters. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* Unpack a float16 with the standard float16 format parameters. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack a bfloat16 to canonical parts. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/* Round canonical parts per caller-supplied format parameters and
 * repack as a float16. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

/* Same pairs for float32, float64 and float128. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1535 
1536 /*
1537  * Addition and subtraction
1538  */
1539 
1540 static float16 QEMU_FLATTEN
1541 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1542 {
1543     FloatParts64 pa, pb, *pr;
1544 
1545     float16_unpack_canonical(&pa, a, status);
1546     float16_unpack_canonical(&pb, b, status);
1547     pr = parts_addsub(&pa, &pb, status, subtract);
1548 
1549     return float16_round_pack_canonical(pr, status);
1550 }
1551 
1552 float16 float16_add(float16 a, float16 b, float_status *status)
1553 {
1554     return float16_addsub(a, b, status, false);
1555 }
1556 
1557 float16 float16_sub(float16 a, float16 b, float_status *status)
1558 {
1559     return float16_addsub(a, b, status, true);
1560 }
1561 
1562 static float32 QEMU_SOFTFLOAT_ATTR
1563 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1564 {
1565     FloatParts64 pa, pb, *pr;
1566 
1567     float32_unpack_canonical(&pa, a, status);
1568     float32_unpack_canonical(&pb, b, status);
1569     pr = parts_addsub(&pa, &pb, status, subtract);
1570 
1571     return float32_round_pack_canonical(pr, status);
1572 }
1573 
1574 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1575 {
1576     return soft_f32_addsub(a, b, status, false);
1577 }
1578 
1579 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1580 {
1581     return soft_f32_addsub(a, b, status, true);
1582 }
1583 
1584 static float64 QEMU_SOFTFLOAT_ATTR
1585 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1586 {
1587     FloatParts64 pa, pb, *pr;
1588 
1589     float64_unpack_canonical(&pa, a, status);
1590     float64_unpack_canonical(&pb, b, status);
1591     pr = parts_addsub(&pa, &pb, status, subtract);
1592 
1593     return float64_round_pack_canonical(pr, status);
1594 }
1595 
1596 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1597 {
1598     return soft_f64_addsub(a, b, status, false);
1599 }
1600 
1601 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1602 {
1603     return soft_f64_addsub(a, b, status, true);
1604 }
1605 
/* Native-arithmetic helpers handed to float32_gen2/float64_gen2
 * for the hardfloat fast path. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;
    return sum;
}

static float hard_f32_sub(float a, float b)
{
    float diff = a - b;
    return diff;
}

static double hard_f64_add(double a, double b)
{
    double sum = a + b;
    return sum;
}

static double hard_f64_sub(double a, double b)
{
    double diff = a - b;
    return diff;
}
1625 
1626 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1627 {
1628     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1629         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1630     }
1631     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1632 }
1633 
1634 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1635 {
1636     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1637         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1638     } else {
1639         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1640     }
1641 }
1642 
/*
 * Bind the add/sub pre/post checks into the generic hard/soft
 * dispatcher (float32_gen2/float64_gen2, defined earlier in the file).
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1656 
/* Public float32/float64 add and sub entry points. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1680 
1681 static bfloat16 QEMU_FLATTEN
1682 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1683 {
1684     FloatParts64 pa, pb, *pr;
1685 
1686     bfloat16_unpack_canonical(&pa, a, status);
1687     bfloat16_unpack_canonical(&pb, b, status);
1688     pr = parts_addsub(&pa, &pb, status, subtract);
1689 
1690     return bfloat16_round_pack_canonical(pr, status);
1691 }
1692 
1693 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1694 {
1695     return bfloat16_addsub(a, b, status, false);
1696 }
1697 
1698 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1699 {
1700     return bfloat16_addsub(a, b, status, true);
1701 }
1702 
1703 static float128 QEMU_FLATTEN
1704 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1705 {
1706     FloatParts128 pa, pb, *pr;
1707 
1708     float128_unpack_canonical(&pa, a, status);
1709     float128_unpack_canonical(&pb, b, status);
1710     pr = parts_addsub(&pa, &pb, status, subtract);
1711 
1712     return float128_round_pack_canonical(pr, status);
1713 }
1714 
1715 float128 float128_add(float128 a, float128 b, float_status *status)
1716 {
1717     return float128_addsub(a, b, status, false);
1718 }
1719 
1720 float128 float128_sub(float128 a, float128 b, float_status *status)
1721 {
1722     return float128_addsub(a, b, status, true);
1723 }
1724 
1725 /*
1726  * Multiplication
1727  */
1728 
1729 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1730 {
1731     FloatParts64 pa, pb, *pr;
1732 
1733     float16_unpack_canonical(&pa, a, status);
1734     float16_unpack_canonical(&pb, b, status);
1735     pr = parts_mul(&pa, &pb, status);
1736 
1737     return float16_round_pack_canonical(pr, status);
1738 }
1739 
1740 static float32 QEMU_SOFTFLOAT_ATTR
1741 soft_f32_mul(float32 a, float32 b, float_status *status)
1742 {
1743     FloatParts64 pa, pb, *pr;
1744 
1745     float32_unpack_canonical(&pa, a, status);
1746     float32_unpack_canonical(&pb, b, status);
1747     pr = parts_mul(&pa, &pb, status);
1748 
1749     return float32_round_pack_canonical(pr, status);
1750 }
1751 
1752 static float64 QEMU_SOFTFLOAT_ATTR
1753 soft_f64_mul(float64 a, float64 b, float_status *status)
1754 {
1755     FloatParts64 pa, pb, *pr;
1756 
1757     float64_unpack_canonical(&pa, a, status);
1758     float64_unpack_canonical(&pb, b, status);
1759     pr = parts_mul(&pa, &pb, status);
1760 
1761     return float64_round_pack_canonical(pr, status);
1762 }
1763 
/* Native-arithmetic multiply helpers for the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;
    return prod;
}

static double hard_f64_mul(double a, double b)
{
    double prod = a * b;
    return prod;
}
1773 
/*
 * Public float32/float64 multiply: float32_gen2/float64_gen2 combine
 * the native and softfloat implementations, reusing the add/sub
 * pre/post checks.
 */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1787 
1788 bfloat16 QEMU_FLATTEN
1789 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1790 {
1791     FloatParts64 pa, pb, *pr;
1792 
1793     bfloat16_unpack_canonical(&pa, a, status);
1794     bfloat16_unpack_canonical(&pb, b, status);
1795     pr = parts_mul(&pa, &pb, status);
1796 
1797     return bfloat16_round_pack_canonical(pr, status);
1798 }
1799 
1800 float128 QEMU_FLATTEN
1801 float128_mul(float128 a, float128 b, float_status *status)
1802 {
1803     FloatParts128 pa, pb, *pr;
1804 
1805     float128_unpack_canonical(&pa, a, status);
1806     float128_unpack_canonical(&pb, b, status);
1807     pr = parts_mul(&pa, &pb, status);
1808 
1809     return float128_round_pack_canonical(pr, status);
1810 }
1811 
1812 /*
1813  * Fused multiply-add
1814  */
1815 
1816 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1817                                     int flags, float_status *status)
1818 {
1819     FloatParts64 pa, pb, pc, *pr;
1820 
1821     float16_unpack_canonical(&pa, a, status);
1822     float16_unpack_canonical(&pb, b, status);
1823     float16_unpack_canonical(&pc, c, status);
1824     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1825 
1826     return float16_round_pack_canonical(pr, status);
1827 }
1828 
1829 static float32 QEMU_SOFTFLOAT_ATTR
1830 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1831                 float_status *status)
1832 {
1833     FloatParts64 pa, pb, pc, *pr;
1834 
1835     float32_unpack_canonical(&pa, a, status);
1836     float32_unpack_canonical(&pb, b, status);
1837     float32_unpack_canonical(&pc, c, status);
1838     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1839 
1840     return float32_round_pack_canonical(pr, status);
1841 }
1842 
1843 static float64 QEMU_SOFTFLOAT_ATTR
1844 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1845                 float_status *status)
1846 {
1847     FloatParts64 pa, pb, pc, *pr;
1848 
1849     float64_unpack_canonical(&pa, a, status);
1850     float64_unpack_canonical(&pb, b, status);
1851     float64_unpack_canonical(&pc, c, status);
1852     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1853 
1854     return float64_round_pack_canonical(pr, status);
1855 }
1856 
/* When set, always take the softfloat muladd path (used to bypass the
 * host-FPU fma fast path, e.g. for testing). */
static bool force_soft_fma;
1858 
/*
 * f32 fused multiply-add with hardfloat fast path.
 *
 * Uses the host fmaf() when the status configuration permits and all
 * three (flushed) inputs are zero-or-normal; any other case falls back
 * to soft_f32_muladd, which gets the IEEE flags exactly right.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Host FPU not usable with the current float_status settings. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving the result is not handled by the fast path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* Flush denormal inputs per status, then require zero-or-normal. */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the correctly-signed zero product, then add the addend. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals so the soft fallback sees unnegated inputs. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: recompute in softfloat for exact flags. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1929 
/*
 * f64 fused multiply-add with hardfloat fast path.
 *
 * Mirrors float32_muladd: uses the host fma() when the status settings
 * permit and all three (flushed) inputs are zero-or-normal; otherwise
 * falls back to soft_f64_muladd for exact IEEE flag behavior.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Host FPU not usable with the current float_status settings. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving the result is not handled by the fast path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* Flush denormal inputs per status, then require zero-or-normal. */
    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Build the correctly-signed zero product, then add the addend. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals so the soft fallback sees unnegated inputs. */
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * NOTE(review): this compares against FLT_MIN, not DBL_MIN,
             * in the double path.  It is conservative-safe -- every
             * double that could underflow is far below FLT_MIN, so all
             * underflow candidates still reach the soft path -- but
             * small normal results in (DBL_MIN, FLT_MIN] take the slow
             * path unnecessarily.  Confirm whether DBL_MIN was intended.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
2000 
2001 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2002                                       int flags, float_status *status)
2003 {
2004     FloatParts64 pa, pb, pc, *pr;
2005 
2006     bfloat16_unpack_canonical(&pa, a, status);
2007     bfloat16_unpack_canonical(&pb, b, status);
2008     bfloat16_unpack_canonical(&pc, c, status);
2009     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2010 
2011     return bfloat16_round_pack_canonical(pr, status);
2012 }
2013 
2014 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2015                                       int flags, float_status *status)
2016 {
2017     FloatParts128 pa, pb, pc, *pr;
2018 
2019     float128_unpack_canonical(&pa, a, status);
2020     float128_unpack_canonical(&pb, b, status);
2021     float128_unpack_canonical(&pc, c, status);
2022     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2023 
2024     return float128_round_pack_canonical(pr, status);
2025 }
2026 
2027 /*
2028  * Division
2029  */
2030 
2031 float16 float16_div(float16 a, float16 b, float_status *status)
2032 {
2033     FloatParts64 pa, pb, *pr;
2034 
2035     float16_unpack_canonical(&pa, a, status);
2036     float16_unpack_canonical(&pb, b, status);
2037     pr = parts_div(&pa, &pb, status);
2038 
2039     return float16_round_pack_canonical(pr, status);
2040 }
2041 
2042 static float32 QEMU_SOFTFLOAT_ATTR
2043 soft_f32_div(float32 a, float32 b, float_status *status)
2044 {
2045     FloatParts64 pa, pb, *pr;
2046 
2047     float32_unpack_canonical(&pa, a, status);
2048     float32_unpack_canonical(&pb, b, status);
2049     pr = parts_div(&pa, &pb, status);
2050 
2051     return float32_round_pack_canonical(pr, status);
2052 }
2053 
2054 static float64 QEMU_SOFTFLOAT_ATTR
2055 soft_f64_div(float64 a, float64 b, float_status *status)
2056 {
2057     FloatParts64 pa, pb, *pr;
2058 
2059     float64_unpack_canonical(&pa, a, status);
2060     float64_unpack_canonical(&pb, b, status);
2061     pr = parts_div(&pa, &pb, status);
2062 
2063     return float64_round_pack_canonical(pr, status);
2064 }
2065 
/* Host-FPU division helpers: perform the operation natively. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;

    return quotient;
}

static double hard_f64_div(double a, double b)
{
    double quotient = a / b;

    return quotient;
}
2075 
2076 static bool f32_div_pre(union_float32 a, union_float32 b)
2077 {
2078     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2079         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2080                fpclassify(b.h) == FP_NORMAL;
2081     }
2082     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2083 }
2084 
2085 static bool f64_div_pre(union_float64 a, union_float64 b)
2086 {
2087     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2088         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2089                fpclassify(b.h) == FP_NORMAL;
2090     }
2091     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2092 }
2093 
2094 static bool f32_div_post(union_float32 a, union_float32 b)
2095 {
2096     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2097         return fpclassify(a.h) != FP_ZERO;
2098     }
2099     return !float32_is_zero(a.s);
2100 }
2101 
2102 static bool f64_div_post(union_float64 a, union_float64 b)
2103 {
2104     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2105         return fpclassify(a.h) != FP_ZERO;
2106     }
2107     return !float64_is_zero(a.s);
2108 }
2109 
/*
 * f32/f64 divide entry points: dispatch through the generic two-operand
 * helper, pairing the host-FPU fast path (hard_f32_div / hard_f64_div)
 * with the softfloat fallback, gated by f32/f64_div_pre and validated
 * by f32/f64_div_post.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2123 
2124 bfloat16 QEMU_FLATTEN
2125 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2126 {
2127     FloatParts64 pa, pb, *pr;
2128 
2129     bfloat16_unpack_canonical(&pa, a, status);
2130     bfloat16_unpack_canonical(&pb, b, status);
2131     pr = parts_div(&pa, &pb, status);
2132 
2133     return bfloat16_round_pack_canonical(pr, status);
2134 }
2135 
2136 float128 QEMU_FLATTEN
2137 float128_div(float128 a, float128 b, float_status *status)
2138 {
2139     FloatParts128 pa, pb, *pr;
2140 
2141     float128_unpack_canonical(&pa, a, status);
2142     float128_unpack_canonical(&pb, b, status);
2143     pr = parts_div(&pa, &pb, status);
2144 
2145     return float128_round_pack_canonical(pr, status);
2146 }
2147 
2148 /*
2149  * Float to Float conversions
2150  *
2151  * Returns the result of converting one float format to another. The
2152  * conversion is performed according to the IEC/IEEE Standard for
2153  * Binary Floating-Point Arithmetic.
2154  *
2155  * Usually this only needs to take care of raising invalid exceptions
2156  * and handling the conversion on NaNs.
2157  */
2158 
/*
 * Convert unpacked parts for the alternate half-precision (AHP) f16
 * format, which encodes neither NaN nor Inf: NaNs collapse to a zero
 * with the input's sign, and Inf saturates to the maximum normal, both
 * raising Invalid.  Normal and zero values pass through unchanged.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2192 
/* Same-width float-to-float conversion: only NaNs need attention
 * (canonicalize/silence per the target's NaN rules). */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* 128-bit parts variant of the above. */
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Dispatch to the 64- or 128-bit variant based on the type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2209 
/*
 * Narrow 128-bit parts to 64-bit parts: copy class/sign/exponent,
 * truncate the fraction with sticky-bit jamming for normals, and
 * handle NaN canonicalization after discarding the low NaN bits.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}

/*
 * Widen 64-bit parts to 128-bit parts: copy class/sign/exponent and
 * widen the fraction; NaNs are then canonicalized for the target.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2238 
2239 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2240 {
2241     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2242     FloatParts64 p;
2243 
2244     float16a_unpack_canonical(&p, a, s, fmt16);
2245     parts_float_to_float(&p, s);
2246     return float32_round_pack_canonical(&p, s);
2247 }
2248 
2249 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2250 {
2251     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2252     FloatParts64 p;
2253 
2254     float16a_unpack_canonical(&p, a, s, fmt16);
2255     parts_float_to_float(&p, s);
2256     return float64_round_pack_canonical(&p, s);
2257 }
2258 
2259 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2260 {
2261     FloatParts64 p;
2262     const FloatFmt *fmt;
2263 
2264     float32_unpack_canonical(&p, a, s);
2265     if (ieee) {
2266         parts_float_to_float(&p, s);
2267         fmt = &float16_params;
2268     } else {
2269         parts_float_to_ahp(&p, s);
2270         fmt = &float16_params_ahp;
2271     }
2272     return float16a_round_pack_canonical(&p, s, fmt);
2273 }
2274 
/* Softfloat f32 -> f64 widening; fallback for the fast path below. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

/*
 * f32 -> f64 with hardfloat fast path: normals convert directly on the
 * host (widening is always exact, so no flags can be raised) and zeros
 * are rebuilt by hand; everything else (NaN/Inf/denormal) goes through
 * softfloat.
 */
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.  */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        return soft_float32_to_float64(a, s);
    }
}
2300 
2301 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2302 {
2303     FloatParts64 p;
2304     const FloatFmt *fmt;
2305 
2306     float64_unpack_canonical(&p, a, s);
2307     if (ieee) {
2308         parts_float_to_float(&p, s);
2309         fmt = &float16_params;
2310     } else {
2311         parts_float_to_ahp(&p, s);
2312         fmt = &float16_params_ahp;
2313     }
2314     return float16a_round_pack_canonical(&p, s, fmt);
2315 }
2316 
2317 float32 float64_to_float32(float64 a, float_status *s)
2318 {
2319     FloatParts64 p;
2320 
2321     float64_unpack_canonical(&p, a, s);
2322     parts_float_to_float(&p, s);
2323     return float32_round_pack_canonical(&p, s);
2324 }
2325 
2326 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2327 {
2328     FloatParts64 p;
2329 
2330     bfloat16_unpack_canonical(&p, a, s);
2331     parts_float_to_float(&p, s);
2332     return float32_round_pack_canonical(&p, s);
2333 }
2334 
2335 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2336 {
2337     FloatParts64 p;
2338 
2339     bfloat16_unpack_canonical(&p, a, s);
2340     parts_float_to_float(&p, s);
2341     return float64_round_pack_canonical(&p, s);
2342 }
2343 
2344 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2345 {
2346     FloatParts64 p;
2347 
2348     float32_unpack_canonical(&p, a, s);
2349     parts_float_to_float(&p, s);
2350     return bfloat16_round_pack_canonical(&p, s);
2351 }
2352 
2353 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2354 {
2355     FloatParts64 p;
2356 
2357     float64_unpack_canonical(&p, a, s);
2358     parts_float_to_float(&p, s);
2359     return bfloat16_round_pack_canonical(&p, s);
2360 }
2361 
2362 float32 float128_to_float32(float128 a, float_status *s)
2363 {
2364     FloatParts64 p64;
2365     FloatParts128 p128;
2366 
2367     float128_unpack_canonical(&p128, a, s);
2368     parts_float_to_float_narrow(&p64, &p128, s);
2369     return float32_round_pack_canonical(&p64, s);
2370 }
2371 
2372 float64 float128_to_float64(float128 a, float_status *s)
2373 {
2374     FloatParts64 p64;
2375     FloatParts128 p128;
2376 
2377     float128_unpack_canonical(&p128, a, s);
2378     parts_float_to_float_narrow(&p64, &p128, s);
2379     return float64_round_pack_canonical(&p64, s);
2380 }
2381 
2382 float128 float32_to_float128(float32 a, float_status *s)
2383 {
2384     FloatParts64 p64;
2385     FloatParts128 p128;
2386 
2387     float32_unpack_canonical(&p64, a, s);
2388     parts_float_to_float_widen(&p128, &p64, s);
2389     return float128_round_pack_canonical(&p128, s);
2390 }
2391 
2392 float128 float64_to_float128(float64 a, float_status *s)
2393 {
2394     FloatParts64 p64;
2395     FloatParts128 p128;
2396 
2397     float64_unpack_canonical(&p64, a, s);
2398     parts_float_to_float_widen(&p128, &p64, s);
2399     return float128_round_pack_canonical(&p128, s);
2400 }
2401 
2402 /*
2403  * Round to integral value
2404  */
2405 
/* Round to an integral value in the same format, using the rounding
 * mode from float_status; one wrapper per supported format. */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2450 
2451 /*
2452  * Floating-point to signed integer conversions
2453  */
2454 
/*
 * Float -> signed integer, scaled by 2**scale before conversion, with
 * the given rounding mode.  Each wrapper unpacks its format and passes
 * the destination's INT*_MIN/MAX saturation bounds to the shared
 * parts_float_to_sint implementation.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

/* f128 variants are file-local; only the fixed-mode wrappers are public. */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2589 
/* Float -> signed integer using the current rounding mode (scale 0). */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Float -> signed integer with truncation (round toward zero). */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

/* bf16 -> signed integer, current rounding mode and truncating forms. */
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2734 
2735 /*
2736  * Floating-point to unsigned integer conversions
2737  */
2738 
2739 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2740                                 float_status *s)
2741 {
2742     FloatParts64 p;
2743 
2744     float16_unpack_canonical(&p, a, s);
2745     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
2746 }
2747 
2748 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2749                                   float_status *s)
2750 {
2751     FloatParts64 p;
2752 
2753     float16_unpack_canonical(&p, a, s);
2754     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2755 }
2756 
2757 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2758                                   float_status *s)
2759 {
2760     FloatParts64 p;
2761 
2762     float16_unpack_canonical(&p, a, s);
2763     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2764 }
2765 
2766 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2767                                   float_status *s)
2768 {
2769     FloatParts64 p;
2770 
2771     float16_unpack_canonical(&p, a, s);
2772     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2773 }
2774 
2775 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2776                                   float_status *s)
2777 {
2778     FloatParts64 p;
2779 
2780     float32_unpack_canonical(&p, a, s);
2781     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2782 }
2783 
2784 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2785                                   float_status *s)
2786 {
2787     FloatParts64 p;
2788 
2789     float32_unpack_canonical(&p, a, s);
2790     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2791 }
2792 
2793 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2794                                   float_status *s)
2795 {
2796     FloatParts64 p;
2797 
2798     float32_unpack_canonical(&p, a, s);
2799     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2800 }
2801 
/*
 * float64 -> unsigned integer conversions.
 */
uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2828 
/*
 * bfloat16 -> unsigned integer conversions.
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2855 
/*
 * float128 -> unsigned integer conversions.  These use the 128-bit
 * parts representation; static because only the fixed-rounding-mode
 * wrappers below are public.
 */
static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2873 
/*
 * Convenience wrappers that take the rounding mode from float_status
 * and apply no scaling.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
2933 
/*
 * Truncating variants: force float_round_to_zero regardless of the
 * rounding mode in float_status.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2988 
/*
 * bfloat16 -> unsigned integer wrappers: status rounding mode and
 * truncating variants, no scaling.
 */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
3018 
3019 /*
3020  * Signed integer to floating-point conversions
3021  */
3022 
/*
 * int -> float16.  Narrower signed inputs widen to int64_t exactly,
 * so all variants share the int64 implementation.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3060 
3061 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3062 {
3063     FloatParts64 p;
3064 
3065     parts64_sint_to_float(&p, a, scale, status);
3066     return float32_round_pack_canonical(&p, status);
3067 }
3068 
/*
 * int -> float32 wrappers; all widen exactly to int64_t.
 */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3093 
/*
 * int -> float64.
 */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3126 
/*
 * int -> bfloat16.
 */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3159 
/*
 * int -> float128.  No scalbn variants; scale is fixed at 0.
 */
float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3172 
3173 /*
3174  * Unsigned Integer to floating-point conversions
3175  */
3176 
/*
 * uint -> float16.  Narrower unsigned inputs widen to uint64_t
 * exactly, so all variants share the uint64 implementation.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3214 
/*
 * uint -> float32.
 */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3247 
/*
 * uint -> float64.
 */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3280 
/*
 * uint -> bfloat16.
 */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3313 
/*
 * uint64 -> float128, fixed scale of 0.
 */
float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
3321 
3322 /*
3323  * Minimum and maximum
3324  */
3325 
3326 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3327 {
3328     FloatParts64 pa, pb, *pr;
3329 
3330     float16_unpack_canonical(&pa, a, s);
3331     float16_unpack_canonical(&pb, b, s);
3332     pr = parts_minmax(&pa, &pb, s, flags);
3333 
3334     return float16_round_pack_canonical(pr, s);
3335 }
3336 
3337 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3338                                 float_status *s, int flags)
3339 {
3340     FloatParts64 pa, pb, *pr;
3341 
3342     bfloat16_unpack_canonical(&pa, a, s);
3343     bfloat16_unpack_canonical(&pb, b, s);
3344     pr = parts_minmax(&pa, &pb, s, flags);
3345 
3346     return bfloat16_round_pack_canonical(pr, s);
3347 }
3348 
3349 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3350 {
3351     FloatParts64 pa, pb, *pr;
3352 
3353     float32_unpack_canonical(&pa, a, s);
3354     float32_unpack_canonical(&pb, b, s);
3355     pr = parts_minmax(&pa, &pb, s, flags);
3356 
3357     return float32_round_pack_canonical(pr, s);
3358 }
3359 
3360 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3361 {
3362     FloatParts64 pa, pb, *pr;
3363 
3364     float64_unpack_canonical(&pa, a, s);
3365     float64_unpack_canonical(&pb, b, s);
3366     pr = parts_minmax(&pa, &pb, s, flags);
3367 
3368     return float64_round_pack_canonical(pr, s);
3369 }
3370 
3371 static float128 float128_minmax(float128 a, float128 b,
3372                                 float_status *s, int flags)
3373 {
3374     FloatParts128 pa, pb, *pr;
3375 
3376     float128_unpack_canonical(&pa, a, s);
3377     float128_unpack_canonical(&pb, b, s);
3378     pr = parts_minmax(&pa, &pb, s, flags);
3379 
3380     return float128_round_pack_canonical(pr, s);
3381 }
3382 
/*
 * Expand the six public min/max entry points for each format from the
 * private *_minmax helper.  Flag bits: minmax_ismin selects min vs
 * max, minmax_isnum selects the *num (NaN-ignoring) behavior, and
 * minmax_ismag compares magnitudes.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                      \
    MINMAX_1(type, maxnum, minmax_isnum)                        \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
    MINMAX_1(type, min, minmax_ismin)                           \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
3403 
3404 /*
3405  * Floating point compare
3406  */
3407 
/*
 * float16 comparison via parts_compare(); @is_quiet is forwarded to
 * distinguish quiet from signaling comparison predicates.
 */
static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}
3427 
/* Softfloat fallback for float32 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path: compare with the host FPU's quiet predicates
 * (isgreater/isgreaterequal/isless) after flushing inputs as the
 * status demands.  Ordered results return directly; the unordered
 * case (and builds with QEMU_NO_HARDFLOAT) drop to the softfloat
 * path, which also raises the appropriate exception flags.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}
3477 
/* Softfloat fallback for float64 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path, mirroring float32_hs_compare above: ordered
 * results come from the host FPU; unordered (or no-hardfloat builds)
 * fall back to softfloat so exception flags are set.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}
3527 
/*
 * bfloat16 comparison via parts_compare(); no hardfloat fast path.
 */
static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}
3547 
/*
 * float128 comparison via parts_compare(); no hardfloat fast path.
 */
static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}
3567 
3568 /*
3569  * Scale by 2**N
3570  */
3571 
3572 float16 float16_scalbn(float16 a, int n, float_status *status)
3573 {
3574     FloatParts64 p;
3575 
3576     float16_unpack_canonical(&p, a, status);
3577     parts_scalbn(&p, n, status);
3578     return float16_round_pack_canonical(&p, status);
3579 }
3580 
3581 float32 float32_scalbn(float32 a, int n, float_status *status)
3582 {
3583     FloatParts64 p;
3584 
3585     float32_unpack_canonical(&p, a, status);
3586     parts_scalbn(&p, n, status);
3587     return float32_round_pack_canonical(&p, status);
3588 }
3589 
3590 float64 float64_scalbn(float64 a, int n, float_status *status)
3591 {
3592     FloatParts64 p;
3593 
3594     float64_unpack_canonical(&p, a, status);
3595     parts_scalbn(&p, n, status);
3596     return float64_round_pack_canonical(&p, status);
3597 }
3598 
3599 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3600 {
3601     FloatParts64 p;
3602 
3603     bfloat16_unpack_canonical(&p, a, status);
3604     parts_scalbn(&p, n, status);
3605     return bfloat16_round_pack_canonical(&p, status);
3606 }
3607 
3608 float128 float128_scalbn(float128 a, int n, float_status *status)
3609 {
3610     FloatParts128 p;
3611 
3612     float128_unpack_canonical(&p, a, status);
3613     parts_scalbn(&p, n, status);
3614     return float128_round_pack_canonical(&p, status);
3615 }
3616 
3617 /*
3618  * Square Root
3619  */
3620 
/* float16 sqrt: softfloat only, no hardfloat fast path. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

/* Softfloat fallbacks for the hardfloat sqrt wrappers below. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
3649 
/*
 * float32 sqrt with hardfloat fast path: usable only when the status
 * permits hardfloat and the (flushed) input is a non-negative zero or
 * normal number, so host sqrtf() matches softfloat exactly; all other
 * inputs go to soft_f32_sqrt().
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FPU representation. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3676 
/*
 * float64 sqrt with hardfloat fast path; same structure as
 * float32_sqrt above, using host sqrt().
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FPU representation. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3703 
/* bfloat16 and float128 sqrt: softfloat only. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}
3721 
3722 /*----------------------------------------------------------------------------
3723 | The pattern for a default generated NaN.
3724 *----------------------------------------------------------------------------*/
3725 
/*
 * Build the target's default NaN pattern with parts_default_nan(),
 * then shift the fraction down from the canonical position into the
 * format's storage position before packing raw.
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction needs the multiword shift helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3770 
3771 /*----------------------------------------------------------------------------
3772 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3773 *----------------------------------------------------------------------------*/
3774 
/*
 * Silence a signalling NaN: unpack raw, shift the fraction up to the
 * canonical position so parts_silence_nan() can operate on it, then
 * shift back down and repack.
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /* 128-bit fraction needs the multiword shift helpers. */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3829 
3830 /*----------------------------------------------------------------------------
3831 | If `a' is denormal and we are in flush-to-zero mode then set the
3832 | input-denormal exception and return zero. Otherwise just return the value.
3833 *----------------------------------------------------------------------------*/
3834 
/*
 * Return true (and raise input-denormal) if the raw-unpacked parts
 * describe a denormal: biased exponent zero with a nonzero fraction.
 * Takes @p by value; only the status is modified.
 */
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}
3844 
/*
 * Per-format flush-to-zero of denormal inputs: when the status has
 * flush_inputs_to_zero set and the value is denormal, return a zero
 * of the same sign; otherwise return the input unchanged.
 */
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
3896 
3897 /*----------------------------------------------------------------------------
3898 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3899 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3900 | input.  If `zSign' is 1, the input is negated before being converted to an
3901 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3902 | is simply rounded to an integer, with the inexact exception raised if the
3903 | input cannot be represented exactly as an integer.  However, if the fixed-
3904 | point input is too large, the invalid exception is raised and the largest
3905 | positive or negative integer is returned.
3906 *----------------------------------------------------------------------------*/
3907 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value to add below the rounding point (bit 7) so that
     * the single ( absZ + roundIncrement ) >> 7 below implements the
     * requested rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;   /* add half an ulp */
        break;
    case float_round_to_zero:
        roundIncrement = 0;      /* truncate */
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away from zero only when the result LSB would be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to make the result even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /*
     * Out of int32 range: either the magnitude no longer fits in 32 bits,
     * or negation produced a value whose sign disagrees with zSign.
     * Raise invalid and saturate.
     */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    /* Any discarded bits mean the conversion was inexact. */
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3955 
3956 /*----------------------------------------------------------------------------
3957 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3958 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3959 | and returns the properly rounded 64-bit integer corresponding to the input.
3960 | If `zSign' is 1, the input is negated before being converted to an integer.
3961 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3962 | the inexact exception raised if the input cannot be represented exactly as
3963 | an integer.  However, if the fixed-point input is too large, the invalid
3964 | exception is raised and the largest positive or negative integer is
3965 | returned.
3966 *----------------------------------------------------------------------------*/
3967 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * absZ1 holds the bits below the binary point; decide whether the
     * integer part absZ0 must be incremented for the requested mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of absZ1 set means the fraction is >= 1/2. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Increment only when the result would otherwise be even. */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Carry out of 64 bits: the magnitude cannot fit in an int64. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie under nearest-even: force the result even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign mismatch after negation means the value is out of range. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    /* Any discarded fraction bits mean the conversion was inexact. */
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4017 
4018 /*----------------------------------------------------------------------------
4019 | Normalizes the subnormal single-precision floating-point value represented
4020 | by the denormalized significand `aSig'.  The normalized exponent and
4021 | significand are stored at the locations pointed to by `zExpPtr' and
4022 | `zSigPtr', respectively.
4023 *----------------------------------------------------------------------------*/
4024 
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* 8 = 32 - (23 fraction bits + 1 integer bit). */
    int8_t shift = clz32(aSig) - 8;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4035 
4036 /*----------------------------------------------------------------------------
4037 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4038 | and significand `zSig', and returns the proper single-precision floating-
4039 | point value corresponding to the abstract input.  Ordinarily, the abstract
4040 | value is simply rounded and packed into the single-precision format, with
4041 | the inexact exception raised if the abstract input cannot be represented
4042 | exactly.  However, if the abstract value is too large, the overflow and
4043 | inexact exceptions are raised and an infinity or maximal finite value is
4044 | returned.  If the abstract value is too small, the input value is rounded to
4045 | a subnormal number, and the underflow and inexact exceptions are raised if
4046 | the abstract input cannot be represented exactly as a subnormal single-
4047 | precision floating-point number.
4048 |     The input significand `zSig' has its binary point between bits 30
4049 | and 29, which is 7 bits to the left of the usual location.  This shifted
4050 | significand must be normalized or smaller.  If `zSig' is not normalized,
4051 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4052 | and it must not require rounding.  In the usual case that `zSig' is
4053 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4054 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4055 | Binary Floating-Point Arithmetic.
4056 *----------------------------------------------------------------------------*/
4057 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the rounding point (7 guard bits) so
     * that ( zSig + roundIncrement ) >> 7 implements the requested mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result LSB would otherwise be even. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /*
     * The unsigned cast makes negative exponents compare large, so this
     * single test catches both potential overflow and potential underflow.
     */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * -!overflow_to_inf is 0 (-> infinity) when rounding toward
             * infinity, else all-ones, which packFloat32()'s additive
             * packing wraps down to the largest finite value.
             */
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /*
             * Tininess: either detected before rounding, or the value is
             * well below the normal range, or rounding cannot carry it
             * back up to the smallest normal.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            /* Denormalize, jamming shifted-out bits into the sticky bit. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie under nearest-even: force the result significand even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4134 
4135 /*----------------------------------------------------------------------------
4136 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4137 | and significand `zSig', and returns the proper single-precision floating-
4138 | point value corresponding to the abstract input.  This routine is just like
4139 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4140 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4141 | floating-point exponent.
4142 *----------------------------------------------------------------------------*/
4143 
4144 static float32
4145  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4146                               float_status *status)
4147 {
4148     int8_t shiftCount;
4149 
4150     shiftCount = clz32(zSig) - 1;
4151     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4152                                status);
4153 
4154 }
4155 
4156 /*----------------------------------------------------------------------------
4157 | Normalizes the subnormal double-precision floating-point value represented
4158 | by the denormalized significand `aSig'.  The normalized exponent and
4159 | significand are stored at the locations pointed to by `zExpPtr' and
4160 | `zSigPtr', respectively.
4161 *----------------------------------------------------------------------------*/
4162 
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* 11 = 64 - (52 fraction bits + 1 integer bit). */
    int8_t shift = clz64(aSig) - 11;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4173 
4174 /*----------------------------------------------------------------------------
4175 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4176 | double-precision floating-point value, returning the result.  After being
4177 | shifted into the proper positions, the three fields are simply added
4178 | together to form the result.  This means that any integer portion of `zSig'
4179 | will be added into the exponent.  Since a properly normalized significand
4180 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4181 | than the desired result exponent whenever `zSig' is a complete, normalized
4182 | significand.
4183 *----------------------------------------------------------------------------*/
4184 
4185 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4186 {
4187 
4188     return make_float64(
4189         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4190 
4191 }
4192 
4193 /*----------------------------------------------------------------------------
4194 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4195 | and significand `zSig', and returns the proper double-precision floating-
4196 | point value corresponding to the abstract input.  Ordinarily, the abstract
4197 | value is simply rounded and packed into the double-precision format, with
4198 | the inexact exception raised if the abstract input cannot be represented
4199 | exactly.  However, if the abstract value is too large, the overflow and
4200 | inexact exceptions are raised and an infinity or maximal finite value is
4201 | returned.  If the abstract value is too small, the input value is rounded to
4202 | a subnormal number, and the underflow and inexact exceptions are raised if
4203 | the abstract input cannot be represented exactly as a subnormal double-
4204 | precision floating-point number.
4205 |     The input significand `zSig' has its binary point between bits 62
4206 | and 61, which is 10 bits to the left of the usual location.  This shifted
4207 | significand must be normalized or smaller.  If `zSig' is not normalized,
4208 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4209 | and it must not require rounding.  In the usual case that `zSig' is
4210 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4211 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4212 | Binary Floating-Point Arithmetic.
4213 *----------------------------------------------------------------------------*/
4214 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the rounding point (10 guard bits) so
     * that ( zSig + roundIncrement ) >> 10 implements the requested mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result LSB would otherwise be even. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /*
     * The unsigned cast makes negative exponents compare large, so this
     * single test catches both potential overflow and potential underflow.
     */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * -(!overflow_to_inf) is 0 (-> infinity) when rounding toward
             * infinity, else all-ones, which packFloat64()'s additive
             * packing wraps down to the largest finite value.
             */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess: either detected before rounding, or the value is
             * well below the normal range, or rounding cannot carry it
             * back up to the smallest normal.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Denormalize, jamming shifted-out bits into the sticky bit. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact tie under nearest-even: force the result significand even. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4290 
4291 /*----------------------------------------------------------------------------
4292 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4293 | and significand `zSig', and returns the proper double-precision floating-
4294 | point value corresponding to the abstract input.  This routine is just like
4295 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4296 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4297 | floating-point exponent.
4298 *----------------------------------------------------------------------------*/
4299 
4300 static float64
4301  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4302                               float_status *status)
4303 {
4304     int8_t shiftCount;
4305 
4306     shiftCount = clz64(zSig) - 1;
4307     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4308                                status);
4309 
4310 }
4311 
4312 /*----------------------------------------------------------------------------
4313 | Normalizes the subnormal extended double-precision floating-point value
4314 | represented by the denormalized significand `aSig'.  The normalized exponent
4315 | and significand are stored at the locations pointed to by `zExpPtr' and
4316 | `zSigPtr', respectively.
4317 *----------------------------------------------------------------------------*/
4318 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* The x80 explicit integer bit sits at bit 63: shift the MSB there. */
    int8_t shift = clz64(aSig);

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4328 
4329 /*----------------------------------------------------------------------------
4330 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4331 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4332 | and returns the proper extended double-precision floating-point value
4333 | corresponding to the abstract input.  Ordinarily, the abstract value is
4334 | rounded and packed into the extended double-precision format, with the
4335 | inexact exception raised if the abstract input cannot be represented
4336 | exactly.  However, if the abstract value is too large, the overflow and
4337 | inexact exceptions are raised and an infinity or maximal finite value is
4338 | returned.  If the abstract value is too small, the input value is rounded to
4339 | a subnormal number, and the underflow and inexact exceptions are raised if
4340 | the abstract input cannot be represented exactly as a subnormal extended
4341 | double-precision floating-point number.
4342 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4343 | number of bits as single or double precision, respectively.  Otherwise, the
4344 | result is rounded to the full precision of the extended double-precision
4345 | format.
4346 |     The input significand must be normalized or smaller.  If the input
4347 | significand is not normalized, `zExp' must be 0; in that case, the result
4348 | returned is a subnormal number, and it must not require rounding.  The
4349 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4350 | Floating-Point Arithmetic.
4351 *----------------------------------------------------------------------------*/
4352 
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /*
     * Reduced-precision path: round the 64-bit zSig0 at the single- or
     * double-precision bit position instead of at its LSB.
     */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold any bits of zSig1 into zSig0's sticky (lowest) bit. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* zExp - 1 wraps negative/zero exponents large: one test for both
     * overflow and underflow candidates. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /* Tininess: before rounding, clearly below range, or rounding
             * cannot carry the value back up to the smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            /* Denormalize with sticky-bit jamming. */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding carried into the integer bit: smallest normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Exact tie under nearest-even: also clear the result LSB. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of bit 63: renormalize to 1.0 * 2^(zExp+1). */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Exact tie under nearest-even: also clear the result LSB. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full-precision path: zSig1 holds the bits below the significand's
     * LSB; decide whether zSig0 must be incremented.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of zSig1 set means the discarded part is >= 1/2 ulp. */
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that never overflow to infinity get the largest
             * finite value at the current rounding precision. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize the 128-bit significand with sticky jamming. */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Re-derive the increment: zSig1 changed during the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: force the LSB even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit: smallest normal number. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* Carry out of 64 bits: renormalize to 1.0 * 2^(zExp+1). */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie under nearest-even: force the LSB even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4540 
4541 /*----------------------------------------------------------------------------
4542 | Takes an abstract floating-point value having sign `zSign', exponent
4543 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4544 | and returns the proper extended double-precision floating-point value
4545 | corresponding to the abstract input.  This routine is just like
4546 | `roundAndPackFloatx80' except that the input significand does not have to be
4547 | normalized.
4548 *----------------------------------------------------------------------------*/
4549 
4550 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4551                                        bool zSign, int32_t zExp,
4552                                        uint64_t zSig0, uint64_t zSig1,
4553                                        float_status *status)
4554 {
4555     int8_t shiftCount;
4556 
4557     if ( zSig0 == 0 ) {
4558         zSig0 = zSig1;
4559         zSig1 = 0;
4560         zExp -= 64;
4561     }
4562     shiftCount = clz64(zSig0);
4563     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4564     zExp -= shiftCount;
4565     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4566                                 zSig0, zSig1, status);
4567 
4568 }
4569 
4570 /*----------------------------------------------------------------------------
4571 | Returns the least-significant 64 fraction bits of the quadruple-precision
4572 | floating-point value `a'.
4573 *----------------------------------------------------------------------------*/
4574 
4575 static inline uint64_t extractFloat128Frac1( float128 a )
4576 {
4577 
4578     return a.low;
4579 
4580 }
4581 
4582 /*----------------------------------------------------------------------------
4583 | Returns the most-significant 48 fraction bits of the quadruple-precision
4584 | floating-point value `a'.
4585 *----------------------------------------------------------------------------*/
4586 
4587 static inline uint64_t extractFloat128Frac0( float128 a )
4588 {
4589 
4590     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4591 
4592 }
4593 
4594 /*----------------------------------------------------------------------------
4595 | Returns the exponent bits of the quadruple-precision floating-point value
4596 | `a'.
4597 *----------------------------------------------------------------------------*/
4598 
4599 static inline int32_t extractFloat128Exp( float128 a )
4600 {
4601 
4602     return ( a.high>>48 ) & 0x7FFF;
4603 
4604 }
4605 
4606 /*----------------------------------------------------------------------------
4607 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4608 *----------------------------------------------------------------------------*/
4609 
4610 static inline bool extractFloat128Sign(float128 a)
4611 {
4612     return a.high >> 63;
4613 }
4614 
4615 /*----------------------------------------------------------------------------
4616 | Normalizes the subnormal quadruple-precision floating-point value
4617 | represented by the denormalized significand formed by the concatenation of
4618 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4619 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4620 | significand are stored at the location pointed to by `zSig0Ptr', and the
4621 | least significant 64 bits of the normalized significand are stored at the
4622 | location pointed to by `zSig1Ptr'.
4623 *----------------------------------------------------------------------------*/
4624 
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* All significant bits live in the low word; bring the leading one
         * up to bit 48 of the high word.  clz64 - 15 may be negative when
         * the leading bit of aSig1 is already above bit 48. */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            /* Leading bit is too high: split aSig1 across both words.
             * `shiftCount & 63` == 64 + shiftCount here, i.e. the left
             * shift complementary to the right shift above. */
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        /* Exponent accounts for the 64-bit word skip plus the shift. */
        *zExpPtr = - shiftCount - 63;
    }
    else {
        /* Leading bit is in the high word: shift it up to bit 48. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }

}
4655 
4656 /*----------------------------------------------------------------------------
4657 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4658 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4659 | floating-point value, returning the result.  After being shifted into the
4660 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4661 | added together to form the most significant 32 bits of the result.  This
4662 | means that any integer portion of `zSig0' will be added into the exponent.
4663 | Since a properly normalized significand will have an integer portion equal
4664 | to 1, the `zExp' input should be 1 less than the desired result exponent
4665 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4666 | significand.
4667 *----------------------------------------------------------------------------*/
4668 
4669 static inline float128
4670 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4671 {
4672     float128 z;
4673 
4674     z.low = zSig1;
4675     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4676     return z;
4677 }
4678 
4679 /*----------------------------------------------------------------------------
4680 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4681 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4682 | and `zSig2', and returns the proper quadruple-precision floating-point value
4683 | corresponding to the abstract input.  Ordinarily, the abstract value is
4684 | simply rounded and packed into the quadruple-precision format, with the
4685 | inexact exception raised if the abstract input cannot be represented
4686 | exactly.  However, if the abstract value is too large, the overflow and
4687 | inexact exceptions are raised and an infinity or maximal finite value is
4688 | returned.  If the abstract value is too small, the input value is rounded to
4689 | a subnormal number, and the underflow and inexact exceptions are raised if
4690 | the abstract input cannot be represented exactly as a subnormal quadruple-
4691 | precision floating-point number.
4692 |     The input significand must be normalized or smaller.  If the input
4693 | significand is not normalized, `zExp' must be 0; in that case, the result
4694 | returned is a subnormal number, and it must not require rounding.  In the
4695 | usual case that the input significand is normalized, `zExp' must be 1 less
4696 | than the ``true'' floating-point exponent.  The handling of underflow and
4697 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4698 *----------------------------------------------------------------------------*/
4699 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide from the guard/round/sticky word zSig2 whether the
     * 128-bit significand must be incremented. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* The unsigned compare catches both overflow (zExp large) and
     * underflow (zExp negative wraps to a huge unsigned value). */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow: deliver either the maximal finite value or
             * infinity, depending on rounding direction and sign. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Underflow: denormalize the result into the zExp == 0 range. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tininess may be detected before or after rounding; after
             * rounding, the value is tiny unless it rounds up to the
             * smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The denormalizing shift changed zSig1/zSig2, so the
             * increment decision must be recomputed. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: when the discarded bits were exactly one half
         * (zSig2 == 0x80...0, so zSig2 + zSig2 == 0), clear the LSB. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must be packed with a zero exponent. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4811 
4812 /*----------------------------------------------------------------------------
4813 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4814 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4815 | returns the proper quadruple-precision floating-point value corresponding
4816 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4817 | except that the input significand has fewer bits and does not have to be
4818 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4819 | point exponent.
4820 *----------------------------------------------------------------------------*/
4821 
static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
                                              uint64_t zSig0, uint64_t zSig1,
                                              float_status *status)
{
    int8_t shiftCount;
    uint64_t zSig2;

    if ( zSig0 == 0 ) {
        /* Significand fits in the low word: promote it by a whole word. */
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    /* Bring the leading one up to bit 48 of zSig0 (the float128
     * integer-bit position). */
    shiftCount = clz64(zSig0) - 15;
    if ( 0 <= shiftCount ) {
        zSig2 = 0;
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    }
    else {
        /* Leading bit is above bit 48: shift right, collecting the bits
         * shifted out into the sticky word zSig2. */
        shift128ExtraRightJamming(
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    }
    zExp -= shiftCount;
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
4847 
4848 
4849 /*----------------------------------------------------------------------------
4850 | Returns the result of converting the 32-bit two's complement integer `a'
4851 | to the extended double-precision floating-point format.  The conversion
4852 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4853 | Arithmetic.
4854 *----------------------------------------------------------------------------*/
4855 
4856 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4857 {
4858     bool zSign;
4859     uint32_t absA;
4860     int8_t shiftCount;
4861     uint64_t zSig;
4862 
4863     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4864     zSign = ( a < 0 );
4865     absA = zSign ? - a : a;
4866     shiftCount = clz32(absA) + 32;
4867     zSig = absA;
4868     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4869 
4870 }
4871 
4872 /*----------------------------------------------------------------------------
4873 | Returns the result of converting the 64-bit two's complement integer `a'
4874 | to the extended double-precision floating-point format.  The conversion
4875 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4876 | Arithmetic.
4877 *----------------------------------------------------------------------------*/
4878 
4879 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4880 {
4881     bool zSign;
4882     uint64_t absA;
4883     int8_t shiftCount;
4884 
4885     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4886     zSign = ( a < 0 );
4887     absA = zSign ? - a : a;
4888     shiftCount = clz64(absA);
4889     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4890 
4891 }
4892 
4893 /*----------------------------------------------------------------------------
4894 | Returns the result of converting the single-precision floating-point value
4895 | `a' to the extended double-precision floating-point format.  The conversion
4896 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4897 | Arithmetic.
4898 *----------------------------------------------------------------------------*/
4899 
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN input: convert through the common NaN form, then quiet it. */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        /* Subnormal input: normalize so the significand has its integer bit. */
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit, rebias the exponent
     * (0x3FFF - 0x7F = 0x3F80) and left-justify the 24-bit significand
     * into the 64-bit floatx80 fraction. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
4928 
4929 /*----------------------------------------------------------------------------
4930 | Returns the remainder of the single-precision floating-point value `a'
4931 | with respect to the corresponding value `b'.  The operation is performed
4932 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4933 *----------------------------------------------------------------------------*/
4934 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* Special cases: NaN operands propagate; rem(inf, x) and rem(x, 0)
     * are invalid; rem(x, inf) == x. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: one 32/64-bit division suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| < |b|/2: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce iteratively, 62 quotient bits
         * per step, using an estimated 128/64 division. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Bias the estimate low so the partial remainder stays
             * non-negative. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        /* Final partial division for the remaining quotient bits. */
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past the true quotient, then pick whichever of the last two
     * partial remainders is nearer (ties go to even quotient), giving the
     * round-to-nearest remainder required by IEEE 754. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5030 
5031 
5032 
5033 /*----------------------------------------------------------------------------
5034 | Returns the binary exponential of the single-precision floating-point value
5035 | `a'. The operation is performed according to the IEC/IEEE Standard for
5036 | Binary Floating-Point Arithmetic.
5037 |
5038 | Uses the following identities:
5039 |
5040 | 1. -------------------------------------------------------------------------
5041 |      x    x*ln(2)
5042 |     2  = e
5043 |
5044 | 2. -------------------------------------------------------------------------
5045 |                      2     3     4     5           n
5046 |      x        x     x     x     x     x           x
5047 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5048 |               1!    2!    3!    4!    5!          n!
5049 *----------------------------------------------------------------------------*/
5050 
/* Taylor-series coefficients for e**x as float64 bit patterns:
 * entry i holds 1/(i+1)!  (1/1!, 1/2!, ..., 1/15!). */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5069 
float32 float32_exp2(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    a = float32_squash_input_denormal(a, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFF) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* 2**-inf == 0, 2**+inf == +inf. */
        return (aSign) ? float32_zero : a;
    }
    if (aExp == 0) {
        /* 2**0 == 1 exactly. */
        if (aSig == 0) return float32_one;
    }

    float_raise(float_flag_inexact, status);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* 2**a == e**(a * ln 2); evaluate the exponential's Taylor series
     * (identities in the block comment above) in double precision. */
    x = float32_to_float64(a, status);
    x = float64_mul(x, float64_ln2, status);

    /* Accumulate r = 1 + sum(x**n / n!) over 15 terms; xn tracks x**(i+1). */
    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i], status);
        r = float64_add(r, f, status);

        xn = float64_mul(xn, x, status);
    }

    return float64_to_float32(r, status);
}
5114 
5115 /*----------------------------------------------------------------------------
5116 | Returns the binary log of the single-precision floating-point value `a'.
5117 | The operation is performed according to the IEC/IEEE Standard for Binary
5118 | Floating-Point Arithmetic.
5119 *----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) == -inf. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* log2(+inf) == +inf. */
        return a;
    }

    /* Integer part of the log is the unbiased exponent; the loop below
     * extracts the fraction bits one at a time by repeatedly squaring the
     * 1.23-format significand: each squaring doubles the log, and the
     * carry into bit 24 is the next fraction bit. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;

    /* zSig is a 2's-complement fixed-point log scaled by 2**23; exponent
     * 0x85 (biased) accounts for that scaling when packing. */
    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5164 
5165 /*----------------------------------------------------------------------------
5166 | Returns the result of converting the double-precision floating-point value
5167 | `a' to the extended double-precision floating-point format.  The conversion
5168 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5169 | Arithmetic.
5170 *----------------------------------------------------------------------------*/
5171 
floatx80 float64_to_floatx80(float64 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        if (aSig) {
            /* NaN input: convert through the common NaN form, then quiet it. */
            floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        /* Subnormal input: normalize so the significand has its integer bit. */
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit, rebias the exponent
     * (0x3FFF - 0x3FF = 0x3C00) and left-justify the 53-bit significand
     * into the 64-bit floatx80 fraction. */
    return
        packFloatx80(
            aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);

}
5201 
5202 /*----------------------------------------------------------------------------
5203 | Returns the remainder of the double-precision floating-point value `a'
5204 | with respect to the corresponding value `b'.  The operation is performed
5205 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5206 *----------------------------------------------------------------------------*/
5207 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* Special cases: NaN operands propagate; rem(inf, x) and rem(x, 0)
     * are invalid; rem(x, inf) == x. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit and left-justify the significands. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: a is already the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce iteratively, 62 quotient bits per step, using an estimated
     * 128/64 division biased low so the partial remainder stays
     * non-negative. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    /* Final partial division for the remaining quotient bits. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past the true quotient, then pick whichever of the last two
     * partial remainders is nearer (ties go to even quotient), giving the
     * round-to-nearest remainder required by IEEE 754. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5289 
5290 /*----------------------------------------------------------------------------
5291 | Returns the binary log of the double-precision floating-point value `a'.
5292 | The operation is performed according to the IEC/IEEE Standard for Binary
5293 | Floating-Point Arithmetic.
5294 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) == -inf. */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+inf) == +inf. */
        return a;
    }

    /* Integer part of the log is the unbiased exponent; the loop below
     * extracts the fraction bits one at a time by repeatedly squaring the
     * 1.52-format significand: each squaring doubles the log, and the
     * carry into bit 53 is the next fraction bit. */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    /* zSig is a 2's-complement fixed-point log scaled by 2**52; exponent
     * 0x408 (biased) accounts for that scaling when packing. */
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5338 
5339 /*----------------------------------------------------------------------------
5340 | Returns the result of converting the extended double-precision floating-
5341 | point value `a' to the 32-bit two's complement integer format.  The
5342 | conversion is performed according to the IEC/IEEE Standard for Binary
5343 | Floating-Point Arithmetic---which means in particular that the conversion
5344 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5345 | largest positive integer is returned.  Otherwise, if the conversion
5346 | overflows, the largest integer with the same sign as `a' is returned.
5347 *----------------------------------------------------------------------------*/
5348 
5349 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5350 {
5351     bool aSign;
5352     int32_t aExp, shiftCount;
5353     uint64_t aSig;
5354 
5355     if (floatx80_invalid_encoding(a)) {
5356         float_raise(float_flag_invalid, status);
5357         return 1 << 31;
5358     }
5359     aSig = extractFloatx80Frac( a );
5360     aExp = extractFloatx80Exp( a );
5361     aSign = extractFloatx80Sign( a );
5362     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5363     shiftCount = 0x4037 - aExp;
5364     if ( shiftCount <= 0 ) shiftCount = 1;
5365     shift64RightJamming( aSig, shiftCount, &aSig );
5366     return roundAndPackInt32(aSign, aSig, status);
5367 
5368 }
5369 
5370 /*----------------------------------------------------------------------------
5371 | Returns the result of converting the extended double-precision floating-
5372 | point value `a' to the 32-bit two's complement integer format.  The
5373 | conversion is performed according to the IEC/IEEE Standard for Binary
5374 | Floating-Point Arithmetic, except that the conversion is always rounded
5375 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5376 | Otherwise, if the conversion overflows, the largest integer with the same
5377 | sign as `a' is returned.
5378 *----------------------------------------------------------------------------*/
5379 
5380 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5381 {
5382     bool aSign;
5383     int32_t aExp, shiftCount;
5384     uint64_t aSig, savedASig;
5385     int32_t z;
5386 
5387     if (floatx80_invalid_encoding(a)) {
5388         float_raise(float_flag_invalid, status);
5389         return 1 << 31;
5390     }
5391     aSig = extractFloatx80Frac( a );
5392     aExp = extractFloatx80Exp( a );
5393     aSign = extractFloatx80Sign( a );
5394     if ( 0x401E < aExp ) {
5395         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5396         goto invalid;
5397     }
5398     else if ( aExp < 0x3FFF ) {
5399         if (aExp || aSig) {
5400             float_raise(float_flag_inexact, status);
5401         }
5402         return 0;
5403     }
5404     shiftCount = 0x403E - aExp;
5405     savedASig = aSig;
5406     aSig >>= shiftCount;
5407     z = aSig;
5408     if ( aSign ) z = - z;
5409     if ( ( z < 0 ) ^ aSign ) {
5410  invalid:
5411         float_raise(float_flag_invalid, status);
5412         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5413     }
5414     if ( ( aSig<<shiftCount ) != savedASig ) {
5415         float_raise(float_flag_inexact, status);
5416     }
5417     return z;
5418 
5419 }
5420 
5421 /*----------------------------------------------------------------------------
5422 | Returns the result of converting the extended double-precision floating-
5423 | point value `a' to the 64-bit two's complement integer format.  The
5424 | conversion is performed according to the IEC/IEEE Standard for Binary
5425 | Floating-Point Arithmetic---which means in particular that the conversion
5426 | is rounded according to the current rounding mode.  If `a' is a NaN,
5427 | the largest positive integer is returned.  Otherwise, if the conversion
5428 | overflows, the largest integer with the same sign as `a' is returned.
5429 *----------------------------------------------------------------------------*/
5430 
5431 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5432 {
5433     bool aSign;
5434     int32_t aExp, shiftCount;
5435     uint64_t aSig, aSigExtra;
5436 
5437     if (floatx80_invalid_encoding(a)) {
5438         float_raise(float_flag_invalid, status);
5439         return 1ULL << 63;
5440     }
5441     aSig = extractFloatx80Frac( a );
5442     aExp = extractFloatx80Exp( a );
5443     aSign = extractFloatx80Sign( a );
5444     shiftCount = 0x403E - aExp;
5445     if ( shiftCount <= 0 ) {
5446         if ( shiftCount ) {
5447             float_raise(float_flag_invalid, status);
5448             if (!aSign || floatx80_is_any_nan(a)) {
5449                 return INT64_MAX;
5450             }
5451             return INT64_MIN;
5452         }
5453         aSigExtra = 0;
5454     }
5455     else {
5456         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5457     }
5458     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5459 
5460 }
5461 
5462 /*----------------------------------------------------------------------------
5463 | Returns the result of converting the extended double-precision floating-
5464 | point value `a' to the 64-bit two's complement integer format.  The
5465 | conversion is performed according to the IEC/IEEE Standard for Binary
5466 | Floating-Point Arithmetic, except that the conversion is always rounded
5467 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5468 | Otherwise, if the conversion overflows, the largest integer with the same
5469 | sign as `a' is returned.
5470 *----------------------------------------------------------------------------*/
5471 
5472 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5473 {
5474     bool aSign;
5475     int32_t aExp, shiftCount;
5476     uint64_t aSig;
5477     int64_t z;
5478 
5479     if (floatx80_invalid_encoding(a)) {
5480         float_raise(float_flag_invalid, status);
5481         return 1ULL << 63;
5482     }
5483     aSig = extractFloatx80Frac( a );
5484     aExp = extractFloatx80Exp( a );
5485     aSign = extractFloatx80Sign( a );
5486     shiftCount = aExp - 0x403E;
5487     if ( 0 <= shiftCount ) {
5488         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5489         if ( ( a.high != 0xC03E ) || aSig ) {
5490             float_raise(float_flag_invalid, status);
5491             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5492                 return INT64_MAX;
5493             }
5494         }
5495         return INT64_MIN;
5496     }
5497     else if ( aExp < 0x3FFF ) {
5498         if (aExp | aSig) {
5499             float_raise(float_flag_inexact, status);
5500         }
5501         return 0;
5502     }
5503     z = aSig>>( - shiftCount );
5504     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5505         float_raise(float_flag_inexact, status);
5506     }
5507     if ( aSign ) z = - z;
5508     return z;
5509 
5510 }
5511 
5512 /*----------------------------------------------------------------------------
5513 | Returns the result of converting the extended double-precision floating-
5514 | point value `a' to the single-precision floating-point format.  The
5515 | conversion is performed according to the IEC/IEEE Standard for Binary
5516 | Floating-Point Arithmetic.
5517 *----------------------------------------------------------------------------*/
5518 
float32 floatx80_to_float32(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 ) ) {
            /* NaN: convert the payload through the canonical common
             * format, then return it quieted. */
            float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
                                             status);
            return float32_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* Narrow the 64-bit significand toward float32 width; shifted-out
     * bits are jammed into the sticky bit so rounding still sees them. */
    shift64RightJamming( aSig, 33, &aSig );
    /* Rebias the exponent for roundAndPackFloat32; leave zero as-is. */
    if ( aExp || aSig ) aExp -= 0x3F81;
    return roundAndPackFloat32(aSign, aExp, aSig, status);

}
5545 
5546 /*----------------------------------------------------------------------------
5547 | Returns the result of converting the extended double-precision floating-
5548 | point value `a' to the double-precision floating-point format.  The
5549 | conversion is performed according to the IEC/IEEE Standard for Binary
5550 | Floating-Point Arithmetic.
5551 *----------------------------------------------------------------------------*/
5552 
float64 floatx80_to_float64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig, zSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 ) ) {
            /* NaN: convert the payload through the canonical common
             * format, then return it quieted. */
            float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
                                             status);
            return float64_silence_nan(res, status);
        }
        /* Infinity. */
        return packFloat64( aSign, 0x7FF, 0 );
    }
    /* Narrow the significand by one bit, jamming the shifted-out bit so
     * rounding still sees it. */
    shift64RightJamming( aSig, 1, &zSig );
    /* Rebias the exponent for roundAndPackFloat64; leave zero as-is. */
    if ( aExp || aSig ) aExp -= 0x3C01;
    return roundAndPackFloat64(aSign, aExp, zSig, status);

}
5579 
5580 /*----------------------------------------------------------------------------
5581 | Returns the result of converting the extended double-precision floating-
5582 | point value `a' to the quadruple-precision floating-point format.  The
5583 | conversion is performed according to the IEC/IEEE Standard for Binary
5584 | Floating-Point Arithmetic.
5585 *----------------------------------------------------------------------------*/
5586 
float128 floatx80_to_float128(floatx80 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
        /* NaN: convert the payload through the canonical common format,
         * then return it quieted. */
        float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
                                           status);
        return float128_silence_nan(res, status);
    }
    /* Drop the explicit integer bit (aSig<<1) and position the fraction
     * for the wider float128 significand; the widening is exact, so no
     * rounding is needed. */
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
    return packFloat128( aSign, aExp, zSig0, zSig1 );

}
5609 
5610 /*----------------------------------------------------------------------------
5611 | Rounds the extended double-precision floating-point value `a'
5612 | to the precision provided by floatx80_rounding_precision and returns the
5613 | result as an extended double-precision floating-point value.
5614 | The operation is performed according to the IEC/IEEE Standard for Binary
5615 | Floating-Point Arithmetic.
5616 *----------------------------------------------------------------------------*/
5617 
5618 floatx80 floatx80_round(floatx80 a, float_status *status)
5619 {
5620     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5621                                 extractFloatx80Sign(a),
5622                                 extractFloatx80Exp(a),
5623                                 extractFloatx80Frac(a), 0, status);
5624 }
5625 
5626 /*----------------------------------------------------------------------------
5627 | Rounds the extended double-precision floating-point value `a' to an integer,
5628 | and returns the result as an extended quadruple-precision floating-point
5629 | value.  The operation is performed according to the IEC/IEEE Standard for
5630 | Binary Floating-Point Arithmetic.
5631 *----------------------------------------------------------------------------*/
5632 
5633 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5634 {
5635     bool aSign;
5636     int32_t aExp;
5637     uint64_t lastBitMask, roundBitsMask;
5638     floatx80 z;
5639 
5640     if (floatx80_invalid_encoding(a)) {
5641         float_raise(float_flag_invalid, status);
5642         return floatx80_default_nan(status);
5643     }
5644     aExp = extractFloatx80Exp( a );
5645     if ( 0x403E <= aExp ) {
5646         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5647             return propagateFloatx80NaN(a, a, status);
5648         }
5649         return a;
5650     }
5651     if ( aExp < 0x3FFF ) {
5652         if (    ( aExp == 0 )
5653              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5654             return a;
5655         }
5656         float_raise(float_flag_inexact, status);
5657         aSign = extractFloatx80Sign( a );
5658         switch (status->float_rounding_mode) {
5659          case float_round_nearest_even:
5660             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5661                ) {
5662                 return
5663                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5664             }
5665             break;
5666         case float_round_ties_away:
5667             if (aExp == 0x3FFE) {
5668                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5669             }
5670             break;
5671          case float_round_down:
5672             return
5673                   aSign ?
5674                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5675                 : packFloatx80( 0, 0, 0 );
5676          case float_round_up:
5677             return
5678                   aSign ? packFloatx80( 1, 0, 0 )
5679                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5680 
5681         case float_round_to_zero:
5682             break;
5683         default:
5684             g_assert_not_reached();
5685         }
5686         return packFloatx80( aSign, 0, 0 );
5687     }
5688     lastBitMask = 1;
5689     lastBitMask <<= 0x403E - aExp;
5690     roundBitsMask = lastBitMask - 1;
5691     z = a;
5692     switch (status->float_rounding_mode) {
5693     case float_round_nearest_even:
5694         z.low += lastBitMask>>1;
5695         if ((z.low & roundBitsMask) == 0) {
5696             z.low &= ~lastBitMask;
5697         }
5698         break;
5699     case float_round_ties_away:
5700         z.low += lastBitMask >> 1;
5701         break;
5702     case float_round_to_zero:
5703         break;
5704     case float_round_up:
5705         if (!extractFloatx80Sign(z)) {
5706             z.low += roundBitsMask;
5707         }
5708         break;
5709     case float_round_down:
5710         if (extractFloatx80Sign(z)) {
5711             z.low += roundBitsMask;
5712         }
5713         break;
5714     default:
5715         abort();
5716     }
5717     z.low &= ~ roundBitsMask;
5718     if ( z.low == 0 ) {
5719         ++z.high;
5720         z.low = UINT64_C(0x8000000000000000);
5721     }
5722     if (z.low != a.low) {
5723         float_raise(float_flag_inexact, status);
5724     }
5725     return z;
5726 
5727 }
5728 
5729 /*----------------------------------------------------------------------------
5730 | Returns the result of adding the absolute values of the extended double-
5731 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5732 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5733 | The addition is performed according to the IEC/IEEE Standard for Binary
5734 | Floating-Point Arithmetic.
5735 *----------------------------------------------------------------------------*/
5736 
/* Add the magnitudes of `a' and `b'; `zSign' is the sign to attach to the
 * sum (ignored for NaN results). */
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent; align b's significand to it. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf + finite = Inf. */
            return a;
        }
        /* A subnormal (exponent field 0) has effective exponent 1. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent; align a's significand to it. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* finite + Inf = Inf, with the requested sign. */
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit still set: no carry out of the sum; round directly. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the significand sum: shift right one (jamming the
     * lost bit) and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5808 
5809 /*----------------------------------------------------------------------------
5810 | Returns the result of subtracting the absolute values of the extended
5811 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
5812 | difference is negated before being returned.  `zSign' is ignored if the
5813 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
5814 | Standard for Binary Floating-Point Arithmetic.
5815 *----------------------------------------------------------------------------*/
5816 
/* Subtract the magnitudes of `a' and `b'; `zSign' is the sign to attach
 * to the difference (ignored for NaN results). */
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here on. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - Inf is invalid. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    /* Subnormals behave as if their exponent field were 1. */
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: +0, or -0 when rounding toward minus infinity. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - Inf: infinite result with the opposite sign. */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* |b| > |a|: compute b - a and flip the result sign. */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - finite = Inf. */
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* |a| > |b|: compute a - b with the requested sign. */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5877 
5878 /*----------------------------------------------------------------------------
5879 | Returns the result of adding the extended double-precision floating-point
5880 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
5881 | Standard for Binary Floating-Point Arithmetic.
5882 *----------------------------------------------------------------------------*/
5883 
5884 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5885 {
5886     bool aSign, bSign;
5887 
5888     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5889         float_raise(float_flag_invalid, status);
5890         return floatx80_default_nan(status);
5891     }
5892     aSign = extractFloatx80Sign( a );
5893     bSign = extractFloatx80Sign( b );
5894     if ( aSign == bSign ) {
5895         return addFloatx80Sigs(a, b, aSign, status);
5896     }
5897     else {
5898         return subFloatx80Sigs(a, b, aSign, status);
5899     }
5900 
5901 }
5902 
5903 /*----------------------------------------------------------------------------
5904 | Returns the result of subtracting the extended double-precision floating-
5905 | point values `a' and `b'.  The operation is performed according to the
5906 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5907 *----------------------------------------------------------------------------*/
5908 
5909 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5910 {
5911     bool aSign, bSign;
5912 
5913     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5914         float_raise(float_flag_invalid, status);
5915         return floatx80_default_nan(status);
5916     }
5917     aSign = extractFloatx80Sign( a );
5918     bSign = extractFloatx80Sign( b );
5919     if ( aSign == bSign ) {
5920         return subFloatx80Sigs(a, b, aSign, status);
5921     }
5922     else {
5923         return addFloatx80Sigs(a, b, aSign, status);
5924     }
5925 
5926 }
5927 
5928 /*----------------------------------------------------------------------------
5929 | Returns the result of multiplying the extended double-precision floating-
5930 | point values `a' and `b'.  The operation is performed according to the
5931 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5932 *----------------------------------------------------------------------------*/
5933 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* a is Inf or NaN. */
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf * 0 is invalid. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        /* b is Inf or NaN. */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        /* Zero or subnormal operand. */
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* 64x64 -> 128-bit product of the two significands. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* Normalize: if the top product bit is clear, shift left by one. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5989 
5990 /*----------------------------------------------------------------------------
5991 | Returns the result of dividing the extended double-precision floating-point
5992 | value `a' by the corresponding value `b'.  The operation is performed
5993 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5994 *----------------------------------------------------------------------------*/
5995 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* a is Inf or NaN. */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / Inf = 0. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; x / 0 raises divide-by-zero. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Keep the partial quotient below 1 so the estimate fits 64 bits. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* Estimate the high quotient word, then correct it by checking the
     * remainder sign. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low quotient word; refine it only when it is small enough that
     * rounding could be affected, and fold any nonzero remainder into
     * the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6076 
6077 /*----------------------------------------------------------------------------
6078 | Returns the remainder of the extended double-precision floating-point value
6079 | `a' with respect to the corresponding value `b'.  The operation is performed
6080 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6081 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6082 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6083 | the absolute value of the integer quotient.
6084 *----------------------------------------------------------------------------*/
6085 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* Inf % x (and any NaN operand) is invalid or propagates NaN. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* x % Inf = x (NaNs propagate). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x % 0 is invalid; normalize a subnormal divisor. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2 (or mod): the dividend is already the remainder. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Long division: peel off up to 62 quotient bits per iteration,
     * using a slightly-low estimate that is corrected afterwards. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    /* Final partial step for the remaining expDiff bits. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Correct the underestimate by repeated subtraction. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: pick the remainder nearest zero (ties to an
         * even quotient), which may require flipping the sign. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6206 
6207 /*----------------------------------------------------------------------------
6208 | Returns the remainder of the extended double-precision floating-point value
6209 | `a' with respect to the corresponding value `b'.  The operation is performed
6210 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6211 *----------------------------------------------------------------------------*/
6212 
6213 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6214 {
6215     uint64_t quotient;
6216     return floatx80_modrem(a, b, false, &quotient, status);
6217 }
6218 
6219 /*----------------------------------------------------------------------------
6220 | Returns the remainder of the extended double-precision floating-point value
6221 | `a' with respect to the corresponding value `b', with the quotient truncated
6222 | toward zero.
6223 *----------------------------------------------------------------------------*/
6224 
6225 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6226 {
6227     uint64_t quotient;
6228     return floatx80_modrem(a, b, true, &quotient, status);
6229 }
6230 
6231 /*----------------------------------------------------------------------------
6232 | Returns the square root of the extended double-precision floating-point
6233 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6234 | for Binary Floating-Point Arithmetic.
6235 *----------------------------------------------------------------------------*/
6236 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Non-canonical 80-bit encodings are invalid operations. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* Shifting out the explicit integer bit leaves the NaN payload;
           non-zero means NaN, zero means infinity. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        /* sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) = -0; any other negative operand is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent, re-biased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial ~32-bit root estimate from the high half of the significand. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    /* Position the radicand: shift right by 2 (or 3 for odd exponents) so
       the exponent halving above is exact. */
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    /* One Newton-Raphson-style refinement: z = a/z_est + z_est (scaled). */
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Compute the remainder a - z^2 and correct the estimate downward
       while the remainder is negative (estimate was too high). */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        /* Adding back 2*z + 1 accounts for decrementing z by one. */
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Estimate the low 64 result bits from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    /* Only when the low bits are near a rounding boundary do we need the
       exact 192-bit remainder to decide the sticky bit. */
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        /* As above: correct the low word downward while remainder < 0. */
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any non-zero remainder into the sticky (lowest) bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Reassemble the 128-bit significand for rounding. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6301 
6302 /*----------------------------------------------------------------------------
6303 | Returns the result of converting the quadruple-precision floating-point
6304 | value `a' to the extended double-precision floating-point format.  The
6305 | conversion is performed according to the IEC/IEEE Standard for Binary
6306 | Floating-Point Arithmetic.
6307 *----------------------------------------------------------------------------*/
6308 
6309 floatx80 float128_to_floatx80(float128 a, float_status *status)
6310 {
6311     bool aSign;
6312     int32_t aExp;
6313     uint64_t aSig0, aSig1;
6314 
6315     aSig1 = extractFloat128Frac1( a );
6316     aSig0 = extractFloat128Frac0( a );
6317     aExp = extractFloat128Exp( a );
6318     aSign = extractFloat128Sign( a );
6319     if ( aExp == 0x7FFF ) {
6320         if ( aSig0 | aSig1 ) {
6321             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6322                                                status);
6323             return floatx80_silence_nan(res, status);
6324         }
6325         return packFloatx80(aSign, floatx80_infinity_high,
6326                                    floatx80_infinity_low);
6327     }
6328     if ( aExp == 0 ) {
6329         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6330         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6331     }
6332     else {
6333         aSig0 |= UINT64_C(0x0001000000000000);
6334     }
6335     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6336     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6337 
6338 }
6339 
6340 /*----------------------------------------------------------------------------
6341 | Returns the remainder of the quadruple-precision floating-point value `a'
6342 | with respect to the corresponding value `b'.  The operation is performed
6343 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6344 *----------------------------------------------------------------------------*/
6345 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(inf, anything) is invalid. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* rem(finite, inf) = a, exactly. */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid; otherwise normalize a subnormal divisor. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* If |a| < |b|/2 the remainder is a itself. */
    if ( expDiff < -1 ) return a;
    /* Left-align both significands (restoring the implicit integer bit);
       shift a one place less when its exponent is one below b's. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit: subtract b once if a >= b. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Produce the quotient 61 bits at a time until the remaining exponent
       difference fits in one final step. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        /* Bias the estimate low (it can be slightly high) so the
           subtraction below cannot overshoot. */
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial quotient, truncated to the remaining bit count. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Subtract b until the remainder goes negative, remembering the last
       non-negative value; this brackets the true remainder. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* sigMean = negative + non-negative candidate; its sign tells which
       candidate is closer to zero (ties go to the even quotient). */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    /* If the chosen remainder is negative, negate it and flip the sign. */
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6446 
/*
 * Common implementation for floatx80_compare{,_quiet}: compare `a' and
 * `b', returning a FloatRelation.  When `is_quiet' is set, only
 * signaling NaNs raise the invalid exception; otherwise any NaN does.
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical encodings always compare unordered and raise invalid. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Either operand a NaN (max exponent, non-zero fraction after dropping
       the explicit integer bit)? */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        /* Opposite signs: equal only if both are zeros (+0 == -0);
           the << 1 discards the sign bit from the exponent halves. */
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Negative < positive; relies on less/greater being -1/+1. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes as a 128-bit value, inverting
               the result for negative operands. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6494 
6495 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6496 {
6497     return floatx80_compare_internal(a, b, 0, status);
6498 }
6499 
6500 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6501                                      float_status *status)
6502 {
6503     return floatx80_compare_internal(a, b, 1, status);
6504 }
6505 
6506 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6507 {
6508     bool aSign;
6509     int32_t aExp;
6510     uint64_t aSig;
6511 
6512     if (floatx80_invalid_encoding(a)) {
6513         float_raise(float_flag_invalid, status);
6514         return floatx80_default_nan(status);
6515     }
6516     aSig = extractFloatx80Frac( a );
6517     aExp = extractFloatx80Exp( a );
6518     aSign = extractFloatx80Sign( a );
6519 
6520     if ( aExp == 0x7FFF ) {
6521         if ( aSig<<1 ) {
6522             return propagateFloatx80NaN(a, a, status);
6523         }
6524         return a;
6525     }
6526 
6527     if (aExp == 0) {
6528         if (aSig == 0) {
6529             return a;
6530         }
6531         aExp++;
6532     }
6533 
6534     if (n > 0x10000) {
6535         n = 0x10000;
6536     } else if (n < -0x10000) {
6537         n = -0x10000;
6538     }
6539 
6540     aExp += n;
6541     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6542                                          aSign, aExp, aSig, 0, status);
6543 }
6544 
6545 static void __attribute__((constructor)) softfloat_init(void)
6546 {
6547     union_float64 ua, ub, uc, ur;
6548 
6549     if (QEMU_NO_HARDFLOAT) {
6550         return;
6551     }
6552     /*
6553      * Test that the host's FMA is not obviously broken. For example,
6554      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6555      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6556      */
6557     ua.s = 0x0020000000000001ULL;
6558     ub.s = 0x3ca0000000000000ULL;
6559     uc.s = 0x0020000000000000ULL;
6560     ur.h = fma(ua.h, ub.h, uc.h);
6561     if (ur.s != 0x0020000000000001ULL) {
6562         force_soft_fma = true;
6563     }
6564 }
6565