xref: /openbmc/qemu/fpu/softfloat.c (revision 6eb169b8)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate per-type helpers that flush a denormal input operand to a
 * zero of the same sign, raising float_flag_input_denormal.  The
 * "__nocheck" variants do not test s->flush_inputs_to_zero; callers
 * are responsible for that check.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/* One-operand input flush: no-op unless the status enables flushing. */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/* Two-operand input flush: no-op unless the status enables flushing. */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/* Three-operand input flush: no-op unless the status enables flushing. */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 * 1 selects the host fpclassify() path, 0 the softfloat bit tests
 * (see f32_is_zon2() and friends below).
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205 
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 * Consulted by f32_is_inf()/f64_is_inf() below.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217 
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 * QEMU_SOFTFLOAT_ATTR adds noinline only when hardfloat is in use, so the
 * softfloat fallbacks stay out of line on the fast path.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
/* Same bits viewed as the softfloat type (s) or the host FP type (h). */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates guarding use of the hardfloat fast path (pre/post checks). */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host implementations of a two-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
/* 2-input is-zero-or-normal */
/* True if both operands are zero or normal (no denormal/inf/NaN). */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}
288 
/* As f32_is_zon2, for float64 operands. */
static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}
298 
/* 3-input is-zero-or-normal */
/* True if all three operands are zero or normal (no denormal/inf/NaN). */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}
312 
/* As f32_is_zon3, for float64 operands. */
static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Dispatch a 2-input float32 operation to the host FPU when safe,
 * otherwise to the softfloat implementation.
 *   hard/soft: host and softfloat versions of the operation
 *   pre:  predicate that must hold on the (flushed) inputs before the
 *         host op may be used
 *   post: consulted when |host result| <= FLT_MIN; returning true redoes
 *         the operation in softfloat for exact flag behaviour
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Flags/rounding mode may rule out the host FPU entirely. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* An infinite host result is recorded as overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Possible underflow/denormal result: recompute in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Dispatch a 2-input float64 operation to the host FPU when safe,
 * otherwise to the softfloat implementation.
 *   hard/soft: host and softfloat versions of the operation
 *   pre:  predicate that must hold on the (flushed) inputs before the
 *         host op may be used
 *   post: consulted when |host result| <= DBL_MIN; returning true redoes
 *         the operation in softfloat for exact flag behaviour
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Flags/rounding mode may rule out the host FPU entirely. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* An infinite host result is recorded as overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Possible underflow/denormal result: recompute in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

/* Packed so it stays one byte inside FloatPartsN; ordering is load-bearing. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471 
/* Convert a FloatClass value into a bit for use in a class bitmask. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Common combinations. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
494 
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    /* Relies on the FloatClass ordering: qnan and snan are last. */
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
510 
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* Two fraction words: enough for float128. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* Four fraction words, e.g. for double-width intermediate products. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
558 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
/*
 * The (-F - 1) & 63 forms take the shift modulo 64, so the macro also
 * works for fractions wider than 64 bits (float128's F = 112).
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
596 
/* Per-format parameter tables; exponent/fraction sizes per FLOAT_PARAMS. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same layout as float16, with ARM Alternative Half Precision handling. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8-bit exponent, 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
621 
622 /* Unpack a float to parts, but do not canonicalize.  */
623 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
624 {
625     const int f_size = fmt->frac_size;
626     const int e_size = fmt->exp_size;
627 
628     *r = (FloatParts64) {
629         .cls = float_class_unclassified,
630         .sign = extract64(raw, f_size + e_size, 1),
631         .exp = extract64(raw, f_size, e_size),
632         .frac = extract64(raw, 0, f_size)
633     };
634 }
635 
/* Per-format wrappers: unpack into FloatParts64 using the format's params. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
655 
656 static void float128_unpack_raw(FloatParts128 *p, float128 f)
657 {
658     const int f_size = float128_params.frac_size - 64;
659     const int e_size = float128_params.exp_size;
660 
661     *p = (FloatParts128) {
662         .cls = float_class_unclassified,
663         .sign = extract64(f.high, f_size + e_size, 1),
664         .exp = extract64(f.high, f_size, e_size),
665         .frac_hi = extract64(f.high, 0, f_size),
666         .frac_lo = f.low,
667     };
668 }
669 
670 /* Pack a float from parts, but do not canonicalize.  */
671 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
672 {
673     const int f_size = fmt->frac_size;
674     const int e_size = fmt->exp_size;
675     uint64_t ret;
676 
677     ret = (uint64_t)p->sign << (f_size + e_size);
678     ret = deposit64(ret, f_size, e_size, p->exp);
679     ret = deposit64(ret, 0, f_size, p->frac);
680     return ret;
681 }
682 
/* Per-format wrappers: repack a FloatParts64 into the raw representation. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
702 
703 static float128 float128_pack_raw(const FloatParts128 *p)
704 {
705     const int f_size = float128_params.frac_size - 64;
706     const int e_size = float128_params.exp_size;
707     uint64_t hi;
708 
709     hi = (uint64_t)p->sign << (f_size + e_size);
710     hi = deposit64(hi, f_size, e_size, p->exp);
711     hi = deposit64(hi, 0, f_size, p->frac_hi);
712     return make_float128(hi, p->frac_lo);
713 }
714 
715 /*----------------------------------------------------------------------------
716 | Functions and definitions to determine:  (1) whether tininess for underflow
717 | is detected before or after rounding by default, (2) what (if anything)
718 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
719 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720 | are propagated from function inputs to output.  These details are target-
721 | specific.
722 *----------------------------------------------------------------------------*/
723 #include "softfloat-specialize.c.inc"
724 
/*
 * Size-generic dispatch for the parts* helpers: QEMU_GENERIC selects the
 * parts64/parts128(/parts256) implementation based on the pointer type of
 * the first argument.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* Implementations provided by softfloat-specialize.c.inc. */
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* ab_mask/abc_mask are float_cmask combinations of the operand classes. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

/* Raw-to-canonical and canonical-to-raw conversions for a format. */
static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

/* Fraction-level add/sub of normal numbers (no class handling). */
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

/* Full arithmetic operations on decomposed parts. */
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

/* Round-to-int; the _normal variants assume a is float_class_normal. */
static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
840 
841 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
842                                      int scale, int64_t min, int64_t max,
843                                      float_status *s);
844 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
845                                      int scale, int64_t min, int64_t max,
846                                      float_status *s);
847 
848 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
849     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
850 
851 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
852                                       int scale, uint64_t max,
853                                       float_status *s);
854 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
855                                        int scale, uint64_t max,
856                                        float_status *s);
857 
858 #define parts_float_to_uint(P, R, Z, M, S) \
859     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
860 
861 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
862                                   int scale, float_status *s);
863 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
864                                    int scale, float_status *s);
865 
866 #define parts_sint_to_float(P, I, Z, S) \
867     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
868 
869 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
870                                   int scale, float_status *s);
871 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
872                                    int scale, float_status *s);
873 
874 #define parts_uint_to_float(P, I, Z, S) \
875     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
876 
877 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
878                                     float_status *s, int flags);
879 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
880                                       float_status *s, int flags);
881 
882 #define parts_minmax(A, B, S, F) \
883     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
884 
885 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
886                            float_status *s, bool q);
887 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
888                             float_status *s, bool q);
889 
890 #define parts_compare(A, B, S, Q) \
891     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
892 
893 /*
894  * Helper functions for softfloat-parts.c.inc, per-size operations.
895  */
896 
897 #define FRAC_GENERIC_64_128(NAME, P) \
898     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
899 
900 #define FRAC_GENERIC_64_128_256(NAME, P) \
901     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
902                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
903 
/*
 * Add significands: R = A + B.  Returns true on carry-out of the
 * most significant word, i.e. the sum did not fit in N bits.
 */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    /* Propagate the carry from the low word into the high word. */
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    /* Four-word add, carry rippled from lowest to highest word. */
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
928 
/*
 * Add a 64-bit immediate to the significand: R = A + C.
 * Returns true on carry-out of the most significant word.
 */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    /* Add to the low word; reuse C to carry into the high word. */
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
941 
942 static void frac64_allones(FloatParts64 *a)
943 {
944     a->frac = -1;
945 }
946 
/* Set every bit of the 128-bit significand. */
static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
953 
954 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
955 {
956     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
957 }
958 
959 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
960 {
961     uint64_t ta = a->frac_hi, tb = b->frac_hi;
962     if (ta == tb) {
963         ta = a->frac_lo, tb = b->frac_lo;
964         if (ta == tb) {
965             return 0;
966         }
967     }
968     return ta < tb ? -1 : 1;
969 }
970 
971 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
972 
/* Zero the significand. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
984 
/*
 * Divide significands: A = A / B, with the sticky (inexact) bit
 * folded into the quotient lsb.  Returns true when the result
 * exponent must be decremented by one.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1016 
/*
 * 128-bit significand division: A = A / B, sticky bit folded into
 * the quotient lsb.  Return convention matches frac64_div: true
 * (A < B) means the result exponent must be decremented by one.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* If A >= B, pre-shift A right one so the quotient fits in 128 bits. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1065 
1066 static bool frac64_eqz(FloatParts64 *a)
1067 {
1068     return a->frac == 0;
1069 }
1070 
1071 static bool frac128_eqz(FloatParts128 *a)
1072 {
1073     return (a->frac_hi | a->frac_lo) == 0;
1074 }
1075 
1076 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1077 
/* Widening multiply of significands: R (double width) = A * B. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1090 
/* Negate the significand (two's complement): A = -A. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    /* Compute 0 - A, borrow rippling from low to high word. */
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1113 
1114 static int frac64_normalize(FloatParts64 *a)
1115 {
1116     if (a->frac) {
1117         int shift = clz64(a->frac);
1118         a->frac <<= shift;
1119         return shift;
1120     }
1121     return 64;
1122 }
1123 
/*
 * Shift the 128-bit significand left until its msb is set; return
 * the shift count applied, or 128 if the significand was zero.
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        /* High word empty: promote the low word, shift is 64 larger. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1139 
/*
 * Shift the 256-bit significand left until its msb is set; return
 * the shift count applied, or 256 if the significand was zero.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /*
         * Find the highest non-zero word, shift everything up by
         * whole words, and account for 64 bits per promoted word.
         */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Final sub-word shift across all four words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1188 
/* Shift the significand left by C bits. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

/* 128-bit left shift; handles shift counts 0..127 via the word step
 * (c & 64) plus the sub-word step (c & 63). */
static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1213 
/* Shift the significand right by C bits; shifted-out bits are lost. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

/* 128-bit right shift; handles shift counts 0..127, mirroring
 * frac128_shl. */
static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1238 
/*
 * Shift the significand right by C bits, "jamming" any shifted-out
 * bits into the result lsb (sticky bit) so inexactness is preserved.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            /* shr_double(a0, 0, c) recovers the bits shifted out. */
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything shifted out: result is just the sticky bit. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1252 
/* 128-bit right shift with sticky bit (see frac64_shrjam). */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word step: low word is entirely shifted out. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift >= width: everything becomes sticky. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Sub-word step; collect bits shifted out of the low word. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1284 
/* 256-bit right shift with sticky bit (see frac64_shrjam). */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word steps of 128 then 64 bits, accumulating sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift >= width: everything becomes sticky. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word step; collect bits shifted out of the lowest word. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1328 
/*
 * Subtract significands: R = A - B.  Returns true on borrow-out of
 * the most significant word, i.e. A < B.
 */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    /* Borrow rippled from low word to high word. */
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1353 
/*
 * Narrow a double-width significand into R, jamming any discarded
 * low bits into the result lsb (sticky bit).
 */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1366 
/* Widen a significand into R, zero-filling the new low bits. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1382 
1383 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1384 #define FloatPartsN    glue(FloatParts,N)
1385 #define FloatPartsW    glue(FloatParts,W)
1386 
1387 #define N 64
1388 #define W 128
1389 
1390 #include "softfloat-parts-addsub.c.inc"
1391 #include "softfloat-parts.c.inc"
1392 
1393 #undef  N
1394 #undef  W
1395 #define N 128
1396 #define W 256
1397 
1398 #include "softfloat-parts-addsub.c.inc"
1399 #include "softfloat-parts.c.inc"
1400 
1401 #undef  N
1402 #undef  W
1403 #define N            256
1404 
1405 #include "softfloat-parts-addsub.c.inc"
1406 
1407 #undef  N
1408 #undef  W
1409 #undef  partsN
1410 #undef  FloatPartsN
1411 #undef  FloatPartsW
1412 
1413 /*
1414  * Pack/unpack routines with a specific FloatFmt.
1415  */
1416 
/*
 * Unpack a 16-bit float of the format described by PARAMS into
 * canonical parts form.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* As above, for the IEEE binary16 format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* As above, for the bfloat16 format. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/*
 * Round canonical parts to the 16-bit format described by PARAMS
 * and repack into a float16.
 */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1457 
/* Unpack a float32 into canonical parts form. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round canonical parts and repack into a float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack a float64 into canonical parts form. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round canonical parts and repack into a float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

/* Unpack a float128 into canonical parts form. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

/* Round canonical parts and repack into a float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1499 
1500 /*
1501  * Addition and subtraction
1502  */
1503 
/* Common implementation of float16 addition and subtraction. */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1525 
/* Softfloat slow path for float32 addition and subtraction. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1547 
/* Softfloat slow path for float64 addition and subtraction. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1569 
/* Host-FPU primitives used by the hardfloat fast path. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1589 
1590 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1591 {
1592     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1593         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1594     }
1595     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1596 }
1597 
1598 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1599 {
1600     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1601         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1602     } else {
1603         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1604     }
1605 }
1606 
/* Dispatch f32 add/sub between the hardfloat and softfloat paths. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

/* Dispatch f64 add/sub between the hardfloat and softfloat paths. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1620 
/* Public entry points for float32/float64 addition and subtraction. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1644 
/* Common implementation of bfloat16 addition and subtraction. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
1666 
/* Common implementation of float128 addition and subtraction. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
1688 
1689 /*
1690  * Multiplication
1691  */
1692 
/* float16 multiplication (no hardfloat fast path). */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
1703 
/* Softfloat slow path for float32 multiplication. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat slow path for float64 multiplication. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
1727 
/* Host-FPU multiply primitives for the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1737 
/* Public float32/float64 multiplication, hardfloat path when possible. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1751 
/* bfloat16 multiplication (no hardfloat fast path). */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

/* float128 multiplication (no hardfloat fast path). */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
1775 
1776 /*
1777  * Fused multiply-add
1778  */
1779 
/*
 * float16 fused multiply-add: a * b + c with a single rounding,
 * modified by FLAGS (float_muladd_* bits).
 */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

/* Softfloat slow path for float32 fused multiply-add. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat slow path for float64 fused multiply-add. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}
1820 
/*
 * When true, the muladd fast paths below always take the softfloat
 * fallback.  NOTE(review): no writer is visible in this file — presumably
 * a debug/testing hook toggled externally; confirm before relying on it.
 */
static bool force_soft_fma;
1822 
/*
 * Fused multiply-add: returns xa * xb + xc with a single rounding,
 * modified by the float_muladd_* bits in @flags.  Tries the host FPU
 * (fmaf) first and falls back to soft_f32_muladd() whenever the fast
 * path cannot be used safely.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* The float_status must permit the hardfloat path (see can_use_fpu). */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Result halving is only implemented in the softfloat path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* Flush inputs per status, then require all three zero-or-normal. */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    /* Global override: see force_soft_fma above. */
    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the correctly-signed zero product, then add c to it. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep the un-negated operands: the soft fallback needs them. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Result at or below minimum normal: redo in softfloat so
             * underflow is flagged correctly. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1893 
1894 float64 QEMU_FLATTEN
1895 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1896 {
1897     union_float64 ua, ub, uc, ur;
1898 
1899     ua.s = xa;
1900     ub.s = xb;
1901     uc.s = xc;
1902 
1903     if (unlikely(!can_use_fpu(s))) {
1904         goto soft;
1905     }
1906     if (unlikely(flags & float_muladd_halve_result)) {
1907         goto soft;
1908     }
1909 
1910     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1911     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1912         goto soft;
1913     }
1914 
1915     if (unlikely(force_soft_fma)) {
1916         goto soft;
1917     }
1918 
1919     /*
1920      * When (a || b) == 0, there's no need to check for under/over flow,
1921      * since we know the addend is (normal || 0) and the product is 0.
1922      */
1923     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1924         union_float64 up;
1925         bool prod_sign;
1926 
1927         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1928         prod_sign ^= !!(flags & float_muladd_negate_product);
1929         up.s = float64_set_sign(float64_zero, prod_sign);
1930 
1931         if (flags & float_muladd_negate_c) {
1932             uc.h = -uc.h;
1933         }
1934         ur.h = up.h + uc.h;
1935     } else {
1936         union_float64 ua_orig = ua;
1937         union_float64 uc_orig = uc;
1938 
1939         if (flags & float_muladd_negate_product) {
1940             ua.h = -ua.h;
1941         }
1942         if (flags & float_muladd_negate_c) {
1943             uc.h = -uc.h;
1944         }
1945 
1946         ur.h = fma(ua.h, ub.h, uc.h);
1947 
1948         if (unlikely(f64_is_inf(ur))) {
1949             float_raise(float_flag_overflow, s);
1950         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1951             ua = ua_orig;
1952             uc = uc_orig;
1953             goto soft;
1954         }
1955     }
1956     if (flags & float_muladd_negate_result) {
1957         return float64_chs(ur.s);
1958     }
1959     return ur.s;
1960 
1961  soft:
1962     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1963 }
1964 
1965 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1966                                       int flags, float_status *status)
1967 {
1968     FloatParts64 pa, pb, pc, *pr;
1969 
1970     bfloat16_unpack_canonical(&pa, a, status);
1971     bfloat16_unpack_canonical(&pb, b, status);
1972     bfloat16_unpack_canonical(&pc, c, status);
1973     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1974 
1975     return bfloat16_round_pack_canonical(pr, status);
1976 }
1977 
1978 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1979                                       int flags, float_status *status)
1980 {
1981     FloatParts128 pa, pb, pc, *pr;
1982 
1983     float128_unpack_canonical(&pa, a, status);
1984     float128_unpack_canonical(&pb, b, status);
1985     float128_unpack_canonical(&pc, c, status);
1986     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1987 
1988     return float128_round_pack_canonical(pr, status);
1989 }
1990 
1991 /*
1992  * Division
1993  */
1994 
1995 float16 float16_div(float16 a, float16 b, float_status *status)
1996 {
1997     FloatParts64 pa, pb, *pr;
1998 
1999     float16_unpack_canonical(&pa, a, status);
2000     float16_unpack_canonical(&pb, b, status);
2001     pr = parts_div(&pa, &pb, status);
2002 
2003     return float16_round_pack_canonical(pr, status);
2004 }
2005 
2006 static float32 QEMU_SOFTFLOAT_ATTR
2007 soft_f32_div(float32 a, float32 b, float_status *status)
2008 {
2009     FloatParts64 pa, pb, *pr;
2010 
2011     float32_unpack_canonical(&pa, a, status);
2012     float32_unpack_canonical(&pb, b, status);
2013     pr = parts_div(&pa, &pb, status);
2014 
2015     return float32_round_pack_canonical(pr, status);
2016 }
2017 
2018 static float64 QEMU_SOFTFLOAT_ATTR
2019 soft_f64_div(float64 a, float64 b, float_status *status)
2020 {
2021     FloatParts64 pa, pb, *pr;
2022 
2023     float64_unpack_canonical(&pa, a, status);
2024     float64_unpack_canonical(&pb, b, status);
2025     pr = parts_div(&pa, &pb, status);
2026 
2027     return float64_round_pack_canonical(pr, status);
2028 }
2029 
/* Single-precision division on the host FPU; float32_div() fast path. */
static float hard_f32_div(float num, float den)
{
    return num / den;
}
2034 
/* Double-precision division on the host FPU; float64_div() fast path. */
static double hard_f64_div(double num, double den)
{
    return num / den;
}
2039 
2040 static bool f32_div_pre(union_float32 a, union_float32 b)
2041 {
2042     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2043         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2044                fpclassify(b.h) == FP_NORMAL;
2045     }
2046     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2047 }
2048 
2049 static bool f64_div_pre(union_float64 a, union_float64 b)
2050 {
2051     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2052         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2053                fpclassify(b.h) == FP_NORMAL;
2054     }
2055     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2056 }
2057 
2058 static bool f32_div_post(union_float32 a, union_float32 b)
2059 {
2060     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2061         return fpclassify(a.h) != FP_ZERO;
2062     }
2063     return !float32_is_zero(a.s);
2064 }
2065 
2066 static bool f64_div_post(union_float64 a, union_float64 b)
2067 {
2068     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2069         return fpclassify(a.h) != FP_ZERO;
2070     }
2071     return !float64_is_zero(a.s);
2072 }
2073 
/*
 * Division with a hardfloat fast path: dispatches through float32_gen2()
 * with hard_f32_div() and the f32_div_pre()/f32_div_post() guards, and
 * soft_f32_div() as the fallback (see float32_gen2 for the dispatch
 * contract).
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
2080 
/*
 * Division with a hardfloat fast path: dispatches through float64_gen2()
 * with hard_f64_div() and the f64_div_pre()/f64_div_post() guards, and
 * soft_f64_div() as the fallback (see float64_gen2 for the dispatch
 * contract).
 */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2087 
2088 bfloat16 QEMU_FLATTEN
2089 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2090 {
2091     FloatParts64 pa, pb, *pr;
2092 
2093     bfloat16_unpack_canonical(&pa, a, status);
2094     bfloat16_unpack_canonical(&pb, b, status);
2095     pr = parts_div(&pa, &pb, status);
2096 
2097     return bfloat16_round_pack_canonical(pr, status);
2098 }
2099 
2100 float128 QEMU_FLATTEN
2101 float128_div(float128 a, float128 b, float_status *status)
2102 {
2103     FloatParts128 pa, pb, *pr;
2104 
2105     float128_unpack_canonical(&pa, a, status);
2106     float128_unpack_canonical(&pb, b, status);
2107     pr = parts_div(&pa, &pb, status);
2108 
2109     return float128_round_pack_canonical(pr, status);
2110 }
2111 
2112 /*
2113  * Float to Float conversions
2114  *
2115  * Returns the result of converting one float format to another. The
2116  * conversion is performed according to the IEC/IEEE Standard for
2117  * Binary Floating-Point Arithmetic.
2118  *
2119  * Usually this only needs to take care of raising invalid exceptions
2120  * and handling the conversion on NaNs.
2121  */
2122 
/*
 * Squash *a, in place, into a value representable in the Arm alternative
 * half-precision (AHP) format, which encodes neither Inf nor NaN.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        /* Representable as-is. */
        break;

    default:
        g_assert_not_reached();
    }
}
2156 
2157 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2158 {
2159     if (is_nan(a->cls)) {
2160         parts_return_nan(a, s);
2161     }
2162 }
2163 
2164 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2165 {
2166     if (is_nan(a->cls)) {
2167         parts_return_nan(a, s);
2168     }
2169 }
2170 
/* Expand to the 64- or 128-bit variant above according to the type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2173 
/*
 * Narrow 128-bit parts (b) into 64-bit parts (a).  Normal fractions go
 * through frac_truncjam(); NaNs keep only the high 64 fraction bits and
 * are then processed by parts_return_nan().  Other classes leave a->frac
 * untouched.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2189 
2190 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2191                                        float_status *s)
2192 {
2193     a->cls = b->cls;
2194     a->sign = b->sign;
2195     a->exp = b->exp;
2196     frac_widen(a, b);
2197 
2198     if (is_nan(a->cls)) {
2199         parts_return_nan(a, s);
2200     }
2201 }
2202 
2203 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2204 {
2205     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2206     FloatParts64 p;
2207 
2208     float16a_unpack_canonical(&p, a, s, fmt16);
2209     parts_float_to_float(&p, s);
2210     return float32_round_pack_canonical(&p, s);
2211 }
2212 
2213 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2214 {
2215     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2216     FloatParts64 p;
2217 
2218     float16a_unpack_canonical(&p, a, s, fmt16);
2219     parts_float_to_float(&p, s);
2220     return float64_round_pack_canonical(&p, s);
2221 }
2222 
2223 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2224 {
2225     FloatParts64 p;
2226     const FloatFmt *fmt;
2227 
2228     float32_unpack_canonical(&p, a, s);
2229     if (ieee) {
2230         parts_float_to_float(&p, s);
2231         fmt = &float16_params;
2232     } else {
2233         parts_float_to_ahp(&p, s);
2234         fmt = &float16_params_ahp;
2235     }
2236     return float16a_round_pack_canonical(&p, s, fmt);
2237 }
2238 
2239 static float64 QEMU_SOFTFLOAT_ATTR
2240 soft_float32_to_float64(float32 a, float_status *s)
2241 {
2242     FloatParts64 p;
2243 
2244     float32_unpack_canonical(&p, a, s);
2245     parts_float_to_float(&p, s);
2246     return float64_round_pack_canonical(&p, s);
2247 }
2248 
2249 float64 float32_to_float64(float32 a, float_status *s)
2250 {
2251     if (likely(float32_is_normal(a))) {
2252         /* Widening conversion can never produce inexact results.  */
2253         union_float32 uf;
2254         union_float64 ud;
2255         uf.s = a;
2256         ud.h = uf.h;
2257         return ud.s;
2258     } else if (float32_is_zero(a)) {
2259         return float64_set_sign(float64_zero, float32_is_neg(a));
2260     } else {
2261         return soft_float32_to_float64(a, s);
2262     }
2263 }
2264 
2265 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2266 {
2267     FloatParts64 p;
2268     const FloatFmt *fmt;
2269 
2270     float64_unpack_canonical(&p, a, s);
2271     if (ieee) {
2272         parts_float_to_float(&p, s);
2273         fmt = &float16_params;
2274     } else {
2275         parts_float_to_ahp(&p, s);
2276         fmt = &float16_params_ahp;
2277     }
2278     return float16a_round_pack_canonical(&p, s, fmt);
2279 }
2280 
2281 float32 float64_to_float32(float64 a, float_status *s)
2282 {
2283     FloatParts64 p;
2284 
2285     float64_unpack_canonical(&p, a, s);
2286     parts_float_to_float(&p, s);
2287     return float32_round_pack_canonical(&p, s);
2288 }
2289 
2290 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2291 {
2292     FloatParts64 p;
2293 
2294     bfloat16_unpack_canonical(&p, a, s);
2295     parts_float_to_float(&p, s);
2296     return float32_round_pack_canonical(&p, s);
2297 }
2298 
2299 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2300 {
2301     FloatParts64 p;
2302 
2303     bfloat16_unpack_canonical(&p, a, s);
2304     parts_float_to_float(&p, s);
2305     return float64_round_pack_canonical(&p, s);
2306 }
2307 
2308 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2309 {
2310     FloatParts64 p;
2311 
2312     float32_unpack_canonical(&p, a, s);
2313     parts_float_to_float(&p, s);
2314     return bfloat16_round_pack_canonical(&p, s);
2315 }
2316 
2317 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2318 {
2319     FloatParts64 p;
2320 
2321     float64_unpack_canonical(&p, a, s);
2322     parts_float_to_float(&p, s);
2323     return bfloat16_round_pack_canonical(&p, s);
2324 }
2325 
2326 float32 float128_to_float32(float128 a, float_status *s)
2327 {
2328     FloatParts64 p64;
2329     FloatParts128 p128;
2330 
2331     float128_unpack_canonical(&p128, a, s);
2332     parts_float_to_float_narrow(&p64, &p128, s);
2333     return float32_round_pack_canonical(&p64, s);
2334 }
2335 
2336 float64 float128_to_float64(float128 a, float_status *s)
2337 {
2338     FloatParts64 p64;
2339     FloatParts128 p128;
2340 
2341     float128_unpack_canonical(&p128, a, s);
2342     parts_float_to_float_narrow(&p64, &p128, s);
2343     return float64_round_pack_canonical(&p64, s);
2344 }
2345 
2346 float128 float32_to_float128(float32 a, float_status *s)
2347 {
2348     FloatParts64 p64;
2349     FloatParts128 p128;
2350 
2351     float32_unpack_canonical(&p64, a, s);
2352     parts_float_to_float_widen(&p128, &p64, s);
2353     return float128_round_pack_canonical(&p128, s);
2354 }
2355 
2356 float128 float64_to_float128(float64 a, float_status *s)
2357 {
2358     FloatParts64 p64;
2359     FloatParts128 p128;
2360 
2361     float64_unpack_canonical(&p64, a, s);
2362     parts_float_to_float_widen(&p128, &p64, s);
2363     return float128_round_pack_canonical(&p128, s);
2364 }
2365 
2366 /*
2367  * Round to integral value
2368  */
2369 
2370 float16 float16_round_to_int(float16 a, float_status *s)
2371 {
2372     FloatParts64 p;
2373 
2374     float16_unpack_canonical(&p, a, s);
2375     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2376     return float16_round_pack_canonical(&p, s);
2377 }
2378 
2379 float32 float32_round_to_int(float32 a, float_status *s)
2380 {
2381     FloatParts64 p;
2382 
2383     float32_unpack_canonical(&p, a, s);
2384     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2385     return float32_round_pack_canonical(&p, s);
2386 }
2387 
2388 float64 float64_round_to_int(float64 a, float_status *s)
2389 {
2390     FloatParts64 p;
2391 
2392     float64_unpack_canonical(&p, a, s);
2393     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2394     return float64_round_pack_canonical(&p, s);
2395 }
2396 
2397 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2398 {
2399     FloatParts64 p;
2400 
2401     bfloat16_unpack_canonical(&p, a, s);
2402     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2403     return bfloat16_round_pack_canonical(&p, s);
2404 }
2405 
2406 float128 float128_round_to_int(float128 a, float_status *s)
2407 {
2408     FloatParts128 p;
2409 
2410     float128_unpack_canonical(&p, a, s);
2411     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2412     return float128_round_pack_canonical(&p, s);
2413 }
2414 
2415 /*
2416  * Floating-point to signed integer conversions
2417  */
2418 
/*
 * float*_to_int*_scalbn(): scale the input by 2**scale, then convert to
 * the named signed integer type, rounding according to @rmode.  The
 * INT*_MIN/INT*_MAX limits of the destination type are passed through to
 * parts_float_to_sint().
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

/* File-local: reached via the float128_to_int32/64 wrappers below. */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2553 
/*
 * Convenience wrappers, all with scale == 0: the plain set rounds with
 * the current s->float_rounding_mode, the *_round_to_zero set with
 * float_round_to_zero (truncation).
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2698 
2699 /*
2700  * Floating-point to unsigned integer conversions
2701  */
2702 
/*
 * float*_to_uint*_scalbn(): as the signed conversions above, but the
 * destination is unsigned, so only the upper bound (UINT*_MAX) is passed
 * through to parts_float_to_uint().
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

/* File-local: reached via the float128_to_uint32/64 wrappers below. */
static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2837 
/*
 * Convenience wrappers: convert with the rounding mode currently set in
 * the float_status, and no scaling.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/*
 * Convenience wrappers: always truncate toward zero, ignoring the
 * rounding mode in the float_status.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

/* bfloat16 variants of the two wrapper groups above. */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2982 
2983 /*
2984  * Signed integer to floating-point conversions
2985  */
2986 
2987 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2988 {
2989     FloatParts64 p;
2990 
2991     parts_sint_to_float(&p, a, scale, status);
2992     return float16_round_pack_canonical(&p, status);
2993 }
2994 
/*
 * Narrower signed types widen exactly to int64_t, so all of these reuse
 * the 64-bit conversion above.
 */
float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3024 
3025 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3026 {
3027     FloatParts64 p;
3028 
3029     parts64_sint_to_float(&p, a, scale, status);
3030     return float32_round_pack_canonical(&p, status);
3031 }
3032 
/* Narrower signed types widen exactly to int64_t; reuse the 64-bit path. */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3056 }
3057 
3058 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3059 {
3060     FloatParts64 p;
3061 
3062     parts_sint_to_float(&p, a, scale, status);
3063     return float64_round_pack_canonical(&p, status);
3064 }
3065 
/* Narrower signed types widen exactly to int64_t; reuse the 64-bit path. */
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3089 }
3090 
/* Convert the signed integer @a, scaled by 2**@scale, to bfloat16. */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

/* float128 has no scalbn variant here; scale is fixed at 0. */
float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3136 
/*
 * Unsigned integer to floating-point conversions
 */
3140 
3141 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3142 {
3143     FloatParts64 p;
3144 
3145     parts_uint_to_float(&p, a, scale, status);
3146     return float16_round_pack_canonical(&p, status);
3147 }
3148 
/* Narrower unsigned types widen exactly to uint64_t; reuse the 64-bit path. */
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3177 }
3178 
/* Convert the unsigned integer @a, scaled by 2**@scale, to float32. */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

/* Convert the unsigned integer @a, scaled by 2**@scale, to float64. */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

/* Convert the unsigned integer @a, scaled by 2**@scale, to bfloat16. */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

/* float128 has no scalbn variant here; scale is fixed at 0. */
float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
3285 
3286 /*
3287  * Minimum and maximum
3288  */
3289 
/*
 * Unpack both operands and let parts_minmax() choose the result according
 * to @flags (a mask of minmax_ismin / minmax_isnum / minmax_ismag); the
 * returned pointer designates the parts to repack.
 */
static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}
3346 
/*
 * Expand the six public min/max entry points for each format, e.g. for
 * float32: float32_max, float32_maxnum, float32_maxnummag, float32_min,
 * float32_minnum, float32_minnummag.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                      \
    MINMAX_1(type, maxnum, minmax_isnum)                        \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
    MINMAX_1(type, min, minmax_ismin)                           \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
3367 
3368 /*
3369  * Floating point compare
3370  */
3371 
/*
 * Compare @a and @b; quiet-vs-signalling NaN handling is done by
 * parts_compare() according to @is_quiet.
 */
static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}
3391 
/* Softfloat comparison path; see float32_hs_compare for the fast path. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path: use the host's <math.h> comparison macros, which
 * are defined not to raise for unordered operands, and fall back to the
 * softfloat path for the unordered case so status flags get set there.
 * Note the soft path receives ua.s/ub.s, i.e. the possibly-flushed
 * inputs (identical to xa/xb when QEMU_NO_HARDFLOAT short-circuits).
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}
3441 
/* Softfloat comparison path; see float64_hs_compare for the fast path. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path; mirrors float32_hs_compare.  The unordered case
 * falls through to the softfloat path so status flags get set there,
 * using the possibly-flushed ua.s/ub.s values.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}
3491 
/* bfloat16 and float128 comparisons: softfloat only, no hardfloat path. */
static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}
3531 
/* Multiply A by 2 raised to the power N.  */
/*
 * NaN inputs are processed by parts_return_nan(); zero and infinity pass
 * through unchanged (only float_class_normal adjusts the exponent).
 */
static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
{
    if (unlikely(is_nan(a.cls))) {
        parts_return_nan(&a, s);
    }
    if (a.cls == float_class_normal) {
        /* The largest float type (even though not supported by FloatParts64)
         * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
         * still allows rounding to infinity, without allowing overflow
         * within the int32_t that backs FloatParts64.exp.
         */
        n = MIN(MAX(n, -0x10000), 0x10000);
        a.exp += n;
    }
    return a;
}
3549 
/* Per-format scalbn: unpack, adjust the exponent, round and repack. */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3585 
3586 /*
3587  * Square Root
3588  *
3589  * The old softfloat code did an approximation step before zeroing in
3590  * on the final result. However for simpleness we just compute the
3591  * square root by iterating down from the implicit bit to enough extra
3592  * bits to ensure we get a correctly rounded result.
3593  *
3594  * This does mean however the calculation is slower than before,
3595  * especially for 64 bit floats.
3596  */
3597 
/*
 * Bit-by-bit (restoring) square root on the decomposed fraction.
 * Special cases first: NaN, +-0, negative (invalid -> default NaN),
 * and +inf all return without entering the iteration.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3657 
/* float16 has no hardfloat fast path; always use sqrt_float(). */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

/* Softfloat fallbacks for the hardfloat entry points below. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3686 
/*
 * Hardfloat sqrt: use the host sqrtf() only when the (possibly flushed)
 * input is a non-negative normal or zero, so no status flags can be
 * needed; anything else goes to the softfloat fallback.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

/* Same structure as float32_sqrt, using the host's double sqrt(). */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}

/* bfloat16 has no hardfloat fast path; always use sqrt_float(). */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&pr, status);
}
3749 
3750 /*----------------------------------------------------------------------------
3751 | The pattern for a default generated NaN.
3752 *----------------------------------------------------------------------------*/
3753 
/*
 * Build the target-dependent default NaN pattern: parts_default_nan()
 * fills in the decomposed representation, which is then shifted down
 * into the raw fraction field and packed.
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3798 
3799 /*----------------------------------------------------------------------------
3800 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3801 *----------------------------------------------------------------------------*/
3802 
/*
 * Quiet a signalling NaN: unpack the raw fields, shift the fraction up
 * into the decomposed position so parts_silence_nan() can operate on it,
 * then shift back down and repack.
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3857 
3858 /*----------------------------------------------------------------------------
3859 | If `a' is denormal and we are in flush-to-zero mode then set the
3860 | input-denormal exception and return zero. Otherwise just return the value.
3861 *----------------------------------------------------------------------------*/
3862 
3863 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3864 {
3865     if (p.exp == 0 && p.frac != 0) {
3866         float_raise(float_flag_input_denormal, status);
3867         return true;
3868     }
3869 
3870     return false;
3871 }
3872 
3873 float16 float16_squash_input_denormal(float16 a, float_status *status)
3874 {
3875     if (status->flush_inputs_to_zero) {
3876         FloatParts64 p;
3877 
3878         float16_unpack_raw(&p, a);
3879         if (parts_squash_denormal(p, status)) {
3880             return float16_set_sign(float16_zero, p.sign);
3881         }
3882     }
3883     return a;
3884 }
3885 
3886 float32 float32_squash_input_denormal(float32 a, float_status *status)
3887 {
3888     if (status->flush_inputs_to_zero) {
3889         FloatParts64 p;
3890 
3891         float32_unpack_raw(&p, a);
3892         if (parts_squash_denormal(p, status)) {
3893             return float32_set_sign(float32_zero, p.sign);
3894         }
3895     }
3896     return a;
3897 }
3898 
3899 float64 float64_squash_input_denormal(float64 a, float_status *status)
3900 {
3901     if (status->flush_inputs_to_zero) {
3902         FloatParts64 p;
3903 
3904         float64_unpack_raw(&p, a);
3905         if (parts_squash_denormal(p, status)) {
3906             return float64_set_sign(float64_zero, p.sign);
3907         }
3908     }
3909     return a;
3910 }
3911 
3912 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3913 {
3914     if (status->flush_inputs_to_zero) {
3915         FloatParts64 p;
3916 
3917         bfloat16_unpack_raw(&p, a);
3918         if (parts_squash_denormal(p, status)) {
3919             return bfloat16_set_sign(bfloat16_zero, p.sign);
3920         }
3921     }
3922     return a;
3923 }
3924 
3925 /*----------------------------------------------------------------------------
3926 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3927 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3928 | input.  If `zSign' is 1, the input is negated before being converted to an
3929 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3930 | is simply rounded to an integer, with the inexact exception raised if the
3931 | input cannot be represented exactly as an integer.  However, if the fixed-
3932 | point input is too large, the invalid exception is raised and the largest
3933 | positive or negative integer is returned.
3934 *----------------------------------------------------------------------------*/
3935 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the increment added below the binary point (bit 6): 0x40
     * rounds ties upward (even-ness is fixed up after the shift), 0x7f
     * rounds away from zero, 0 truncates.  For round-to-odd, increment
     * only when the result's LSB (bit 7) would otherwise be clear.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    /* The 7 fraction bits that will be discarded by the shift below. */
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exactly halfway in nearest-even mode: clear the LSB (round to even). */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /*
     * Out of int32 range if the rounded magnitude needed more than 32 bits,
     * or if the 32-bit result's sign disagrees with the requested sign.
     */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    /* Any discarded bits mean the conversion was inexact. */
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3983 
3984 /*----------------------------------------------------------------------------
3985 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3986 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3987 | and returns the properly rounded 64-bit integer corresponding to the input.
3988 | If `zSign' is 1, the input is negated before being converted to an integer.
3989 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3990 | the inexact exception raised if the input cannot be represented exactly as
3991 | an integer.  However, if the fixed-point input is too large, the invalid
3992 | exception is raised and the largest positive or negative integer is
3993 | returned.
3994 *----------------------------------------------------------------------------*/
3995 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * absZ1 holds the fraction below the binary point; decide whether to
     * increment the integer part absZ0.  For nearest modes, increment when
     * the fraction's top bit is set (i.e. fraction >= 1/2).
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Carry out of the 64-bit integer part: magnitude too large. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exactly halfway in nearest-even mode: clear LSB (round to even). */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign disagreement means the value did not fit in int64. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    /* A nonzero fraction means the conversion was inexact. */
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4045 
4046 /*----------------------------------------------------------------------------
4047 | Normalizes the subnormal single-precision floating-point value represented
4048 | by the denormalized significand `aSig'.  The normalized exponent and
4049 | significand are stored at the locations pointed to by `zExpPtr' and
4050 | `zSigPtr', respectively.
4051 *----------------------------------------------------------------------------*/
4052 
static void normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr,
                                      uint32_t *zSigPtr)
{
    /*
     * Shift the leading set bit of the 23-bit fraction up into the
     * implicit-one position: a 32-bit word has 8 bits above the fraction.
     */
    int8_t n = clz32(aSig) - 8;

    *zSigPtr = aSig << n;
    *zExpPtr = 1 - n;
}
4063 
4064 /*----------------------------------------------------------------------------
4065 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4066 | and significand `zSig', and returns the proper single-precision floating-
4067 | point value corresponding to the abstract input.  Ordinarily, the abstract
4068 | value is simply rounded and packed into the single-precision format, with
4069 | the inexact exception raised if the abstract input cannot be represented
4070 | exactly.  However, if the abstract value is too large, the overflow and
4071 | inexact exceptions are raised and an infinity or maximal finite value is
4072 | returned.  If the abstract value is too small, the input value is rounded to
4073 | a subnormal number, and the underflow and inexact exceptions are raised if
4074 | the abstract input cannot be represented exactly as a subnormal single-
4075 | precision floating-point number.
4076 |     The input significand `zSig' has its binary point between bits 30
4077 | and 29, which is 7 bits to the left of the usual location.  This shifted
4078 | significand must be normalized or smaller.  If `zSig' is not normalized,
4079 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4080 | and it must not require rounding.  In the usual case that `zSig' is
4081 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4082 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4083 | Binary Floating-Point Arithmetic.
4084 *----------------------------------------------------------------------------*/
4085 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the increment added below the rounding point (bit 6): 0x40
     * rounds ties upward (even-ness fixed up after the shift), 0x7f rounds
     * away from zero, 0 truncates.  For round-to-odd, increment only when
     * the result's LSB (bit 7) would otherwise be clear.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    /* The 7 bits that will be discarded by the final shift. */
    roundBits = zSig & 0x7F;
    /*
     * The unsigned cast makes negative zExp compare large, so this single
     * test catches both the overflow (zExp >= 0xFD) and subnormal
     * (zExp < 0) cases.
     */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow: round-to-odd and the truncating direction of the
             * directed modes (roundIncrement == 0) return the maximal
             * finite value (-1 fraction) rather than infinity.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /*
             * Tininess is detected before rounding if so configured,
             * or when the value would still be subnormal after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            /* Shift right, jamming any shifted-out bits into the sticky bit. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exactly halfway in nearest-even mode: clear the LSB (round to even). */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A zero significand must be packed with a zero exponent. */
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4162 
4163 /*----------------------------------------------------------------------------
4164 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4165 | and significand `zSig', and returns the proper single-precision floating-
4166 | point value corresponding to the abstract input.  This routine is just like
4167 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4168 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4169 | floating-point exponent.
4170 *----------------------------------------------------------------------------*/
4171 
4172 static float32
4173  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4174                               float_status *status)
4175 {
4176     int8_t shiftCount;
4177 
4178     shiftCount = clz32(zSig) - 1;
4179     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4180                                status);
4181 
4182 }
4183 
4184 /*----------------------------------------------------------------------------
4185 | Normalizes the subnormal double-precision floating-point value represented
4186 | by the denormalized significand `aSig'.  The normalized exponent and
4187 | significand are stored at the locations pointed to by `zExpPtr' and
4188 | `zSigPtr', respectively.
4189 *----------------------------------------------------------------------------*/
4190 
static void normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr,
                                      uint64_t *zSigPtr)
{
    /*
     * Shift the leading set bit of the 52-bit fraction up into the
     * implicit-one position: a 64-bit word has 11 bits above the fraction.
     */
    int8_t n = clz64(aSig) - 11;

    *zSigPtr = aSig << n;
    *zExpPtr = 1 - n;
}
4201 
4202 /*----------------------------------------------------------------------------
4203 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4204 | double-precision floating-point value, returning the result.  After being
4205 | shifted into the proper positions, the three fields are simply added
4206 | together to form the result.  This means that any integer portion of `zSig'
4207 | will be added into the exponent.  Since a properly normalized significand
4208 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4209 | than the desired result exponent whenever `zSig' is a complete, normalized
4210 | significand.
4211 *----------------------------------------------------------------------------*/
4212 
4213 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4214 {
4215 
4216     return make_float64(
4217         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4218 
4219 }
4220 
4221 /*----------------------------------------------------------------------------
4222 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4223 | and significand `zSig', and returns the proper double-precision floating-
4224 | point value corresponding to the abstract input.  Ordinarily, the abstract
4225 | value is simply rounded and packed into the double-precision format, with
4226 | the inexact exception raised if the abstract input cannot be represented
4227 | exactly.  However, if the abstract value is too large, the overflow and
4228 | inexact exceptions are raised and an infinity or maximal finite value is
4229 | returned.  If the abstract value is too small, the input value is rounded to
4230 | a subnormal number, and the underflow and inexact exceptions are raised if
4231 | the abstract input cannot be represented exactly as a subnormal double-
4232 | precision floating-point number.
4233 |     The input significand `zSig' has its binary point between bits 62
4234 | and 61, which is 10 bits to the left of the usual location.  This shifted
4235 | significand must be normalized or smaller.  If `zSig' is not normalized,
4236 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4237 | and it must not require rounding.  In the usual case that `zSig' is
4238 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4239 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4240 | Binary Floating-Point Arithmetic.
4241 *----------------------------------------------------------------------------*/
4242 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the increment added below the rounding point (bit 9): 0x200
     * rounds ties upward (even-ness fixed up after the shift), 0x3ff rounds
     * away from zero, 0 truncates.  For round-to-odd, increment only when
     * the result's LSB (bit 10) would otherwise be clear.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    /* The 10 bits that will be discarded by the final shift. */
    roundBits = zSig & 0x3FF;
    /*
     * The unsigned cast makes negative zExp compare large, so this single
     * test catches both the overflow (zExp >= 0x7FD) and subnormal
     * (zExp < 0) cases.
     */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow: round-to-odd and the truncating direction of the
             * directed modes (roundIncrement == 0) return the maximal
             * finite value (-1 fraction) rather than infinity.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess is detected before rounding if so configured,
             * or when the value would still be subnormal after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Shift right, jamming any shifted-out bits into the sticky bit. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exactly halfway in nearest-even mode: clear the LSB (round to even). */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A zero significand must be packed with a zero exponent. */
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4318 
4319 /*----------------------------------------------------------------------------
4320 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4321 | and significand `zSig', and returns the proper double-precision floating-
4322 | point value corresponding to the abstract input.  This routine is just like
4323 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4324 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4325 | floating-point exponent.
4326 *----------------------------------------------------------------------------*/
4327 
4328 static float64
4329  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4330                               float_status *status)
4331 {
4332     int8_t shiftCount;
4333 
4334     shiftCount = clz64(zSig) - 1;
4335     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4336                                status);
4337 
4338 }
4339 
4340 /*----------------------------------------------------------------------------
4341 | Normalizes the subnormal extended double-precision floating-point value
4342 | represented by the denormalized significand `aSig'.  The normalized exponent
4343 | and significand are stored at the locations pointed to by `zExpPtr' and
4344 | `zSigPtr', respectively.
4345 *----------------------------------------------------------------------------*/
4346 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * floatx80 has an explicit integer bit, so the leading set bit is
     * normalized all the way up to bit 63.
     */
    int8_t n = clz64(aSig);

    *zSigPtr = aSig << n;
    *zExpPtr = 1 - n;
}
4356 
4357 /*----------------------------------------------------------------------------
4358 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4359 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4360 | and returns the proper extended double-precision floating-point value
4361 | corresponding to the abstract input.  Ordinarily, the abstract value is
4362 | rounded and packed into the extended double-precision format, with the
4363 | inexact exception raised if the abstract input cannot be represented
4364 | exactly.  However, if the abstract value is too large, the overflow and
4365 | inexact exceptions are raised and an infinity or maximal finite value is
4366 | returned.  If the abstract value is too small, the input value is rounded to
4367 | a subnormal number, and the underflow and inexact exceptions are raised if
4368 | the abstract input cannot be represented exactly as a subnormal extended
4369 | double-precision floating-point number.
4370 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4371 | number of bits as single or double precision, respectively.  Otherwise, the
4372 | result is rounded to the full precision of the extended double-precision
4373 | format.
4374 |     The input significand must be normalized or smaller.  If the input
4375 | significand is not normalized, `zExp' must be 0; in that case, the result
4376 | returned is a subnormal number, and it must not require rounding.  The
4377 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4378 | Floating-Point Arithmetic.
4379 *----------------------------------------------------------------------------*/
4380 
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * For reduced (32/64-bit) precision the rounding point moves up into
     * zSig0; roundMask covers the bits of zSig0 that will be discarded.
     * Any other precision value falls through to the full 80-bit path.
     */
    if ( roundingPrecision == 80 ) goto precision80;
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold any bits of zSig1 into the sticky (least) bit of zSig0. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /*
     * The unsigned cast makes (zExp - 1) compare large for zExp <= 0, so
     * this single test catches both the overflow and subnormal cases.
     */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tininess is detected before rounding if so configured, or
             * when the value would still be subnormal after rounding.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            /* Denormalize, jamming shifted-out bits into the sticky bit. */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding carried into the integer bit: result is normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Halfway case in nearest-even: also clear the result's LSB. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize and bump the exponent. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Halfway case in nearest-even: also clear the result's LSB. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
    /*
     * Full 80-bit precision: zSig1 holds the bits below the rounding
     * point, and rounding decides whether to increment zSig0 by one.
     */
 precision80:
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Truncating directions return the largest finite value;
             * otherwise overflow produces infinity.
             */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /*
             * Tininess is detected before rounding if so configured, or
             * when rounding cannot carry up into the integer bit.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize; shifted-out bits are jammed into zSig1. */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed during the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Halfway case in nearest-even: round to even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit: result is normal again. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* Carry out of the significand: renormalize and bump exponent. */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Halfway case in nearest-even: round to even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4568 
4569 /*----------------------------------------------------------------------------
4570 | Takes an abstract floating-point value having sign `zSign', exponent
4571 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4572 | and returns the proper extended double-precision floating-point value
4573 | corresponding to the abstract input.  This routine is just like
4574 | `roundAndPackFloatx80' except that the input significand does not have to be
4575 | normalized.
4576 *----------------------------------------------------------------------------*/
4577 
4578 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4579                                        bool zSign, int32_t zExp,
4580                                        uint64_t zSig0, uint64_t zSig1,
4581                                        float_status *status)
4582 {
4583     int8_t shiftCount;
4584 
4585     if ( zSig0 == 0 ) {
4586         zSig0 = zSig1;
4587         zSig1 = 0;
4588         zExp -= 64;
4589     }
4590     shiftCount = clz64(zSig0);
4591     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4592     zExp -= shiftCount;
4593     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4594                                 zSig0, zSig1, status);
4595 
4596 }
4597 
4598 /*----------------------------------------------------------------------------
4599 | Returns the least-significant 64 fraction bits of the quadruple-precision
4600 | floating-point value `a'.
4601 *----------------------------------------------------------------------------*/
4602 
4603 static inline uint64_t extractFloat128Frac1( float128 a )
4604 {
4605 
4606     return a.low;
4607 
4608 }
4609 
4610 /*----------------------------------------------------------------------------
4611 | Returns the most-significant 48 fraction bits of the quadruple-precision
4612 | floating-point value `a'.
4613 *----------------------------------------------------------------------------*/
4614 
4615 static inline uint64_t extractFloat128Frac0( float128 a )
4616 {
4617 
4618     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4619 
4620 }
4621 
4622 /*----------------------------------------------------------------------------
4623 | Returns the exponent bits of the quadruple-precision floating-point value
4624 | `a'.
4625 *----------------------------------------------------------------------------*/
4626 
4627 static inline int32_t extractFloat128Exp( float128 a )
4628 {
4629 
4630     return ( a.high>>48 ) & 0x7FFF;
4631 
4632 }
4633 
4634 /*----------------------------------------------------------------------------
4635 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4636 *----------------------------------------------------------------------------*/
4637 
4638 static inline bool extractFloat128Sign(float128 a)
4639 {
4640     return a.high >> 63;
4641 }
4642 
4643 /*----------------------------------------------------------------------------
4644 | Normalizes the subnormal quadruple-precision floating-point value
4645 | represented by the denormalized significand formed by the concatenation of
4646 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4647 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4648 | significand are stored at the location pointed to by `zSig0Ptr', and the
4649 | least significant 64 bits of the normalized significand are stored at the
4650 | location pointed to by `zSig1Ptr'.
4651 *----------------------------------------------------------------------------*/
4652 
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is in the high word: move it up to bit 48. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* Leading one is somewhere in the low word. */
    shift = clz64(aSig1) - 15;
    if (shift < 0) {
        /* The one sits above bit 48 of the low word: split across words. */
        *zSig0Ptr = aSig1 >> -shift;
        *zSig1Ptr = aSig1 << (shift & 63);
    } else {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -shift - 63;
}
4683 
4684 /*----------------------------------------------------------------------------
4685 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4686 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4687 | floating-point value, returning the result.  After being shifted into the
4688 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4689 | added together to form the most significant 32 bits of the result.  This
4690 | means that any integer portion of `zSig0' will be added into the exponent.
4691 | Since a properly normalized significand will have an integer portion equal
4692 | to 1, the `zExp' input should be 1 less than the desired result exponent
4693 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4694 | significand.
4695 *----------------------------------------------------------------------------*/
4696 
4697 static inline float128
4698 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4699 {
4700     float128 z;
4701 
4702     z.low = zSig1;
4703     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4704     return z;
4705 }
4706 
4707 /*----------------------------------------------------------------------------
4708 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4709 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4710 | and `zSig2', and returns the proper quadruple-precision floating-point value
4711 | corresponding to the abstract input.  Ordinarily, the abstract value is
4712 | simply rounded and packed into the quadruple-precision format, with the
4713 | inexact exception raised if the abstract input cannot be represented
4714 | exactly.  However, if the abstract value is too large, the overflow and
4715 | inexact exceptions are raised and an infinity or maximal finite value is
4716 | returned.  If the abstract value is too small, the input value is rounded to
4717 | a subnormal number, and the underflow and inexact exceptions are raised if
4718 | the abstract input cannot be represented exactly as a subnormal quadruple-
4719 | precision floating-point number.
4720 |     The input significand must be normalized or smaller.  If the input
4721 | significand is not normalized, `zExp' must be 0; in that case, the result
4722 | returned is a subnormal number, and it must not require rounding.  In the
4723 | usual case that the input significand is normalized, `zExp' must be 1 less
4724 | than the ``true'' floating-point exponent.  The handling of underflow and
4725 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4726 *----------------------------------------------------------------------------*/
4727 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide from the extra (round/sticky) word zSig2 whether the 128-bit
     * significand must be incremented for the current rounding mode. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Increment when the most significant extra bit is set. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round-to-odd: only increment if the low result bit is even. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Unsigned compare catches both too-large and negative exponents. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow: return infinity, or the maximal finite number for
             * modes that round toward zero from this sign. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result would be subnormal. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tininess: detected before rounding, or after rounding when
             * the rounded value would still be below the smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize, jamming shifted-out bits into sticky zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The shift changed the extra bits: recompute the rounding
             * decision for the denormalized significand. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        /* Add one ulp; on an exact tie (only the top bit of zSig2 set),
         * clear the low bit to round to even. */
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must be packed with a zero exponent. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4839 
4840 /*----------------------------------------------------------------------------
4841 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4842 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4843 | returns the proper quadruple-precision floating-point value corresponding
4844 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4845 | except that the input significand has fewer bits and does not have to be
4846 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4847 | point exponent.
4848 *----------------------------------------------------------------------------*/
4849 
4850 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4851                                               uint64_t zSig0, uint64_t zSig1,
4852                                               float_status *status)
4853 {
4854     int8_t shiftCount;
4855     uint64_t zSig2;
4856 
4857     if ( zSig0 == 0 ) {
4858         zSig0 = zSig1;
4859         zSig1 = 0;
4860         zExp -= 64;
4861     }
4862     shiftCount = clz64(zSig0) - 15;
4863     if ( 0 <= shiftCount ) {
4864         zSig2 = 0;
4865         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4866     }
4867     else {
4868         shift128ExtraRightJamming(
4869             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4870     }
4871     zExp -= shiftCount;
4872     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4873 
4874 }
4875 
4876 
4877 /*----------------------------------------------------------------------------
4878 | Returns the result of converting the 32-bit two's complement integer `a'
4879 | to the extended double-precision floating-point format.  The conversion
4880 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4881 | Arithmetic.
4882 *----------------------------------------------------------------------------*/
4883 
4884 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4885 {
4886     bool zSign;
4887     uint32_t absA;
4888     int8_t shiftCount;
4889     uint64_t zSig;
4890 
4891     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4892     zSign = ( a < 0 );
4893     absA = zSign ? - a : a;
4894     shiftCount = clz32(absA) + 32;
4895     zSig = absA;
4896     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4897 
4898 }
4899 
4900 /*----------------------------------------------------------------------------
4901 | Returns the result of converting the 64-bit two's complement integer `a'
4902 | to the extended double-precision floating-point format.  The conversion
4903 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4904 | Arithmetic.
4905 *----------------------------------------------------------------------------*/
4906 
4907 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4908 {
4909     bool zSign;
4910     uint64_t absA;
4911     int8_t shiftCount;
4912 
4913     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4914     zSign = ( a < 0 );
4915     absA = zSign ? - a : a;
4916     shiftCount = clz64(absA);
4917     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4918 
4919 }
4920 
4921 /*----------------------------------------------------------------------------
4922 | Returns the result of converting the single-precision floating-point value
4923 | `a' to the extended double-precision floating-point format.  The conversion
4924 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4925 | Arithmetic.
4926 *----------------------------------------------------------------------------*/
4927 
4928 floatx80 float32_to_floatx80(float32 a, float_status *status)
4929 {
4930     bool aSign;
4931     int aExp;
4932     uint32_t aSig;
4933 
4934     a = float32_squash_input_denormal(a, status);
4935     aSig = extractFloat32Frac( a );
4936     aExp = extractFloat32Exp( a );
4937     aSign = extractFloat32Sign( a );
4938     if ( aExp == 0xFF ) {
4939         if (aSig) {
4940             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4941                                                status);
4942             return floatx80_silence_nan(res, status);
4943         }
4944         return packFloatx80(aSign,
4945                             floatx80_infinity_high,
4946                             floatx80_infinity_low);
4947     }
4948     if ( aExp == 0 ) {
4949         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4950         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4951     }
4952     aSig |= 0x00800000;
4953     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
4954 
4955 }
4956 
4957 /*----------------------------------------------------------------------------
4958 | Returns the remainder of the single-precision floating-point value `a'
4959 | with respect to the corresponding value `b'.  The operation is performed
4960 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4961 *----------------------------------------------------------------------------*/
4962 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* a is NaN or Inf: NaNs propagate; Inf rem x is an invalid operation. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* x rem Inf = x (unless b is NaN). */
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x rem 0 is an invalid operation. */
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bit explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: one 32-bit division step suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| < |b|/2: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce with repeated estimated
         * 64-bit quotient steps, about 62 exponent bits per iteration. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Bias the estimate down by 2 so it is never too large. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Drive the remainder negative, then choose whichever of the last two
     * partial remainders is nearer; ties go to the even quotient. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5058 
5059 
5060 
5061 /*----------------------------------------------------------------------------
5062 | Returns the binary exponential of the single-precision floating-point value
5063 | `a'. The operation is performed according to the IEC/IEEE Standard for
5064 | Binary Floating-Point Arithmetic.
5065 |
5066 | Uses the following identities:
5067 |
5068 | 1. -------------------------------------------------------------------------
5069 |      x    x*ln(2)
5070 |     2  = e
5071 |
5072 | 2. -------------------------------------------------------------------------
5073 |                      2     3     4     5           n
5074 |      x        x     x     x     x     x           x
5075 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5076 |               1!    2!    3!    4!    5!          n!
5077 *----------------------------------------------------------------------------*/
5078 
/* Taylor-series coefficients for e^x: entry n-1 holds 1/n! as an IEEE
 * double bit pattern, for n = 1..15 (see the identities documented above). */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5097 
5098 float32 float32_exp2(float32 a, float_status *status)
5099 {
5100     bool aSign;
5101     int aExp;
5102     uint32_t aSig;
5103     float64 r, x, xn;
5104     int i;
5105     a = float32_squash_input_denormal(a, status);
5106 
5107     aSig = extractFloat32Frac( a );
5108     aExp = extractFloat32Exp( a );
5109     aSign = extractFloat32Sign( a );
5110 
5111     if ( aExp == 0xFF) {
5112         if (aSig) {
5113             return propagateFloat32NaN(a, float32_zero, status);
5114         }
5115         return (aSign) ? float32_zero : a;
5116     }
5117     if (aExp == 0) {
5118         if (aSig == 0) return float32_one;
5119     }
5120 
5121     float_raise(float_flag_inexact, status);
5122 
5123     /* ******************************* */
5124     /* using float64 for approximation */
5125     /* ******************************* */
5126     x = float32_to_float64(a, status);
5127     x = float64_mul(x, float64_ln2, status);
5128 
5129     xn = x;
5130     r = float64_one;
5131     for (i = 0 ; i < 15 ; i++) {
5132         float64 f;
5133 
5134         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5135         r = float64_add(r, f, status);
5136 
5137         xn = float64_mul(xn, x, status);
5138     }
5139 
5140     return float64_to_float32(r, status);
5141 }
5142 
5143 /*----------------------------------------------------------------------------
5144 | Returns the binary log of the single-precision floating-point value `a'.
5145 | The operation is performed according to the IEC/IEEE Standard for Binary
5146 | Floating-Point Arithmetic.
5147 *----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* log2(+-0) = -infinity. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is an invalid operation. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* log2(+infinity) = +infinity. */
        return a;
    }

    /* The integer part of the result is the unbiased exponent. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /* Generate fraction bits one at a time by squaring the significand:
     * if the square reaches [2,4), the next bit is 1 and we renormalize. */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5192 
5193 /*----------------------------------------------------------------------------
5194 | Returns the result of converting the double-precision floating-point value
5195 | `a' to the extended double-precision floating-point format.  The conversion
5196 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5197 | Arithmetic.
5198 *----------------------------------------------------------------------------*/
5199 
5200 floatx80 float64_to_floatx80(float64 a, float_status *status)
5201 {
5202     bool aSign;
5203     int aExp;
5204     uint64_t aSig;
5205 
5206     a = float64_squash_input_denormal(a, status);
5207     aSig = extractFloat64Frac( a );
5208     aExp = extractFloat64Exp( a );
5209     aSign = extractFloat64Sign( a );
5210     if ( aExp == 0x7FF ) {
5211         if (aSig) {
5212             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5213                                                status);
5214             return floatx80_silence_nan(res, status);
5215         }
5216         return packFloatx80(aSign,
5217                             floatx80_infinity_high,
5218                             floatx80_infinity_low);
5219     }
5220     if ( aExp == 0 ) {
5221         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5222         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5223     }
5224     return
5225         packFloatx80(
5226             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5227 
5228 }
5229 
5230 /*----------------------------------------------------------------------------
5231 | Returns the remainder of the double-precision floating-point value `a'
5232 | with respect to the corresponding value `b'.  The operation is performed
5233 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5234 *----------------------------------------------------------------------------*/
5235 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* a is NaN or Inf: NaNs propagate; Inf rem x is an invalid operation. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* x rem Inf = x (unless b is NaN). */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x rem 0 is an invalid operation. */
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bit explicit and left-justify. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: a is already the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce with repeated estimated-quotient steps, about 62 exponent
     * bits per iteration. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        /* Bias the estimate down by 2 so it is never too large. */
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Drive the remainder negative, then choose whichever of the last two
     * partial remainders is nearer; ties go to the even quotient. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5317 
5318 /*----------------------------------------------------------------------------
5319 | Returns the binary log of the double-precision floating-point value `a'.
5320 | The operation is performed according to the IEC/IEEE Standard for Binary
5321 | Floating-Point Arithmetic.
5322 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+-0) = -infinity. */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is an invalid operation. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+infinity) = +infinity. */
        return a;
    }

    /* The integer part of the result is the unbiased exponent. */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Generate 52 fraction bits one at a time by squaring the significand:
     * if the square reaches [2,4), the next bit is 1 and we renormalize. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5366 
5367 /*----------------------------------------------------------------------------
5368 | Returns the result of converting the extended double-precision floating-
5369 | point value `a' to the 32-bit two's complement integer format.  The
5370 | conversion is performed according to the IEC/IEEE Standard for Binary
5371 | Floating-Point Arithmetic---which means in particular that the conversion
5372 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5373 | largest positive integer is returned.  Otherwise, if the conversion
5374 | overflows, the largest integer with the same sign as `a' is returned.
5375 *----------------------------------------------------------------------------*/
5376 
5377 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5378 {
5379     bool aSign;
5380     int32_t aExp, shiftCount;
5381     uint64_t aSig;
5382 
5383     if (floatx80_invalid_encoding(a)) {
5384         float_raise(float_flag_invalid, status);
5385         return 1 << 31;
5386     }
5387     aSig = extractFloatx80Frac( a );
5388     aExp = extractFloatx80Exp( a );
5389     aSign = extractFloatx80Sign( a );
5390     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5391     shiftCount = 0x4037 - aExp;
5392     if ( shiftCount <= 0 ) shiftCount = 1;
5393     shift64RightJamming( aSig, shiftCount, &aSig );
5394     return roundAndPackInt32(aSign, aSig, status);
5395 
5396 }
5397 
5398 /*----------------------------------------------------------------------------
5399 | Returns the result of converting the extended double-precision floating-
5400 | point value `a' to the 32-bit two's complement integer format.  The
5401 | conversion is performed according to the IEC/IEEE Standard for Binary
5402 | Floating-Point Arithmetic, except that the conversion is always rounded
5403 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5404 | Otherwise, if the conversion overflows, the largest integer with the same
5405 | sign as `a' is returned.
5406 *----------------------------------------------------------------------------*/
5407 
5408 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5409 {
5410     bool aSign;
5411     int32_t aExp, shiftCount;
5412     uint64_t aSig, savedASig;
5413     int32_t z;
5414 
5415     if (floatx80_invalid_encoding(a)) {
5416         float_raise(float_flag_invalid, status);
5417         return 1 << 31;
5418     }
5419     aSig = extractFloatx80Frac( a );
5420     aExp = extractFloatx80Exp( a );
5421     aSign = extractFloatx80Sign( a );
5422     if ( 0x401E < aExp ) {
5423         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5424         goto invalid;
5425     }
5426     else if ( aExp < 0x3FFF ) {
5427         if (aExp || aSig) {
5428             float_raise(float_flag_inexact, status);
5429         }
5430         return 0;
5431     }
5432     shiftCount = 0x403E - aExp;
5433     savedASig = aSig;
5434     aSig >>= shiftCount;
5435     z = aSig;
5436     if ( aSign ) z = - z;
5437     if ( ( z < 0 ) ^ aSign ) {
5438  invalid:
5439         float_raise(float_flag_invalid, status);
5440         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5441     }
5442     if ( ( aSig<<shiftCount ) != savedASig ) {
5443         float_raise(float_flag_inexact, status);
5444     }
5445     return z;
5446 
5447 }
5448 
5449 /*----------------------------------------------------------------------------
5450 | Returns the result of converting the extended double-precision floating-
5451 | point value `a' to the 64-bit two's complement integer format.  The
5452 | conversion is performed according to the IEC/IEEE Standard for Binary
5453 | Floating-Point Arithmetic---which means in particular that the conversion
5454 | is rounded according to the current rounding mode.  If `a' is a NaN,
5455 | the largest positive integer is returned.  Otherwise, if the conversion
5456 | overflows, the largest integer with the same sign as `a' is returned.
5457 *----------------------------------------------------------------------------*/
5458 
5459 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5460 {
5461     bool aSign;
5462     int32_t aExp, shiftCount;
5463     uint64_t aSig, aSigExtra;
5464 
5465     if (floatx80_invalid_encoding(a)) {
5466         float_raise(float_flag_invalid, status);
5467         return 1ULL << 63;
5468     }
5469     aSig = extractFloatx80Frac( a );
5470     aExp = extractFloatx80Exp( a );
5471     aSign = extractFloatx80Sign( a );
5472     shiftCount = 0x403E - aExp;
5473     if ( shiftCount <= 0 ) {
5474         if ( shiftCount ) {
5475             float_raise(float_flag_invalid, status);
5476             if (!aSign || floatx80_is_any_nan(a)) {
5477                 return INT64_MAX;
5478             }
5479             return INT64_MIN;
5480         }
5481         aSigExtra = 0;
5482     }
5483     else {
5484         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5485     }
5486     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5487 
5488 }
5489 
5490 /*----------------------------------------------------------------------------
5491 | Returns the result of converting the extended double-precision floating-
5492 | point value `a' to the 64-bit two's complement integer format.  The
5493 | conversion is performed according to the IEC/IEEE Standard for Binary
5494 | Floating-Point Arithmetic, except that the conversion is always rounded
5495 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5496 | Otherwise, if the conversion overflows, the largest integer with the same
5497 | sign as `a' is returned.
5498 *----------------------------------------------------------------------------*/
5499 
5500 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5501 {
5502     bool aSign;
5503     int32_t aExp, shiftCount;
5504     uint64_t aSig;
5505     int64_t z;
5506 
5507     if (floatx80_invalid_encoding(a)) {
5508         float_raise(float_flag_invalid, status);
5509         return 1ULL << 63;
5510     }
5511     aSig = extractFloatx80Frac( a );
5512     aExp = extractFloatx80Exp( a );
5513     aSign = extractFloatx80Sign( a );
5514     shiftCount = aExp - 0x403E;
5515     if ( 0 <= shiftCount ) {
5516         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5517         if ( ( a.high != 0xC03E ) || aSig ) {
5518             float_raise(float_flag_invalid, status);
5519             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5520                 return INT64_MAX;
5521             }
5522         }
5523         return INT64_MIN;
5524     }
5525     else if ( aExp < 0x3FFF ) {
5526         if (aExp | aSig) {
5527             float_raise(float_flag_inexact, status);
5528         }
5529         return 0;
5530     }
5531     z = aSig>>( - shiftCount );
5532     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5533         float_raise(float_flag_inexact, status);
5534     }
5535     if ( aSign ) z = - z;
5536     return z;
5537 
5538 }
5539 
5540 /*----------------------------------------------------------------------------
5541 | Returns the result of converting the extended double-precision floating-
5542 | point value `a' to the single-precision floating-point format.  The
5543 | conversion is performed according to the IEC/IEEE Standard for Binary
5544 | Floating-Point Arithmetic.
5545 *----------------------------------------------------------------------------*/
5546 
5547 float32 floatx80_to_float32(floatx80 a, float_status *status)
5548 {
5549     bool aSign;
5550     int32_t aExp;
5551     uint64_t aSig;
5552 
5553     if (floatx80_invalid_encoding(a)) {
5554         float_raise(float_flag_invalid, status);
5555         return float32_default_nan(status);
5556     }
5557     aSig = extractFloatx80Frac( a );
5558     aExp = extractFloatx80Exp( a );
5559     aSign = extractFloatx80Sign( a );
5560     if ( aExp == 0x7FFF ) {
5561         if ( (uint64_t) ( aSig<<1 ) ) {
5562             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5563                                              status);
5564             return float32_silence_nan(res, status);
5565         }
5566         return packFloat32( aSign, 0xFF, 0 );
5567     }
5568     shift64RightJamming( aSig, 33, &aSig );
5569     if ( aExp || aSig ) aExp -= 0x3F81;
5570     return roundAndPackFloat32(aSign, aExp, aSig, status);
5571 
5572 }
5573 
5574 /*----------------------------------------------------------------------------
5575 | Returns the result of converting the extended double-precision floating-
5576 | point value `a' to the double-precision floating-point format.  The
5577 | conversion is performed according to the IEC/IEEE Standard for Binary
5578 | Floating-Point Arithmetic.
5579 *----------------------------------------------------------------------------*/
5580 
5581 float64 floatx80_to_float64(floatx80 a, float_status *status)
5582 {
5583     bool aSign;
5584     int32_t aExp;
5585     uint64_t aSig, zSig;
5586 
5587     if (floatx80_invalid_encoding(a)) {
5588         float_raise(float_flag_invalid, status);
5589         return float64_default_nan(status);
5590     }
5591     aSig = extractFloatx80Frac( a );
5592     aExp = extractFloatx80Exp( a );
5593     aSign = extractFloatx80Sign( a );
5594     if ( aExp == 0x7FFF ) {
5595         if ( (uint64_t) ( aSig<<1 ) ) {
5596             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5597                                              status);
5598             return float64_silence_nan(res, status);
5599         }
5600         return packFloat64( aSign, 0x7FF, 0 );
5601     }
5602     shift64RightJamming( aSig, 1, &zSig );
5603     if ( aExp || aSig ) aExp -= 0x3C01;
5604     return roundAndPackFloat64(aSign, aExp, zSig, status);
5605 
5606 }
5607 
5608 /*----------------------------------------------------------------------------
5609 | Returns the result of converting the extended double-precision floating-
5610 | point value `a' to the quadruple-precision floating-point format.  The
5611 | conversion is performed according to the IEC/IEEE Standard for Binary
5612 | Floating-Point Arithmetic.
5613 *----------------------------------------------------------------------------*/
5614 
5615 float128 floatx80_to_float128(floatx80 a, float_status *status)
5616 {
5617     bool aSign;
5618     int aExp;
5619     uint64_t aSig, zSig0, zSig1;
5620 
5621     if (floatx80_invalid_encoding(a)) {
5622         float_raise(float_flag_invalid, status);
5623         return float128_default_nan(status);
5624     }
5625     aSig = extractFloatx80Frac( a );
5626     aExp = extractFloatx80Exp( a );
5627     aSign = extractFloatx80Sign( a );
5628     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5629         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5630                                            status);
5631         return float128_silence_nan(res, status);
5632     }
5633     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5634     return packFloat128( aSign, aExp, zSig0, zSig1 );
5635 
5636 }
5637 
5638 /*----------------------------------------------------------------------------
5639 | Rounds the extended double-precision floating-point value `a'
5640 | to the precision provided by floatx80_rounding_precision and returns the
5641 | result as an extended double-precision floating-point value.
5642 | The operation is performed according to the IEC/IEEE Standard for Binary
5643 | Floating-Point Arithmetic.
5644 *----------------------------------------------------------------------------*/
5645 
5646 floatx80 floatx80_round(floatx80 a, float_status *status)
5647 {
5648     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5649                                 extractFloatx80Sign(a),
5650                                 extractFloatx80Exp(a),
5651                                 extractFloatx80Frac(a), 0, status);
5652 }
5653 
5654 /*----------------------------------------------------------------------------
5655 | Rounds the extended double-precision floating-point value `a' to an integer,
5656 | and returns the result as an extended quadruple-precision floating-point
5657 | value.  The operation is performed according to the IEC/IEEE Standard for
5658 | Binary Floating-Point Arithmetic.
5659 *----------------------------------------------------------------------------*/
5660 
5661 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5662 {
5663     bool aSign;
5664     int32_t aExp;
5665     uint64_t lastBitMask, roundBitsMask;
5666     floatx80 z;
5667 
5668     if (floatx80_invalid_encoding(a)) {
5669         float_raise(float_flag_invalid, status);
5670         return floatx80_default_nan(status);
5671     }
5672     aExp = extractFloatx80Exp( a );
5673     if ( 0x403E <= aExp ) {
5674         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5675             return propagateFloatx80NaN(a, a, status);
5676         }
5677         return a;
5678     }
5679     if ( aExp < 0x3FFF ) {
5680         if (    ( aExp == 0 )
5681              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5682             return a;
5683         }
5684         float_raise(float_flag_inexact, status);
5685         aSign = extractFloatx80Sign( a );
5686         switch (status->float_rounding_mode) {
5687          case float_round_nearest_even:
5688             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5689                ) {
5690                 return
5691                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5692             }
5693             break;
5694         case float_round_ties_away:
5695             if (aExp == 0x3FFE) {
5696                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5697             }
5698             break;
5699          case float_round_down:
5700             return
5701                   aSign ?
5702                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5703                 : packFloatx80( 0, 0, 0 );
5704          case float_round_up:
5705             return
5706                   aSign ? packFloatx80( 1, 0, 0 )
5707                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5708 
5709         case float_round_to_zero:
5710             break;
5711         default:
5712             g_assert_not_reached();
5713         }
5714         return packFloatx80( aSign, 0, 0 );
5715     }
5716     lastBitMask = 1;
5717     lastBitMask <<= 0x403E - aExp;
5718     roundBitsMask = lastBitMask - 1;
5719     z = a;
5720     switch (status->float_rounding_mode) {
5721     case float_round_nearest_even:
5722         z.low += lastBitMask>>1;
5723         if ((z.low & roundBitsMask) == 0) {
5724             z.low &= ~lastBitMask;
5725         }
5726         break;
5727     case float_round_ties_away:
5728         z.low += lastBitMask >> 1;
5729         break;
5730     case float_round_to_zero:
5731         break;
5732     case float_round_up:
5733         if (!extractFloatx80Sign(z)) {
5734             z.low += roundBitsMask;
5735         }
5736         break;
5737     case float_round_down:
5738         if (extractFloatx80Sign(z)) {
5739             z.low += roundBitsMask;
5740         }
5741         break;
5742     default:
5743         abort();
5744     }
5745     z.low &= ~ roundBitsMask;
5746     if ( z.low == 0 ) {
5747         ++z.high;
5748         z.low = UINT64_C(0x8000000000000000);
5749     }
5750     if (z.low != a.low) {
5751         float_raise(float_flag_inexact, status);
5752     }
5753     return z;
5754 
5755 }
5756 
5757 /*----------------------------------------------------------------------------
5758 | Returns the result of adding the absolute values of the extended double-
5759 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5760 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5761 | The addition is performed according to the IEC/IEEE Standard for Binary
5762 | Floating-Point Arithmetic.
5763 *----------------------------------------------------------------------------*/
5764 
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to a's. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;   /* Inf + finite = Inf (b cannot be Inf here) */
        }
        /* A denormal's effective exponent is 1, not 0: shift one less. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to b's. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* finite + Inf = Inf with the caller-requested sign. */
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment shift needed. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;   /* Inf + Inf (same sign, handled by caller) */
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set means no carry out: the sum is already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the significand add: renormalize by one bit. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5836 
5837 /*----------------------------------------------------------------------------
5838 | Returns the result of subtracting the absolute values of the extended
5839 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
5840 | difference is negated before being returned.  `zSign' is ignored if the
5841 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
5842 | Standard for Binary Floating-Point Arithmetic.
5843 *----------------------------------------------------------------------------*/
5844 
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here on. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - Inf is invalid: return the default NaN. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    /* Denormals have an effective exponent of 1. */
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact zero result: negative only when rounding toward -Inf. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - Inf: infinity of the opposite sign. */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* Align a's significand to b's exponent (denormal shifts one less). */
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;   /* b's magnitude dominates, so the result's sign flips */
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;   /* Inf - finite = Inf */
    }
    /* Align b's significand to a's exponent (denormal shifts one less). */
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5905 
5906 /*----------------------------------------------------------------------------
5907 | Returns the result of adding the extended double-precision floating-point
5908 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
5909 | Standard for Binary Floating-Point Arithmetic.
5910 *----------------------------------------------------------------------------*/
5911 
5912 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5913 {
5914     bool aSign, bSign;
5915 
5916     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5917         float_raise(float_flag_invalid, status);
5918         return floatx80_default_nan(status);
5919     }
5920     aSign = extractFloatx80Sign( a );
5921     bSign = extractFloatx80Sign( b );
5922     if ( aSign == bSign ) {
5923         return addFloatx80Sigs(a, b, aSign, status);
5924     }
5925     else {
5926         return subFloatx80Sigs(a, b, aSign, status);
5927     }
5928 
5929 }
5930 
5931 /*----------------------------------------------------------------------------
5932 | Returns the result of subtracting the extended double-precision floating-
5933 | point values `a' and `b'.  The operation is performed according to the
5934 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5935 *----------------------------------------------------------------------------*/
5936 
5937 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5938 {
5939     bool aSign, bSign;
5940 
5941     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5942         float_raise(float_flag_invalid, status);
5943         return floatx80_default_nan(status);
5944     }
5945     aSign = extractFloatx80Sign( a );
5946     bSign = extractFloatx80Sign( b );
5947     if ( aSign == bSign ) {
5948         return subFloatx80Sigs(a, b, aSign, status);
5949     }
5950     else {
5951         return addFloatx80Sigs(a, b, aSign, status);
5952     }
5953 
5954 }
5955 
5956 /*----------------------------------------------------------------------------
5957 | Returns the result of multiplying the extended double-precision floating-
5958 | point values `a' and `b'.  The operation is performed according to the
5959 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5960 *----------------------------------------------------------------------------*/
5961 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf * 0 is invalid; Inf * any other finite value is Inf. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf is invalid; any other finite value * Inf is Inf. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* Normalize subnormal inputs (zeros short-circuit to a signed zero). */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* 64x64 -> 128-bit product of the significands. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* If the product's top bit is clear, shift left once to normalize. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6017 
6018 /*----------------------------------------------------------------------------
6019 | Returns the result of dividing the extended double-precision floating-point
6020 | value `a' by the corresponding value `b'.  The operation is performed
6021 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6022 *----------------------------------------------------------------------------*/
6023 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        /* Inf / finite = Inf. */
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / Inf = 0 of the appropriate sign. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; any other finite / 0 is divide-by-zero. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Pre-shift the dividend when needed so the quotient stays below 2. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits: estimate, then correct downward until the
     * remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; refine only when the estimate is close enough
     * to affect rounding, folding any leftover remainder into the sticky
     * bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6104 
6105 /*----------------------------------------------------------------------------
6106 | Returns the remainder of the extended double-precision floating-point value
6107 | `a' with respect to the corresponding value `b'.  The operation is performed
6108 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6109 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6110 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6111 | the absolute value of the integer quotient.
6112 *----------------------------------------------------------------------------*/
6113 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf % anything is invalid. */
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;   /* finite % Inf yields the finite operand unchanged */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* anything % 0 is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|: for mod (or when |a| is much smaller) the result
         * is a itself. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Long division: 62 quotient bits per iteration until the exponents
     * are nearly aligned.  The estimate is biased low (q - 2) so the
     * running remainder never goes negative. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step for the remaining expDiff quotient bits,
         * with an exact correction loop on the remainder. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /*
         * IEEE remainder rounds the quotient to nearest (ties to even):
         * if the complementary remainder (divisor - r) is smaller, or
         * equal with an odd quotient, use it instead and flip the sign.
         */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6234 
6235 /*----------------------------------------------------------------------------
6236 | Returns the remainder of the extended double-precision floating-point value
6237 | `a' with respect to the corresponding value `b'.  The operation is performed
6238 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6239 *----------------------------------------------------------------------------*/
6240 
6241 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6242 {
6243     uint64_t quotient;
6244     return floatx80_modrem(a, b, false, &quotient, status);
6245 }
6246 
6247 /*----------------------------------------------------------------------------
6248 | Returns the remainder of the extended double-precision floating-point value
6249 | `a' with respect to the corresponding value `b', with the quotient truncated
6250 | toward zero.
6251 *----------------------------------------------------------------------------*/
6252 
6253 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6254 {
6255     uint64_t quotient;
6256     return floatx80_modrem(a, b, true, &quotient, status);
6257 }
6258 
6259 /*----------------------------------------------------------------------------
6260 | Returns the square root of the extended double-precision floating-point
6261 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6262 | for Binary Floating-Point Arithmetic.
6263 *----------------------------------------------------------------------------*/
6264 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Non-canonical floatx80 encodings are an invalid operation.  */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid.  */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) is -0; any other negative operand is invalid.  */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased operand exponent.  */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Start from a ~32-bit root estimate and refine it with a
     * division-based correction step.  */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    /* The estimate may be slightly high: step it down until the
     * remainder a - zSig0^2 is non-negative.  */
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Derive the low 64 root bits from the remaining remainder.  */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        /* Near a rounding boundary the estimate may be off by a few
         * ulps: recompute the exact remainder, correct zSig1, and fold
         * any leftover (sticky) bits into the low result bit so that
         * rounding is performed correctly.  */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Assemble the full 128-bit significand and round to the current
     * x87 rounding precision.  */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6329 
6330 /*----------------------------------------------------------------------------
6331 | Returns the result of converting the quadruple-precision floating-point
6332 | value `a' to the extended double-precision floating-point format.  The
6333 | conversion is performed according to the IEC/IEEE Standard for Binary
6334 | Floating-Point Arithmetic.
6335 *----------------------------------------------------------------------------*/
6336 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            /* NaN: convert via the common NaN format, then quieten.  */
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        /* float128 has an implicit integer bit; floatx80 stores it
         * explicitly, so set it here.  */
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    /* Left-align the significand before rounding to 80-bit format.  */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6367 
6368 /*----------------------------------------------------------------------------
6369 | Returns the remainder of the quadruple-precision floating-point value `a'
6370 | with respect to the corresponding value `b'.  The operation is performed
6371 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6372 *----------------------------------------------------------------------------*/
6373 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(Inf, x) is invalid.  */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* rem(finite, Inf) returns the finite operand unchanged.  */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is an invalid operation.  */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* If |a| is well below |b| the remainder is a itself.  */
    if ( expDiff < -1 ) return a;
    /* Make the implicit integer bits explicit and left-align both
     * significands; a is shifted one bit less when expDiff == -1.  */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Long-division loop: retire up to 61 quotient bits per pass,
     * using a deliberately low estimate of q (off by at most 4) so
     * the partial remainder never goes negative.  */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial step for the remaining (< 64) quotient bits.  */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Subtract b until the partial remainder goes negative, keeping
     * the last non-negative value as the alternate candidate.  */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* Round the quotient to nearest-even: the sign of the sum of the
     * two candidate remainders tells which one is closer; ties go to
     * the even quotient.  */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6474 
6475 /*----------------------------------------------------------------------------
6476 | Returns the square root of the quadruple-precision floating-point value `a'.
6477 | The operation is performed according to the IEC/IEEE Standard for Binary
6478 | Floating-Point Arithmetic.
6479 *----------------------------------------------------------------------------*/
6480 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid.  */
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) is -0; any other negative operand is invalid.  */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is half the unbiased operand exponent.  */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    /* Make the implicit integer bit explicit, then start from a
     * ~32-bit root estimate refined by a division-based step.  */
    aSig0 |= UINT64_C(0x0001000000000000);
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    /* The estimate may be slightly high: step it down until the
     * remainder a - zSig0^2 is non-negative.  */
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Derive the low 64 root bits from the remaining remainder.  */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        /* Near a rounding boundary the estimate may be off by a few
         * ulps: recompute the exact remainder, correct zSig1, and fold
         * any leftover (sticky) bits into the low result bit so that
         * rounding is performed correctly.  */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Shift back into float128 position, collecting the bits shifted
     * out into zSig2 for rounding.  */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
6542 
/* Compare `a' and `b', returning a FloatRelation.  When `is_quiet' is
 * true, only signaling NaNs raise the invalid exception; otherwise any
 * NaN operand does.  */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical floatx80 encodings are always invalid.  */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Any NaN operand makes the comparison unordered.  */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Opposite signs and not both zero: the positive operand
             * is the greater one.  */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes, flipping the result when
             * both operands are negative.  */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6590 
6591 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6592 {
6593     return floatx80_compare_internal(a, b, 0, status);
6594 }
6595 
6596 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6597                                      float_status *status)
6598 {
6599     return floatx80_compare_internal(a, b, 1, status);
6600 }
6601 
/* Return a * 2^n, rounded per `status'.  */
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    /* Non-canonical floatx80 encodings are an invalid operation.  */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* NaN propagates; infinity is unchanged by scaling.  */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        /* Subnormal: biased exponent 0 encodes the same effective
         * exponent as 1, so adjust before scaling.  */
        aExp++;
    }

    /* Clamp n so aExp + n stays far from int32_t overflow; anything
     * beyond this range overflows/underflows to the same result.  */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}
6640 
/* Return a * 2^n, rounded per `status'.  */
float128 float128_scalbn(float128 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; infinity is unchanged by scaling.  */
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN(a, a, status);
        }
        return a;
    }
    if (aExp != 0) {
        /* Normal number: make the implicit integer bit explicit.  */
        aSig0 |= UINT64_C(0x0001000000000000);
    } else if (aSig0 == 0 && aSig1 == 0) {
        return a;
    } else {
        /* Subnormal: biased exponent 0 encodes the same effective
         * exponent as 1, so adjust before scaling.  */
        aExp++;
    }

    /* Clamp n so aExp + n stays far from int32_t overflow; anything
     * beyond this range overflows/underflows to the same result.  */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* The extra -1 matches the significand alignment expected by
     * normalizeRoundAndPackFloat128 (with the integer bit explicit).  */
    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                         , status);

}
6676 
6677 static void __attribute__((constructor)) softfloat_init(void)
6678 {
6679     union_float64 ua, ub, uc, ur;
6680 
6681     if (QEMU_NO_HARDFLOAT) {
6682         return;
6683     }
6684     /*
6685      * Test that the host's FMA is not obviously broken. For example,
6686      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6687      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6688      */
6689     ua.s = 0x0020000000000001ULL;
6690     ub.s = 0x3ca0000000000000ULL;
6691     uc.s = 0x0020000000000000ULL;
6692     ur.h = fma(ua.h, ub.h, uc.h);
6693     if (ur.s != 0x0020000000000001ULL) {
6694         force_soft_fma = true;
6695     }
6696 }
6697