xref: /openbmc/qemu/fpu/softfloat.c (revision 762c280d)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal, s);                  \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
372 
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
404 /*
405  * Classify a floating point number. Everything above float_class_qnan
406  * is a NaN so cls >= float_class_qnan is any NaN.
407  */
408 
409 typedef enum __attribute__ ((__packed__)) {
410     float_class_unclassified,
411     float_class_zero,
412     float_class_normal,
413     float_class_inf,
414     float_class_qnan,  /* all NaNs from here */
415     float_class_snan,
416 } FloatClass;
417 
418 #define float_cmask(bit)  (1u << (bit))
419 
420 enum {
421     float_cmask_zero    = float_cmask(float_class_zero),
422     float_cmask_normal  = float_cmask(float_class_normal),
423     float_cmask_inf     = float_cmask(float_class_inf),
424     float_cmask_qnan    = float_cmask(float_class_qnan),
425     float_cmask_snan    = float_cmask(float_class_snan),
426 
427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
429 };
430 
431 /* Flags for parts_minmax. */
432 enum {
433     /* Set for minimum; clear for maximum. */
434     minmax_ismin = 1,
435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
436     minmax_isnum = 2,
437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
438     minmax_ismag = 4,
439     /*
440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
441      * operations.
442      */
443     minmax_isnumber = 8,
444 };
445 
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
448 {
449     return unlikely(c >= float_class_qnan);
450 }
451 
452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
453 {
454     return c == float_class_snan;
455 }
456 
457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
458 {
459     return c == float_class_qnan;
460 }
461 
462 /*
463  * Structure holding all of the decomposed parts of a float.
464  * The exponent is unbiased and the fraction is normalized.
465  *
466  * The fraction words are stored in big-endian word ordering,
467  * so that truncation from a larger format to a smaller format
468  * can be done simply by ignoring subsequent elements.
469  */
470 
471 typedef struct {
472     FloatClass cls;
473     bool sign;
474     int32_t exp;
475     union {
476         /* Routines that know the structure may reference the singular name. */
477         uint64_t frac;
478         /*
479          * Routines expanded with multiple structures reference "hi" and "lo"
480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
481          * both the same word and aliased here.
482          */
483         uint64_t frac_hi;
484         uint64_t frac_lo;
485     };
486 } FloatParts64;
487 
488 typedef struct {
489     FloatClass cls;
490     bool sign;
491     int32_t exp;
492     uint64_t frac_hi;
493     uint64_t frac_lo;
494 } FloatParts128;
495 
496 typedef struct {
497     FloatClass cls;
498     bool sign;
499     int32_t exp;
500     uint64_t frac_hi;
501     uint64_t frac_hm;  /* high-middle */
502     uint64_t frac_lm;  /* low-middle */
503     uint64_t frac_lo;
504 } FloatParts256;
505 
506 /* These apply to the most significant word of each FloatPartsN. */
507 #define DECOMPOSED_BINARY_POINT    63
508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
509 
510 /* Structure holding all of the relevant parameters for a format.
511  *   exp_size: the size of the exponent field
512  *   exp_bias: the offset applied to the exponent field
513  *   exp_max: the maximum normalised exponent
514  *   frac_size: the size of the fraction field
515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516  * The following are computed based the size of fraction
517  *   round_mask: bits below lsb which must be rounded
518  * The following optional modifiers are available:
519  *   arm_althp: handle ARM Alternative Half Precision
520  */
521 typedef struct {
522     int exp_size;
523     int exp_bias;
524     int exp_max;
525     int frac_size;
526     int frac_shift;
527     bool arm_althp;
528     uint64_t round_mask;
529 } FloatFmt;
530 
531 /* Expand fields based on the size of exponent and fraction */
532 #define FLOAT_PARAMS_(E)                                \
533     .exp_size       = E,                                \
534     .exp_bias       = ((1 << E) - 1) >> 1,              \
535     .exp_max        = (1 << E) - 1
536 
537 #define FLOAT_PARAMS(E, F)                              \
538     FLOAT_PARAMS_(E),                                   \
539     .frac_size      = F,                                \
540     .frac_shift     = (-F - 1) & 63,                    \
541     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
542 
543 static const FloatFmt float16_params = {
544     FLOAT_PARAMS(5, 10)
545 };
546 
547 static const FloatFmt float16_params_ahp = {
548     FLOAT_PARAMS(5, 10),
549     .arm_althp = true
550 };
551 
552 static const FloatFmt bfloat16_params = {
553     FLOAT_PARAMS(8, 7)
554 };
555 
556 static const FloatFmt float32_params = {
557     FLOAT_PARAMS(8, 23)
558 };
559 
560 static const FloatFmt float64_params = {
561     FLOAT_PARAMS(11, 52)
562 };
563 
564 static const FloatFmt float128_params = {
565     FLOAT_PARAMS(15, 112)
566 };
567 
568 #define FLOATX80_PARAMS(R)              \
569     FLOAT_PARAMS_(15),                  \
570     .frac_size = R == 64 ? 63 : R,      \
571     .frac_shift = 0,                    \
572     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
573 
574 static const FloatFmt floatx80_params[3] = {
575     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
576     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
577     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
578 };
579 
580 /* Unpack a float to parts, but do not canonicalize.  */
581 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
582 {
583     const int f_size = fmt->frac_size;
584     const int e_size = fmt->exp_size;
585 
586     *r = (FloatParts64) {
587         .cls = float_class_unclassified,
588         .sign = extract64(raw, f_size + e_size, 1),
589         .exp = extract64(raw, f_size, e_size),
590         .frac = extract64(raw, 0, f_size)
591     };
592 }
593 
594 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
595 {
596     unpack_raw64(p, &float16_params, f);
597 }
598 
599 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
600 {
601     unpack_raw64(p, &bfloat16_params, f);
602 }
603 
604 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
605 {
606     unpack_raw64(p, &float32_params, f);
607 }
608 
609 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
610 {
611     unpack_raw64(p, &float64_params, f);
612 }
613 
614 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
615 {
616     *p = (FloatParts128) {
617         .cls = float_class_unclassified,
618         .sign = extract32(f.high, 15, 1),
619         .exp = extract32(f.high, 0, 15),
620         .frac_hi = f.low
621     };
622 }
623 
624 static void float128_unpack_raw(FloatParts128 *p, float128 f)
625 {
626     const int f_size = float128_params.frac_size - 64;
627     const int e_size = float128_params.exp_size;
628 
629     *p = (FloatParts128) {
630         .cls = float_class_unclassified,
631         .sign = extract64(f.high, f_size + e_size, 1),
632         .exp = extract64(f.high, f_size, e_size),
633         .frac_hi = extract64(f.high, 0, f_size),
634         .frac_lo = f.low,
635     };
636 }
637 
638 /* Pack a float from parts, but do not canonicalize.  */
639 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
640 {
641     const int f_size = fmt->frac_size;
642     const int e_size = fmt->exp_size;
643     uint64_t ret;
644 
645     ret = (uint64_t)p->sign << (f_size + e_size);
646     ret = deposit64(ret, f_size, e_size, p->exp);
647     ret = deposit64(ret, 0, f_size, p->frac);
648     return ret;
649 }
650 
651 static inline float16 float16_pack_raw(const FloatParts64 *p)
652 {
653     return make_float16(pack_raw64(p, &float16_params));
654 }
655 
656 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
657 {
658     return pack_raw64(p, &bfloat16_params);
659 }
660 
661 static inline float32 float32_pack_raw(const FloatParts64 *p)
662 {
663     return make_float32(pack_raw64(p, &float32_params));
664 }
665 
666 static inline float64 float64_pack_raw(const FloatParts64 *p)
667 {
668     return make_float64(pack_raw64(p, &float64_params));
669 }
670 
671 static float128 float128_pack_raw(const FloatParts128 *p)
672 {
673     const int f_size = float128_params.frac_size - 64;
674     const int e_size = float128_params.exp_size;
675     uint64_t hi;
676 
677     hi = (uint64_t)p->sign << (f_size + e_size);
678     hi = deposit64(hi, f_size, e_size, p->exp);
679     hi = deposit64(hi, 0, f_size, p->frac_hi);
680     return make_float128(hi, p->frac_lo);
681 }
682 
683 /*----------------------------------------------------------------------------
684 | Functions and definitions to determine:  (1) whether tininess for underflow
685 | is detected before or after rounding by default, (2) what (if anything)
686 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
687 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
688 | are propagated from function inputs to output.  These details are target-
689 | specific.
690 *----------------------------------------------------------------------------*/
691 #include "softfloat-specialize.c.inc"
692 
693 #define PARTS_GENERIC_64_128(NAME, P) \
694     _Generic((P), FloatParts64 *: parts64_##NAME, \
695                   FloatParts128 *: parts128_##NAME)
696 
697 #define PARTS_GENERIC_64_128_256(NAME, P) \
698     _Generic((P), FloatParts64 *: parts64_##NAME, \
699                   FloatParts128 *: parts128_##NAME, \
700                   FloatParts256 *: parts256_##NAME)
701 
702 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
703 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
704 
705 static void parts64_return_nan(FloatParts64 *a, float_status *s);
706 static void parts128_return_nan(FloatParts128 *a, float_status *s);
707 
708 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
709 
710 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
711                                       float_status *s);
712 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
713                                         float_status *s);
714 
715 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
716 
717 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
718                                              FloatParts64 *c, float_status *s,
719                                              int ab_mask, int abc_mask);
720 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
721                                                FloatParts128 *b,
722                                                FloatParts128 *c,
723                                                float_status *s,
724                                                int ab_mask, int abc_mask);
725 
726 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
727     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
728 
729 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
730                                  const FloatFmt *fmt);
731 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
732                                   const FloatFmt *fmt);
733 
734 #define parts_canonicalize(A, S, F) \
735     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
736 
737 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
738                                    const FloatFmt *fmt);
739 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
740                                     const FloatFmt *fmt);
741 
742 #define parts_uncanon_normal(A, S, F) \
743     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
744 
745 static void parts64_uncanon(FloatParts64 *p, float_status *status,
746                             const FloatFmt *fmt);
747 static void parts128_uncanon(FloatParts128 *p, float_status *status,
748                              const FloatFmt *fmt);
749 
750 #define parts_uncanon(A, S, F) \
751     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
752 
753 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
754 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
755 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
756 
757 #define parts_add_normal(A, B) \
758     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
759 
760 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
761 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
762 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
763 
764 #define parts_sub_normal(A, B) \
765     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
766 
767 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
768                                     float_status *s, bool subtract);
769 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
770                                       float_status *s, bool subtract);
771 
772 #define parts_addsub(A, B, S, Z) \
773     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
774 
775 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
776                                  float_status *s);
777 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
778                                    float_status *s);
779 
780 #define parts_mul(A, B, S) \
781     PARTS_GENERIC_64_128(mul, A)(A, B, S)
782 
783 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
784                                     FloatParts64 *c, int flags,
785                                     float_status *s);
786 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
787                                       FloatParts128 *c, int flags,
788                                       float_status *s);
789 
790 #define parts_muladd(A, B, C, Z, S) \
791     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
792 
793 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
794                                  float_status *s);
795 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
796                                    float_status *s);
797 
798 #define parts_div(A, B, S) \
799     PARTS_GENERIC_64_128(div, A)(A, B, S)
800 
801 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
802                                     uint64_t *mod_quot, float_status *s);
803 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
804                                       uint64_t *mod_quot, float_status *s);
805 
806 #define parts_modrem(A, B, Q, S) \
807     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
808 
809 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
810 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
811 
812 #define parts_sqrt(A, S, F) \
813     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
814 
815 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
816                                         int scale, int frac_size);
817 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
818                                          int scale, int frac_size);
819 
820 #define parts_round_to_int_normal(A, R, C, F) \
821     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
822 
823 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
824                                  int scale, float_status *s,
825                                  const FloatFmt *fmt);
826 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
827                                   int scale, float_status *s,
828                                   const FloatFmt *fmt);
829 
830 #define parts_round_to_int(A, R, C, S, F) \
831     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
832 
833 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
834                                      int scale, int64_t min, int64_t max,
835                                      float_status *s);
836 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
837                                      int scale, int64_t min, int64_t max,
838                                      float_status *s);
839 
840 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
841     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
842 
843 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
844                                       int scale, uint64_t max,
845                                       float_status *s);
846 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
847                                        int scale, uint64_t max,
848                                        float_status *s);
849 
850 #define parts_float_to_uint(P, R, Z, M, S) \
851     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
852 
853 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
854                                   int scale, float_status *s);
855 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
856                                    int scale, float_status *s);
857 
858 #define parts_sint_to_float(P, I, Z, S) \
859     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
860 
861 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
862                                   int scale, float_status *s);
863 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
864                                    int scale, float_status *s);
865 
866 #define parts_uint_to_float(P, I, Z, S) \
867     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
868 
869 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
870                                     float_status *s, int flags);
871 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
872                                       float_status *s, int flags);
873 
874 #define parts_minmax(A, B, S, F) \
875     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
876 
877 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
878                            float_status *s, bool q);
879 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
880                             float_status *s, bool q);
881 
882 #define parts_compare(A, B, S, Q) \
883     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
884 
885 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
886 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
887 
888 #define parts_scalbn(A, N, S) \
889     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
890 
891 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
892 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
893 
894 #define parts_log2(A, S, F) \
895     PARTS_GENERIC_64_128(log2, A)(A, S, F)
896 
897 /*
898  * Helper functions for softfloat-parts.c.inc, per-size operations.
899  */
900 
901 #define FRAC_GENERIC_64_128(NAME, P) \
902     _Generic((P), FloatParts64 *: frac64_##NAME, \
903                   FloatParts128 *: frac128_##NAME)
904 
905 #define FRAC_GENERIC_64_128_256(NAME, P) \
906     _Generic((P), FloatParts64 *: frac64_##NAME, \
907                   FloatParts128 *: frac128_##NAME, \
908                   FloatParts256 *: frac256_##NAME)
909 
910 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
911 {
912     return uadd64_overflow(a->frac, b->frac, &r->frac);
913 }
914 
915 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
916 {
917     bool c = 0;
918     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
919     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
920     return c;
921 }
922 
923 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
924 {
925     bool c = 0;
926     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
927     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
928     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
929     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
930     return c;
931 }
932 
933 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
934 
935 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
936 {
937     return uadd64_overflow(a->frac, c, &r->frac);
938 }
939 
940 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
941 {
942     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
943     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
944 }
945 
946 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
947 
948 static void frac64_allones(FloatParts64 *a)
949 {
950     a->frac = -1;
951 }
952 
953 static void frac128_allones(FloatParts128 *a)
954 {
955     a->frac_hi = a->frac_lo = -1;
956 }
957 
958 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
959 
960 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
961 {
962     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
963 }
964 
965 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
966 {
967     uint64_t ta = a->frac_hi, tb = b->frac_hi;
968     if (ta == tb) {
969         ta = a->frac_lo, tb = b->frac_lo;
970         if (ta == tb) {
971             return 0;
972         }
973     }
974     return ta < tb ? -1 : 1;
975 }
976 
977 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
978 
979 static void frac64_clear(FloatParts64 *a)
980 {
981     a->frac = 0;
982 }
983 
984 static void frac128_clear(FloatParts128 *a)
985 {
986     a->frac_hi = a->frac_lo = 0;
987 }
988 
989 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
990 
991 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
992 {
993     uint64_t n1, n0, r, q;
994     bool ret;
995 
996     /*
997      * We want a 2*N / N-bit division to produce exactly an N-bit
998      * result, so that we do not lose any precision and so that we
999      * do not have to renormalize afterward.  If A.frac < B.frac,
1000      * then division would produce an (N-1)-bit result; shift A left
1001      * by one to produce the an N-bit result, and return true to
1002      * decrement the exponent to match.
1003      *
1004      * The udiv_qrnnd algorithm that we're using requires normalization,
1005      * i.e. the msb of the denominator must be set, which is already true.
1006      */
1007     ret = a->frac < b->frac;
1008     if (ret) {
1009         n0 = a->frac;
1010         n1 = 0;
1011     } else {
1012         n0 = a->frac >> 1;
1013         n1 = a->frac << 63;
1014     }
1015     q = udiv_qrnnd(&r, n0, n1, b->frac);
1016 
1017     /* Set lsb if there is a remainder, to set inexact. */
1018     a->frac = q | (r != 0);
1019 
1020     return ret;
1021 }
1022 
1023 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1024 {
1025     uint64_t q0, q1, a0, a1, b0, b1;
1026     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1027     bool ret = false;
1028 
1029     a0 = a->frac_hi, a1 = a->frac_lo;
1030     b0 = b->frac_hi, b1 = b->frac_lo;
1031 
1032     ret = lt128(a0, a1, b0, b1);
1033     if (!ret) {
1034         a1 = shr_double(a0, a1, 1);
1035         a0 = a0 >> 1;
1036     }
1037 
1038     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1039     q0 = estimateDiv128To64(a0, a1, b0);
1040 
1041     /*
1042      * Estimate is high because B1 was not included (unless B1 == 0).
1043      * Reduce quotient and increase remainder until remainder is non-negative.
1044      * This loop will execute 0 to 2 times.
1045      */
1046     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1047     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1048     while (r0 != 0) {
1049         q0--;
1050         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1051     }
1052 
1053     /* Repeat using the remainder, producing a second word of quotient. */
1054     q1 = estimateDiv128To64(r1, r2, b0);
1055     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1056     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1057     while (r1 != 0) {
1058         q1--;
1059         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1060     }
1061 
1062     /* Any remainder indicates inexact; set sticky bit. */
1063     q1 |= (r2 | r3) != 0;
1064 
1065     a->frac_hi = q0;
1066     a->frac_lo = q1;
1067     return ret;
1068 }
1069 
1070 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1071 
1072 static bool frac64_eqz(FloatParts64 *a)
1073 {
1074     return a->frac == 0;
1075 }
1076 
1077 static bool frac128_eqz(FloatParts128 *a)
1078 {
1079     return (a->frac_hi | a->frac_lo) == 0;
1080 }
1081 
1082 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1083 
1084 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1085 {
1086     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1087 }
1088 
1089 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1090 {
1091     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1092                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1093 }
1094 
1095 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1096 
1097 static void frac64_neg(FloatParts64 *a)
1098 {
1099     a->frac = -a->frac;
1100 }
1101 
1102 static void frac128_neg(FloatParts128 *a)
1103 {
1104     bool c = 0;
1105     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1106     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1107 }
1108 
1109 static void frac256_neg(FloatParts256 *a)
1110 {
1111     bool c = 0;
1112     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1113     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1114     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1115     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1116 }
1117 
1118 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1119 
1120 static int frac64_normalize(FloatParts64 *a)
1121 {
1122     if (a->frac) {
1123         int shift = clz64(a->frac);
1124         a->frac <<= shift;
1125         return shift;
1126     }
1127     return 64;
1128 }
1129 
1130 static int frac128_normalize(FloatParts128 *a)
1131 {
1132     if (a->frac_hi) {
1133         int shl = clz64(a->frac_hi);
1134         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1135         a->frac_lo <<= shl;
1136         return shl;
1137     } else if (a->frac_lo) {
1138         int shl = clz64(a->frac_lo);
1139         a->frac_hi = a->frac_lo << shl;
1140         a->frac_lo = 0;
1141         return shl + 64;
1142     }
1143     return 128;
1144 }
1145 
1146 static int frac256_normalize(FloatParts256 *a)
1147 {
1148     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1149     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1150     int ret, shl;
1151 
1152     if (likely(a0)) {
1153         shl = clz64(a0);
1154         if (shl == 0) {
1155             return 0;
1156         }
1157         ret = shl;
1158     } else {
1159         if (a1) {
1160             ret = 64;
1161             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1162         } else if (a2) {
1163             ret = 128;
1164             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1165         } else if (a3) {
1166             ret = 192;
1167             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1168         } else {
1169             ret = 256;
1170             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1171             goto done;
1172         }
1173         shl = clz64(a0);
1174         if (shl == 0) {
1175             goto done;
1176         }
1177         ret += shl;
1178     }
1179 
1180     a0 = shl_double(a0, a1, shl);
1181     a1 = shl_double(a1, a2, shl);
1182     a2 = shl_double(a2, a3, shl);
1183     a3 <<= shl;
1184 
1185  done:
1186     a->frac_hi = a0;
1187     a->frac_hm = a1;
1188     a->frac_lm = a2;
1189     a->frac_lo = a3;
1190     return ret;
1191 }
1192 
1193 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1194 
1195 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1196 {
1197     uint64_t a0, a1, b0, t0, t1, q, quot;
1198     int exp_diff = a->exp - b->exp;
1199     int shift;
1200 
1201     a0 = a->frac;
1202     a1 = 0;
1203 
1204     if (exp_diff < -1) {
1205         if (mod_quot) {
1206             *mod_quot = 0;
1207         }
1208         return;
1209     }
1210     if (exp_diff == -1) {
1211         a0 >>= 1;
1212         exp_diff = 0;
1213     }
1214 
1215     b0 = b->frac;
1216     quot = q = b0 <= a0;
1217     if (q) {
1218         a0 -= b0;
1219     }
1220 
1221     exp_diff -= 64;
1222     while (exp_diff > 0) {
1223         q = estimateDiv128To64(a0, a1, b0);
1224         q = q > 2 ? q - 2 : 0;
1225         mul64To128(b0, q, &t0, &t1);
1226         sub128(a0, a1, t0, t1, &a0, &a1);
1227         shortShift128Left(a0, a1, 62, &a0, &a1);
1228         exp_diff -= 62;
1229         quot = (quot << 62) + q;
1230     }
1231 
1232     exp_diff += 64;
1233     if (exp_diff > 0) {
1234         q = estimateDiv128To64(a0, a1, b0);
1235         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1236         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1237         sub128(a0, a1, t0, t1, &a0, &a1);
1238         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1239         while (le128(t0, t1, a0, a1)) {
1240             ++q;
1241             sub128(a0, a1, t0, t1, &a0, &a1);
1242         }
1243         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1244     } else {
1245         t0 = b0;
1246         t1 = 0;
1247     }
1248 
1249     if (mod_quot) {
1250         *mod_quot = quot;
1251     } else {
1252         sub128(t0, t1, a0, a1, &t0, &t1);
1253         if (lt128(t0, t1, a0, a1) ||
1254             (eq128(t0, t1, a0, a1) && (q & 1))) {
1255             a0 = t0;
1256             a1 = t1;
1257             a->sign = !a->sign;
1258         }
1259     }
1260 
1261     if (likely(a0)) {
1262         shift = clz64(a0);
1263         shortShift128Left(a0, a1, shift, &a0, &a1);
1264     } else if (likely(a1)) {
1265         shift = clz64(a1);
1266         a0 = a1 << shift;
1267         a1 = 0;
1268         shift += 64;
1269     } else {
1270         a->cls = float_class_zero;
1271         return;
1272     }
1273 
1274     a->exp = b->exp + exp_diff - shift;
1275     a->frac = a0 | (a1 != 0);
1276 }
1277 
1278 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1279                            uint64_t *mod_quot)
1280 {
1281     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1282     int exp_diff = a->exp - b->exp;
1283     int shift;
1284 
1285     a0 = a->frac_hi;
1286     a1 = a->frac_lo;
1287     a2 = 0;
1288 
1289     if (exp_diff < -1) {
1290         if (mod_quot) {
1291             *mod_quot = 0;
1292         }
1293         return;
1294     }
1295     if (exp_diff == -1) {
1296         shift128Right(a0, a1, 1, &a0, &a1);
1297         exp_diff = 0;
1298     }
1299 
1300     b0 = b->frac_hi;
1301     b1 = b->frac_lo;
1302 
1303     quot = q = le128(b0, b1, a0, a1);
1304     if (q) {
1305         sub128(a0, a1, b0, b1, &a0, &a1);
1306     }
1307 
1308     exp_diff -= 64;
1309     while (exp_diff > 0) {
1310         q = estimateDiv128To64(a0, a1, b0);
1311         q = q > 4 ? q - 4 : 0;
1312         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1313         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1314         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1315         exp_diff -= 61;
1316         quot = (quot << 61) + q;
1317     }
1318 
1319     exp_diff += 64;
1320     if (exp_diff > 0) {
1321         q = estimateDiv128To64(a0, a1, b0);
1322         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1323         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1324         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1325         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1326         while (le192(t0, t1, t2, a0, a1, a2)) {
1327             ++q;
1328             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329         }
1330         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1331     } else {
1332         t0 = b0;
1333         t1 = b1;
1334         t2 = 0;
1335     }
1336 
1337     if (mod_quot) {
1338         *mod_quot = quot;
1339     } else {
1340         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1341         if (lt192(t0, t1, t2, a0, a1, a2) ||
1342             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1343             a0 = t0;
1344             a1 = t1;
1345             a2 = t2;
1346             a->sign = !a->sign;
1347         }
1348     }
1349 
1350     if (likely(a0)) {
1351         shift = clz64(a0);
1352         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1353     } else if (likely(a1)) {
1354         shift = clz64(a1);
1355         shortShift128Left(a1, a2, shift, &a0, &a1);
1356         a2 = 0;
1357         shift += 64;
1358     } else if (likely(a2)) {
1359         shift = clz64(a2);
1360         a0 = a2 << shift;
1361         a1 = a2 = 0;
1362         shift += 128;
1363     } else {
1364         a->cls = float_class_zero;
1365         return;
1366     }
1367 
1368     a->exp = b->exp + exp_diff - shift;
1369     a->frac_hi = a0;
1370     a->frac_lo = a1 | (a2 != 0);
1371 }
1372 
1373 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1374 
1375 static void frac64_shl(FloatParts64 *a, int c)
1376 {
1377     a->frac <<= c;
1378 }
1379 
1380 static void frac128_shl(FloatParts128 *a, int c)
1381 {
1382     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1383 
1384     if (c & 64) {
1385         a0 = a1, a1 = 0;
1386     }
1387 
1388     c &= 63;
1389     if (c) {
1390         a0 = shl_double(a0, a1, c);
1391         a1 = a1 << c;
1392     }
1393 
1394     a->frac_hi = a0;
1395     a->frac_lo = a1;
1396 }
1397 
1398 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1399 
1400 static void frac64_shr(FloatParts64 *a, int c)
1401 {
1402     a->frac >>= c;
1403 }
1404 
1405 static void frac128_shr(FloatParts128 *a, int c)
1406 {
1407     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1408 
1409     if (c & 64) {
1410         a1 = a0, a0 = 0;
1411     }
1412 
1413     c &= 63;
1414     if (c) {
1415         a1 = shr_double(a0, a1, c);
1416         a0 = a0 >> c;
1417     }
1418 
1419     a->frac_hi = a0;
1420     a->frac_lo = a1;
1421 }
1422 
1423 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1424 
1425 static void frac64_shrjam(FloatParts64 *a, int c)
1426 {
1427     uint64_t a0 = a->frac;
1428 
1429     if (likely(c != 0)) {
1430         if (likely(c < 64)) {
1431             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1432         } else {
1433             a0 = a0 != 0;
1434         }
1435         a->frac = a0;
1436     }
1437 }
1438 
1439 static void frac128_shrjam(FloatParts128 *a, int c)
1440 {
1441     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1442     uint64_t sticky = 0;
1443 
1444     if (unlikely(c == 0)) {
1445         return;
1446     } else if (likely(c < 64)) {
1447         /* nothing */
1448     } else if (likely(c < 128)) {
1449         sticky = a1;
1450         a1 = a0;
1451         a0 = 0;
1452         c &= 63;
1453         if (c == 0) {
1454             goto done;
1455         }
1456     } else {
1457         sticky = a0 | a1;
1458         a0 = a1 = 0;
1459         goto done;
1460     }
1461 
1462     sticky |= shr_double(a1, 0, c);
1463     a1 = shr_double(a0, a1, c);
1464     a0 = a0 >> c;
1465 
1466  done:
1467     a->frac_lo = a1 | (sticky != 0);
1468     a->frac_hi = a0;
1469 }
1470 
1471 static void frac256_shrjam(FloatParts256 *a, int c)
1472 {
1473     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1474     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1475     uint64_t sticky = 0;
1476 
1477     if (unlikely(c == 0)) {
1478         return;
1479     } else if (likely(c < 64)) {
1480         /* nothing */
1481     } else if (likely(c < 256)) {
1482         if (unlikely(c & 128)) {
1483             sticky |= a2 | a3;
1484             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1485         }
1486         if (unlikely(c & 64)) {
1487             sticky |= a3;
1488             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1489         }
1490         c &= 63;
1491         if (c == 0) {
1492             goto done;
1493         }
1494     } else {
1495         sticky = a0 | a1 | a2 | a3;
1496         a0 = a1 = a2 = a3 = 0;
1497         goto done;
1498     }
1499 
1500     sticky |= shr_double(a3, 0, c);
1501     a3 = shr_double(a2, a3, c);
1502     a2 = shr_double(a1, a2, c);
1503     a1 = shr_double(a0, a1, c);
1504     a0 = a0 >> c;
1505 
1506  done:
1507     a->frac_lo = a3 | (sticky != 0);
1508     a->frac_lm = a2;
1509     a->frac_hm = a1;
1510     a->frac_hi = a0;
1511 }
1512 
1513 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1514 
1515 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1516 {
1517     return usub64_overflow(a->frac, b->frac, &r->frac);
1518 }
1519 
1520 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1521 {
1522     bool c = 0;
1523     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1524     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1525     return c;
1526 }
1527 
1528 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1529 {
1530     bool c = 0;
1531     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1532     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1533     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1534     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1535     return c;
1536 }
1537 
1538 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1539 
1540 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1541 {
1542     r->frac = a->frac_hi | (a->frac_lo != 0);
1543 }
1544 
1545 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1546 {
1547     r->frac_hi = a->frac_hi;
1548     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1549 }
1550 
1551 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1552 
1553 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1554 {
1555     r->frac_hi = a->frac;
1556     r->frac_lo = 0;
1557 }
1558 
1559 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1560 {
1561     r->frac_hi = a->frac_hi;
1562     r->frac_hm = a->frac_lo;
1563     r->frac_lm = 0;
1564     r->frac_lo = 0;
1565 }
1566 
1567 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1568 
1569 /*
1570  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1571  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1572  * and thus MIT licenced.
1573  */
1574 static const uint16_t rsqrt_tab[128] = {
1575     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1576     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1577     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1578     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1579     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1580     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1581     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1582     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1583     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1584     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1585     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1586     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1587     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1588     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1589     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1590     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1591 };
1592 
1593 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1594 #define FloatPartsN    glue(FloatParts,N)
1595 #define FloatPartsW    glue(FloatParts,W)
1596 
1597 #define N 64
1598 #define W 128
1599 
1600 #include "softfloat-parts-addsub.c.inc"
1601 #include "softfloat-parts.c.inc"
1602 
1603 #undef  N
1604 #undef  W
1605 #define N 128
1606 #define W 256
1607 
1608 #include "softfloat-parts-addsub.c.inc"
1609 #include "softfloat-parts.c.inc"
1610 
1611 #undef  N
1612 #undef  W
1613 #define N            256
1614 
1615 #include "softfloat-parts-addsub.c.inc"
1616 
1617 #undef  N
1618 #undef  W
1619 #undef  partsN
1620 #undef  FloatPartsN
1621 #undef  FloatPartsW
1622 
1623 /*
1624  * Pack/unpack routines with a specific FloatFmt.
1625  */
1626 
1627 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1628                                       float_status *s, const FloatFmt *params)
1629 {
1630     float16_unpack_raw(p, f);
1631     parts_canonicalize(p, s, params);
1632 }
1633 
1634 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1635                                      float_status *s)
1636 {
1637     float16a_unpack_canonical(p, f, s, &float16_params);
1638 }
1639 
1640 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1641                                       float_status *s)
1642 {
1643     bfloat16_unpack_raw(p, f);
1644     parts_canonicalize(p, s, &bfloat16_params);
1645 }
1646 
1647 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1648                                              float_status *s,
1649                                              const FloatFmt *params)
1650 {
1651     parts_uncanon(p, s, params);
1652     return float16_pack_raw(p);
1653 }
1654 
1655 static float16 float16_round_pack_canonical(FloatParts64 *p,
1656                                             float_status *s)
1657 {
1658     return float16a_round_pack_canonical(p, s, &float16_params);
1659 }
1660 
1661 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1662                                               float_status *s)
1663 {
1664     parts_uncanon(p, s, &bfloat16_params);
1665     return bfloat16_pack_raw(p);
1666 }
1667 
1668 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1669                                      float_status *s)
1670 {
1671     float32_unpack_raw(p, f);
1672     parts_canonicalize(p, s, &float32_params);
1673 }
1674 
1675 static float32 float32_round_pack_canonical(FloatParts64 *p,
1676                                             float_status *s)
1677 {
1678     parts_uncanon(p, s, &float32_params);
1679     return float32_pack_raw(p);
1680 }
1681 
1682 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1683                                      float_status *s)
1684 {
1685     float64_unpack_raw(p, f);
1686     parts_canonicalize(p, s, &float64_params);
1687 }
1688 
1689 static float64 float64_round_pack_canonical(FloatParts64 *p,
1690                                             float_status *s)
1691 {
1692     parts_uncanon(p, s, &float64_params);
1693     return float64_pack_raw(p);
1694 }
1695 
1696 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1697                                                float_status *s)
1698 {
1699     parts_uncanon(p, s, &float32_params);
1700 
1701     /*
1702      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1703      * We need to adjust the fraction higher so that the least N bits are
1704      * zero, and the fraction is adjacent to the float64 implicit bit.
1705      */
1706     switch (p->cls) {
1707     case float_class_normal:
1708         if (unlikely(p->exp == 0)) {
1709             /*
1710              * The result is denormal for float32, but can be represented
1711              * in normalized form for float64.  Adjust, per canonicalize.
1712              */
1713             int shift = frac_normalize(p);
1714             p->exp = (float32_params.frac_shift -
1715                       float32_params.exp_bias - shift + 1 +
1716                       float64_params.exp_bias);
1717             frac_shr(p, float64_params.frac_shift);
1718         } else {
1719             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1720             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1721         }
1722         break;
1723     case float_class_snan:
1724     case float_class_qnan:
1725         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1726         p->exp = float64_params.exp_max;
1727         break;
1728     case float_class_inf:
1729         p->exp = float64_params.exp_max;
1730         break;
1731     case float_class_zero:
1732         break;
1733     default:
1734         g_assert_not_reached();
1735     }
1736 
1737     return float64_pack_raw(p);
1738 }
1739 
1740 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1741                                       float_status *s)
1742 {
1743     float128_unpack_raw(p, f);
1744     parts_canonicalize(p, s, &float128_params);
1745 }
1746 
1747 static float128 float128_round_pack_canonical(FloatParts128 *p,
1748                                               float_status *s)
1749 {
1750     parts_uncanon(p, s, &float128_params);
1751     return float128_pack_raw(p);
1752 }
1753 
1754 /* Returns false if the encoding is invalid. */
1755 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1756                                       float_status *s)
1757 {
1758     /* Ensure rounding precision is set before beginning. */
1759     switch (s->floatx80_rounding_precision) {
1760     case floatx80_precision_x:
1761     case floatx80_precision_d:
1762     case floatx80_precision_s:
1763         break;
1764     default:
1765         g_assert_not_reached();
1766     }
1767 
1768     if (unlikely(floatx80_invalid_encoding(f))) {
1769         float_raise(float_flag_invalid, s);
1770         return false;
1771     }
1772 
1773     floatx80_unpack_raw(p, f);
1774 
1775     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1776         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1777     } else {
1778         /* The explicit integer bit is ignored, after invalid checks. */
1779         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1780         p->cls = (p->frac_hi == 0 ? float_class_inf
1781                   : parts_is_snan_frac(p->frac_hi, s)
1782                   ? float_class_snan : float_class_qnan);
1783     }
1784     return true;
1785 }
1786 
1787 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1788                                               float_status *s)
1789 {
1790     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1791     uint64_t frac;
1792     int exp;
1793 
1794     switch (p->cls) {
1795     case float_class_normal:
1796         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1797             parts_uncanon_normal(p, s, fmt);
1798             frac = p->frac_hi;
1799             exp = p->exp;
1800         } else {
1801             FloatParts64 p64;
1802 
1803             p64.sign = p->sign;
1804             p64.exp = p->exp;
1805             frac_truncjam(&p64, p);
1806             parts_uncanon_normal(&p64, s, fmt);
1807             frac = p64.frac;
1808             exp = p64.exp;
1809         }
1810         if (exp != fmt->exp_max) {
1811             break;
1812         }
1813         /* rounded to inf -- fall through to set frac correctly */
1814 
1815     case float_class_inf:
1816         /* x86 and m68k differ in the setting of the integer bit. */
1817         frac = floatx80_infinity_low;
1818         exp = fmt->exp_max;
1819         break;
1820 
1821     case float_class_zero:
1822         frac = 0;
1823         exp = 0;
1824         break;
1825 
1826     case float_class_snan:
1827     case float_class_qnan:
1828         /* NaNs have the integer bit set. */
1829         frac = p->frac_hi | (1ull << 63);
1830         exp = fmt->exp_max;
1831         break;
1832 
1833     default:
1834         g_assert_not_reached();
1835     }
1836 
1837     return packFloatx80(p->sign, exp, frac);
1838 }
1839 
1840 /*
1841  * Addition and subtraction
1842  */
1843 
1844 static float16 QEMU_FLATTEN
1845 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1846 {
1847     FloatParts64 pa, pb, *pr;
1848 
1849     float16_unpack_canonical(&pa, a, status);
1850     float16_unpack_canonical(&pb, b, status);
1851     pr = parts_addsub(&pa, &pb, status, subtract);
1852 
1853     return float16_round_pack_canonical(pr, status);
1854 }
1855 
1856 float16 float16_add(float16 a, float16 b, float_status *status)
1857 {
1858     return float16_addsub(a, b, status, false);
1859 }
1860 
1861 float16 float16_sub(float16 a, float16 b, float_status *status)
1862 {
1863     return float16_addsub(a, b, status, true);
1864 }
1865 
1866 static float32 QEMU_SOFTFLOAT_ATTR
1867 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1868 {
1869     FloatParts64 pa, pb, *pr;
1870 
1871     float32_unpack_canonical(&pa, a, status);
1872     float32_unpack_canonical(&pb, b, status);
1873     pr = parts_addsub(&pa, &pb, status, subtract);
1874 
1875     return float32_round_pack_canonical(pr, status);
1876 }
1877 
1878 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1879 {
1880     return soft_f32_addsub(a, b, status, false);
1881 }
1882 
1883 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1884 {
1885     return soft_f32_addsub(a, b, status, true);
1886 }
1887 
1888 static float64 QEMU_SOFTFLOAT_ATTR
1889 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1890 {
1891     FloatParts64 pa, pb, *pr;
1892 
1893     float64_unpack_canonical(&pa, a, status);
1894     float64_unpack_canonical(&pb, b, status);
1895     pr = parts_addsub(&pa, &pb, status, subtract);
1896 
1897     return float64_round_pack_canonical(pr, status);
1898 }
1899 
1900 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1901 {
1902     return soft_f64_addsub(a, b, status, false);
1903 }
1904 
1905 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1906 {
1907     return soft_f64_addsub(a, b, status, true);
1908 }
1909 
1910 static float hard_f32_add(float a, float b)
1911 {
1912     return a + b;
1913 }
1914 
1915 static float hard_f32_sub(float a, float b)
1916 {
1917     return a - b;
1918 }
1919 
1920 static double hard_f64_add(double a, double b)
1921 {
1922     return a + b;
1923 }
1924 
1925 static double hard_f64_sub(double a, double b)
1926 {
1927     return a - b;
1928 }
1929 
1930 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1931 {
1932     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1933         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1934     }
1935     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1936 }
1937 
1938 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1939 {
1940     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1941         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1942     } else {
1943         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1944     }
1945 }
1946 
1947 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1948                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1949 {
1950     return float32_gen2(a, b, s, hard, soft,
1951                         f32_is_zon2, f32_addsubmul_post);
1952 }
1953 
1954 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1955                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1956 {
1957     return float64_gen2(a, b, s, hard, soft,
1958                         f64_is_zon2, f64_addsubmul_post);
1959 }
1960 
1961 float32 QEMU_FLATTEN
1962 float32_add(float32 a, float32 b, float_status *s)
1963 {
1964     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1965 }
1966 
1967 float32 QEMU_FLATTEN
1968 float32_sub(float32 a, float32 b, float_status *s)
1969 {
1970     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1971 }
1972 
1973 float64 QEMU_FLATTEN
1974 float64_add(float64 a, float64 b, float_status *s)
1975 {
1976     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1977 }
1978 
1979 float64 QEMU_FLATTEN
1980 float64_sub(float64 a, float64 b, float_status *s)
1981 {
1982     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1983 }
1984 
1985 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1986                                  bool subtract)
1987 {
1988     FloatParts64 pa, pb, *pr;
1989 
1990     float64_unpack_canonical(&pa, a, status);
1991     float64_unpack_canonical(&pb, b, status);
1992     pr = parts_addsub(&pa, &pb, status, subtract);
1993 
1994     return float64r32_round_pack_canonical(pr, status);
1995 }
1996 
1997 float64 float64r32_add(float64 a, float64 b, float_status *status)
1998 {
1999     return float64r32_addsub(a, b, status, false);
2000 }
2001 
2002 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2003 {
2004     return float64r32_addsub(a, b, status, true);
2005 }
2006 
2007 static bfloat16 QEMU_FLATTEN
2008 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2009 {
2010     FloatParts64 pa, pb, *pr;
2011 
2012     bfloat16_unpack_canonical(&pa, a, status);
2013     bfloat16_unpack_canonical(&pb, b, status);
2014     pr = parts_addsub(&pa, &pb, status, subtract);
2015 
2016     return bfloat16_round_pack_canonical(pr, status);
2017 }
2018 
2019 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2020 {
2021     return bfloat16_addsub(a, b, status, false);
2022 }
2023 
2024 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2025 {
2026     return bfloat16_addsub(a, b, status, true);
2027 }
2028 
2029 static float128 QEMU_FLATTEN
2030 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2031 {
2032     FloatParts128 pa, pb, *pr;
2033 
2034     float128_unpack_canonical(&pa, a, status);
2035     float128_unpack_canonical(&pb, b, status);
2036     pr = parts_addsub(&pa, &pb, status, subtract);
2037 
2038     return float128_round_pack_canonical(pr, status);
2039 }
2040 
2041 float128 float128_add(float128 a, float128 b, float_status *status)
2042 {
2043     return float128_addsub(a, b, status, false);
2044 }
2045 
2046 float128 float128_sub(float128 a, float128 b, float_status *status)
2047 {
2048     return float128_addsub(a, b, status, true);
2049 }
2050 
2051 static floatx80 QEMU_FLATTEN
2052 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2053 {
2054     FloatParts128 pa, pb, *pr;
2055 
2056     if (!floatx80_unpack_canonical(&pa, a, status) ||
2057         !floatx80_unpack_canonical(&pb, b, status)) {
2058         return floatx80_default_nan(status);
2059     }
2060 
2061     pr = parts_addsub(&pa, &pb, status, subtract);
2062     return floatx80_round_pack_canonical(pr, status);
2063 }
2064 
2065 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2066 {
2067     return floatx80_addsub(a, b, status, false);
2068 }
2069 
2070 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2071 {
2072     return floatx80_addsub(a, b, status, true);
2073 }
2074 
2075 /*
2076  * Multiplication
2077  */
2078 
2079 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2080 {
2081     FloatParts64 pa, pb, *pr;
2082 
2083     float16_unpack_canonical(&pa, a, status);
2084     float16_unpack_canonical(&pb, b, status);
2085     pr = parts_mul(&pa, &pb, status);
2086 
2087     return float16_round_pack_canonical(pr, status);
2088 }
2089 
2090 static float32 QEMU_SOFTFLOAT_ATTR
2091 soft_f32_mul(float32 a, float32 b, float_status *status)
2092 {
2093     FloatParts64 pa, pb, *pr;
2094 
2095     float32_unpack_canonical(&pa, a, status);
2096     float32_unpack_canonical(&pb, b, status);
2097     pr = parts_mul(&pa, &pb, status);
2098 
2099     return float32_round_pack_canonical(pr, status);
2100 }
2101 
2102 static float64 QEMU_SOFTFLOAT_ATTR
2103 soft_f64_mul(float64 a, float64 b, float_status *status)
2104 {
2105     FloatParts64 pa, pb, *pr;
2106 
2107     float64_unpack_canonical(&pa, a, status);
2108     float64_unpack_canonical(&pb, b, status);
2109     pr = parts_mul(&pa, &pb, status);
2110 
2111     return float64_round_pack_canonical(pr, status);
2112 }
2113 
2114 static float hard_f32_mul(float a, float b)
2115 {
2116     return a * b;
2117 }
2118 
2119 static double hard_f64_mul(double a, double b)
2120 {
2121     return a * b;
2122 }
2123 
2124 float32 QEMU_FLATTEN
2125 float32_mul(float32 a, float32 b, float_status *s)
2126 {
2127     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2128                         f32_is_zon2, f32_addsubmul_post);
2129 }
2130 
2131 float64 QEMU_FLATTEN
2132 float64_mul(float64 a, float64 b, float_status *s)
2133 {
2134     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2135                         f64_is_zon2, f64_addsubmul_post);
2136 }
2137 
2138 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2139 {
2140     FloatParts64 pa, pb, *pr;
2141 
2142     float64_unpack_canonical(&pa, a, status);
2143     float64_unpack_canonical(&pb, b, status);
2144     pr = parts_mul(&pa, &pb, status);
2145 
2146     return float64r32_round_pack_canonical(pr, status);
2147 }
2148 
2149 bfloat16 QEMU_FLATTEN
2150 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2151 {
2152     FloatParts64 pa, pb, *pr;
2153 
2154     bfloat16_unpack_canonical(&pa, a, status);
2155     bfloat16_unpack_canonical(&pb, b, status);
2156     pr = parts_mul(&pa, &pb, status);
2157 
2158     return bfloat16_round_pack_canonical(pr, status);
2159 }
2160 
2161 float128 QEMU_FLATTEN
2162 float128_mul(float128 a, float128 b, float_status *status)
2163 {
2164     FloatParts128 pa, pb, *pr;
2165 
2166     float128_unpack_canonical(&pa, a, status);
2167     float128_unpack_canonical(&pb, b, status);
2168     pr = parts_mul(&pa, &pb, status);
2169 
2170     return float128_round_pack_canonical(pr, status);
2171 }
2172 
2173 floatx80 QEMU_FLATTEN
2174 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2175 {
2176     FloatParts128 pa, pb, *pr;
2177 
2178     if (!floatx80_unpack_canonical(&pa, a, status) ||
2179         !floatx80_unpack_canonical(&pb, b, status)) {
2180         return floatx80_default_nan(status);
2181     }
2182 
2183     pr = parts_mul(&pa, &pb, status);
2184     return floatx80_round_pack_canonical(pr, status);
2185 }
2186 
2187 /*
2188  * Fused multiply-add
2189  */
2190 
2191 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2192                                     int flags, float_status *status)
2193 {
2194     FloatParts64 pa, pb, pc, *pr;
2195 
2196     float16_unpack_canonical(&pa, a, status);
2197     float16_unpack_canonical(&pb, b, status);
2198     float16_unpack_canonical(&pc, c, status);
2199     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2200 
2201     return float16_round_pack_canonical(pr, status);
2202 }
2203 
2204 static float32 QEMU_SOFTFLOAT_ATTR
2205 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2206                 float_status *status)
2207 {
2208     FloatParts64 pa, pb, pc, *pr;
2209 
2210     float32_unpack_canonical(&pa, a, status);
2211     float32_unpack_canonical(&pb, b, status);
2212     float32_unpack_canonical(&pc, c, status);
2213     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2214 
2215     return float32_round_pack_canonical(pr, status);
2216 }
2217 
2218 static float64 QEMU_SOFTFLOAT_ATTR
2219 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2220                 float_status *status)
2221 {
2222     FloatParts64 pa, pb, pc, *pr;
2223 
2224     float64_unpack_canonical(&pa, a, status);
2225     float64_unpack_canonical(&pb, b, status);
2226     float64_unpack_canonical(&pc, c, status);
2227     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2228 
2229     return float64_round_pack_canonical(pr, status);
2230 }
2231 
2232 static bool force_soft_fma;
2233 
2234 float32 QEMU_FLATTEN
2235 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2236 {
2237     union_float32 ua, ub, uc, ur;
2238 
2239     ua.s = xa;
2240     ub.s = xb;
2241     uc.s = xc;
2242 
2243     if (unlikely(!can_use_fpu(s))) {
2244         goto soft;
2245     }
2246     if (unlikely(flags & float_muladd_halve_result)) {
2247         goto soft;
2248     }
2249 
2250     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2251     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2252         goto soft;
2253     }
2254 
2255     if (unlikely(force_soft_fma)) {
2256         goto soft;
2257     }
2258 
2259     /*
2260      * When (a || b) == 0, there's no need to check for under/over flow,
2261      * since we know the addend is (normal || 0) and the product is 0.
2262      */
2263     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2264         union_float32 up;
2265         bool prod_sign;
2266 
2267         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2268         prod_sign ^= !!(flags & float_muladd_negate_product);
2269         up.s = float32_set_sign(float32_zero, prod_sign);
2270 
2271         if (flags & float_muladd_negate_c) {
2272             uc.h = -uc.h;
2273         }
2274         ur.h = up.h + uc.h;
2275     } else {
2276         union_float32 ua_orig = ua;
2277         union_float32 uc_orig = uc;
2278 
2279         if (flags & float_muladd_negate_product) {
2280             ua.h = -ua.h;
2281         }
2282         if (flags & float_muladd_negate_c) {
2283             uc.h = -uc.h;
2284         }
2285 
2286         ur.h = fmaf(ua.h, ub.h, uc.h);
2287 
2288         if (unlikely(f32_is_inf(ur))) {
2289             float_raise(float_flag_overflow, s);
2290         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2291             ua = ua_orig;
2292             uc = uc_orig;
2293             goto soft;
2294         }
2295     }
2296     if (flags & float_muladd_negate_result) {
2297         return float32_chs(ur.s);
2298     }
2299     return ur.s;
2300 
2301  soft:
2302     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2303 }
2304 
2305 float64 QEMU_FLATTEN
2306 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2307 {
2308     union_float64 ua, ub, uc, ur;
2309 
2310     ua.s = xa;
2311     ub.s = xb;
2312     uc.s = xc;
2313 
2314     if (unlikely(!can_use_fpu(s))) {
2315         goto soft;
2316     }
2317     if (unlikely(flags & float_muladd_halve_result)) {
2318         goto soft;
2319     }
2320 
2321     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2322     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2323         goto soft;
2324     }
2325 
2326     if (unlikely(force_soft_fma)) {
2327         goto soft;
2328     }
2329 
2330     /*
2331      * When (a || b) == 0, there's no need to check for under/over flow,
2332      * since we know the addend is (normal || 0) and the product is 0.
2333      */
2334     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2335         union_float64 up;
2336         bool prod_sign;
2337 
2338         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2339         prod_sign ^= !!(flags & float_muladd_negate_product);
2340         up.s = float64_set_sign(float64_zero, prod_sign);
2341 
2342         if (flags & float_muladd_negate_c) {
2343             uc.h = -uc.h;
2344         }
2345         ur.h = up.h + uc.h;
2346     } else {
2347         union_float64 ua_orig = ua;
2348         union_float64 uc_orig = uc;
2349 
2350         if (flags & float_muladd_negate_product) {
2351             ua.h = -ua.h;
2352         }
2353         if (flags & float_muladd_negate_c) {
2354             uc.h = -uc.h;
2355         }
2356 
2357         ur.h = fma(ua.h, ub.h, uc.h);
2358 
2359         if (unlikely(f64_is_inf(ur))) {
2360             float_raise(float_flag_overflow, s);
2361         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2362             ua = ua_orig;
2363             uc = uc_orig;
2364             goto soft;
2365         }
2366     }
2367     if (flags & float_muladd_negate_result) {
2368         return float64_chs(ur.s);
2369     }
2370     return ur.s;
2371 
2372  soft:
2373     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2374 }
2375 
2376 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2377                           int flags, float_status *status)
2378 {
2379     FloatParts64 pa, pb, pc, *pr;
2380 
2381     float64_unpack_canonical(&pa, a, status);
2382     float64_unpack_canonical(&pb, b, status);
2383     float64_unpack_canonical(&pc, c, status);
2384     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2385 
2386     return float64r32_round_pack_canonical(pr, status);
2387 }
2388 
2389 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2390                                       int flags, float_status *status)
2391 {
2392     FloatParts64 pa, pb, pc, *pr;
2393 
2394     bfloat16_unpack_canonical(&pa, a, status);
2395     bfloat16_unpack_canonical(&pb, b, status);
2396     bfloat16_unpack_canonical(&pc, c, status);
2397     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2398 
2399     return bfloat16_round_pack_canonical(pr, status);
2400 }
2401 
2402 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2403                                       int flags, float_status *status)
2404 {
2405     FloatParts128 pa, pb, pc, *pr;
2406 
2407     float128_unpack_canonical(&pa, a, status);
2408     float128_unpack_canonical(&pb, b, status);
2409     float128_unpack_canonical(&pc, c, status);
2410     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2411 
2412     return float128_round_pack_canonical(pr, status);
2413 }
2414 
2415 /*
2416  * Division
2417  */
2418 
2419 float16 float16_div(float16 a, float16 b, float_status *status)
2420 {
2421     FloatParts64 pa, pb, *pr;
2422 
2423     float16_unpack_canonical(&pa, a, status);
2424     float16_unpack_canonical(&pb, b, status);
2425     pr = parts_div(&pa, &pb, status);
2426 
2427     return float16_round_pack_canonical(pr, status);
2428 }
2429 
2430 static float32 QEMU_SOFTFLOAT_ATTR
2431 soft_f32_div(float32 a, float32 b, float_status *status)
2432 {
2433     FloatParts64 pa, pb, *pr;
2434 
2435     float32_unpack_canonical(&pa, a, status);
2436     float32_unpack_canonical(&pb, b, status);
2437     pr = parts_div(&pa, &pb, status);
2438 
2439     return float32_round_pack_canonical(pr, status);
2440 }
2441 
2442 static float64 QEMU_SOFTFLOAT_ATTR
2443 soft_f64_div(float64 a, float64 b, float_status *status)
2444 {
2445     FloatParts64 pa, pb, *pr;
2446 
2447     float64_unpack_canonical(&pa, a, status);
2448     float64_unpack_canonical(&pb, b, status);
2449     pr = parts_div(&pa, &pb, status);
2450 
2451     return float64_round_pack_canonical(pr, status);
2452 }
2453 
2454 static float hard_f32_div(float a, float b)
2455 {
2456     return a / b;
2457 }
2458 
2459 static double hard_f64_div(double a, double b)
2460 {
2461     return a / b;
2462 }
2463 
2464 static bool f32_div_pre(union_float32 a, union_float32 b)
2465 {
2466     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2467         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2468                fpclassify(b.h) == FP_NORMAL;
2469     }
2470     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2471 }
2472 
2473 static bool f64_div_pre(union_float64 a, union_float64 b)
2474 {
2475     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2476         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2477                fpclassify(b.h) == FP_NORMAL;
2478     }
2479     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2480 }
2481 
2482 static bool f32_div_post(union_float32 a, union_float32 b)
2483 {
2484     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2485         return fpclassify(a.h) != FP_ZERO;
2486     }
2487     return !float32_is_zero(a.s);
2488 }
2489 
2490 static bool f64_div_post(union_float64 a, union_float64 b)
2491 {
2492     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2493         return fpclassify(a.h) != FP_ZERO;
2494     }
2495     return !float64_is_zero(a.s);
2496 }
2497 
2498 float32 QEMU_FLATTEN
2499 float32_div(float32 a, float32 b, float_status *s)
2500 {
2501     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2502                         f32_div_pre, f32_div_post);
2503 }
2504 
2505 float64 QEMU_FLATTEN
2506 float64_div(float64 a, float64 b, float_status *s)
2507 {
2508     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2509                         f64_div_pre, f64_div_post);
2510 }
2511 
2512 float64 float64r32_div(float64 a, float64 b, float_status *status)
2513 {
2514     FloatParts64 pa, pb, *pr;
2515 
2516     float64_unpack_canonical(&pa, a, status);
2517     float64_unpack_canonical(&pb, b, status);
2518     pr = parts_div(&pa, &pb, status);
2519 
2520     return float64r32_round_pack_canonical(pr, status);
2521 }
2522 
2523 bfloat16 QEMU_FLATTEN
2524 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2525 {
2526     FloatParts64 pa, pb, *pr;
2527 
2528     bfloat16_unpack_canonical(&pa, a, status);
2529     bfloat16_unpack_canonical(&pb, b, status);
2530     pr = parts_div(&pa, &pb, status);
2531 
2532     return bfloat16_round_pack_canonical(pr, status);
2533 }
2534 
2535 float128 QEMU_FLATTEN
2536 float128_div(float128 a, float128 b, float_status *status)
2537 {
2538     FloatParts128 pa, pb, *pr;
2539 
2540     float128_unpack_canonical(&pa, a, status);
2541     float128_unpack_canonical(&pb, b, status);
2542     pr = parts_div(&pa, &pb, status);
2543 
2544     return float128_round_pack_canonical(pr, status);
2545 }
2546 
2547 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2548 {
2549     FloatParts128 pa, pb, *pr;
2550 
2551     if (!floatx80_unpack_canonical(&pa, a, status) ||
2552         !floatx80_unpack_canonical(&pb, b, status)) {
2553         return floatx80_default_nan(status);
2554     }
2555 
2556     pr = parts_div(&pa, &pb, status);
2557     return floatx80_round_pack_canonical(pr, status);
2558 }
2559 
2560 /*
2561  * Remainder
2562  */
2563 
2564 float32 float32_rem(float32 a, float32 b, float_status *status)
2565 {
2566     FloatParts64 pa, pb, *pr;
2567 
2568     float32_unpack_canonical(&pa, a, status);
2569     float32_unpack_canonical(&pb, b, status);
2570     pr = parts_modrem(&pa, &pb, NULL, status);
2571 
2572     return float32_round_pack_canonical(pr, status);
2573 }
2574 
2575 float64 float64_rem(float64 a, float64 b, float_status *status)
2576 {
2577     FloatParts64 pa, pb, *pr;
2578 
2579     float64_unpack_canonical(&pa, a, status);
2580     float64_unpack_canonical(&pb, b, status);
2581     pr = parts_modrem(&pa, &pb, NULL, status);
2582 
2583     return float64_round_pack_canonical(pr, status);
2584 }
2585 
2586 float128 float128_rem(float128 a, float128 b, float_status *status)
2587 {
2588     FloatParts128 pa, pb, *pr;
2589 
2590     float128_unpack_canonical(&pa, a, status);
2591     float128_unpack_canonical(&pb, b, status);
2592     pr = parts_modrem(&pa, &pb, NULL, status);
2593 
2594     return float128_round_pack_canonical(pr, status);
2595 }
2596 
2597 /*
2598  * Returns the remainder of the extended double-precision floating-point value
2599  * `a' with respect to the corresponding value `b'.
2600  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2601  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2602  * the remainder based on truncating the quotient toward zero instead and
2603  * *quotient is set to the low 64 bits of the absolute value of the integer
2604  * quotient.
2605  */
2606 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2607                          uint64_t *quotient, float_status *status)
2608 {
2609     FloatParts128 pa, pb, *pr;
2610 
2611     *quotient = 0;
2612     if (!floatx80_unpack_canonical(&pa, a, status) ||
2613         !floatx80_unpack_canonical(&pb, b, status)) {
2614         return floatx80_default_nan(status);
2615     }
2616     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2617 
2618     return floatx80_round_pack_canonical(pr, status);
2619 }
2620 
2621 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2622 {
2623     uint64_t quotient;
2624     return floatx80_modrem(a, b, false, &quotient, status);
2625 }
2626 
2627 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2628 {
2629     uint64_t quotient;
2630     return floatx80_modrem(a, b, true, &quotient, status);
2631 }
2632 
2633 /*
2634  * Float to Float conversions
2635  *
2636  * Returns the result of converting one float format to another. The
2637  * conversion is performed according to the IEC/IEEE Standard for
2638  * Binary Floating-Point Arithmetic.
2639  *
2640  * Usually this only needs to take care of raising invalid exceptions
2641  * and handling the conversion on NaNs.
2642  */
2643 
2644 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2645 {
2646     switch (a->cls) {
2647     case float_class_snan:
2648         float_raise(float_flag_invalid_snan, s);
2649         /* fall through */
2650     case float_class_qnan:
2651         /*
2652          * There is no NaN in the destination format.  Raise Invalid
2653          * and return a zero with the sign of the input NaN.
2654          */
2655         float_raise(float_flag_invalid, s);
2656         a->cls = float_class_zero;
2657         break;
2658 
2659     case float_class_inf:
2660         /*
2661          * There is no Inf in the destination format.  Raise Invalid
2662          * and return the maximum normal with the correct sign.
2663          */
2664         float_raise(float_flag_invalid, s);
2665         a->cls = float_class_normal;
2666         a->exp = float16_params_ahp.exp_max;
2667         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2668                                   float16_params_ahp.frac_size + 1);
2669         break;
2670 
2671     case float_class_normal:
2672     case float_class_zero:
2673         break;
2674 
2675     default:
2676         g_assert_not_reached();
2677     }
2678 }
2679 
2680 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2681 {
2682     if (is_nan(a->cls)) {
2683         parts_return_nan(a, s);
2684     }
2685 }
2686 
2687 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2688 {
2689     if (is_nan(a->cls)) {
2690         parts_return_nan(a, s);
2691     }
2692 }
2693 
2694 #define parts_float_to_float(P, S) \
2695     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2696 
2697 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2698                                         float_status *s)
2699 {
2700     a->cls = b->cls;
2701     a->sign = b->sign;
2702     a->exp = b->exp;
2703 
2704     if (a->cls == float_class_normal) {
2705         frac_truncjam(a, b);
2706     } else if (is_nan(a->cls)) {
2707         /* Discard the low bits of the NaN. */
2708         a->frac = b->frac_hi;
2709         parts_return_nan(a, s);
2710     }
2711 }
2712 
2713 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2714                                        float_status *s)
2715 {
2716     a->cls = b->cls;
2717     a->sign = b->sign;
2718     a->exp = b->exp;
2719     frac_widen(a, b);
2720 
2721     if (is_nan(a->cls)) {
2722         parts_return_nan(a, s);
2723     }
2724 }
2725 
2726 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2727 {
2728     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2729     FloatParts64 p;
2730 
2731     float16a_unpack_canonical(&p, a, s, fmt16);
2732     parts_float_to_float(&p, s);
2733     return float32_round_pack_canonical(&p, s);
2734 }
2735 
2736 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2737 {
2738     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2739     FloatParts64 p;
2740 
2741     float16a_unpack_canonical(&p, a, s, fmt16);
2742     parts_float_to_float(&p, s);
2743     return float64_round_pack_canonical(&p, s);
2744 }
2745 
2746 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2747 {
2748     FloatParts64 p;
2749     const FloatFmt *fmt;
2750 
2751     float32_unpack_canonical(&p, a, s);
2752     if (ieee) {
2753         parts_float_to_float(&p, s);
2754         fmt = &float16_params;
2755     } else {
2756         parts_float_to_ahp(&p, s);
2757         fmt = &float16_params_ahp;
2758     }
2759     return float16a_round_pack_canonical(&p, s, fmt);
2760 }
2761 
2762 static float64 QEMU_SOFTFLOAT_ATTR
2763 soft_float32_to_float64(float32 a, float_status *s)
2764 {
2765     FloatParts64 p;
2766 
2767     float32_unpack_canonical(&p, a, s);
2768     parts_float_to_float(&p, s);
2769     return float64_round_pack_canonical(&p, s);
2770 }
2771 
2772 float64 float32_to_float64(float32 a, float_status *s)
2773 {
2774     if (likely(float32_is_normal(a))) {
2775         /* Widening conversion can never produce inexact results.  */
2776         union_float32 uf;
2777         union_float64 ud;
2778         uf.s = a;
2779         ud.h = uf.h;
2780         return ud.s;
2781     } else if (float32_is_zero(a)) {
2782         return float64_set_sign(float64_zero, float32_is_neg(a));
2783     } else {
2784         return soft_float32_to_float64(a, s);
2785     }
2786 }
2787 
2788 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2789 {
2790     FloatParts64 p;
2791     const FloatFmt *fmt;
2792 
2793     float64_unpack_canonical(&p, a, s);
2794     if (ieee) {
2795         parts_float_to_float(&p, s);
2796         fmt = &float16_params;
2797     } else {
2798         parts_float_to_ahp(&p, s);
2799         fmt = &float16_params_ahp;
2800     }
2801     return float16a_round_pack_canonical(&p, s, fmt);
2802 }
2803 
2804 float32 float64_to_float32(float64 a, float_status *s)
2805 {
2806     FloatParts64 p;
2807 
2808     float64_unpack_canonical(&p, a, s);
2809     parts_float_to_float(&p, s);
2810     return float32_round_pack_canonical(&p, s);
2811 }
2812 
2813 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2814 {
2815     FloatParts64 p;
2816 
2817     bfloat16_unpack_canonical(&p, a, s);
2818     parts_float_to_float(&p, s);
2819     return float32_round_pack_canonical(&p, s);
2820 }
2821 
2822 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2823 {
2824     FloatParts64 p;
2825 
2826     bfloat16_unpack_canonical(&p, a, s);
2827     parts_float_to_float(&p, s);
2828     return float64_round_pack_canonical(&p, s);
2829 }
2830 
2831 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2832 {
2833     FloatParts64 p;
2834 
2835     float32_unpack_canonical(&p, a, s);
2836     parts_float_to_float(&p, s);
2837     return bfloat16_round_pack_canonical(&p, s);
2838 }
2839 
2840 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2841 {
2842     FloatParts64 p;
2843 
2844     float64_unpack_canonical(&p, a, s);
2845     parts_float_to_float(&p, s);
2846     return bfloat16_round_pack_canonical(&p, s);
2847 }
2848 
2849 float32 float128_to_float32(float128 a, float_status *s)
2850 {
2851     FloatParts64 p64;
2852     FloatParts128 p128;
2853 
2854     float128_unpack_canonical(&p128, a, s);
2855     parts_float_to_float_narrow(&p64, &p128, s);
2856     return float32_round_pack_canonical(&p64, s);
2857 }
2858 
2859 float64 float128_to_float64(float128 a, float_status *s)
2860 {
2861     FloatParts64 p64;
2862     FloatParts128 p128;
2863 
2864     float128_unpack_canonical(&p128, a, s);
2865     parts_float_to_float_narrow(&p64, &p128, s);
2866     return float64_round_pack_canonical(&p64, s);
2867 }
2868 
2869 float128 float32_to_float128(float32 a, float_status *s)
2870 {
2871     FloatParts64 p64;
2872     FloatParts128 p128;
2873 
2874     float32_unpack_canonical(&p64, a, s);
2875     parts_float_to_float_widen(&p128, &p64, s);
2876     return float128_round_pack_canonical(&p128, s);
2877 }
2878 
2879 float128 float64_to_float128(float64 a, float_status *s)
2880 {
2881     FloatParts64 p64;
2882     FloatParts128 p128;
2883 
2884     float64_unpack_canonical(&p64, a, s);
2885     parts_float_to_float_widen(&p128, &p64, s);
2886     return float128_round_pack_canonical(&p128, s);
2887 }
2888 
2889 float32 floatx80_to_float32(floatx80 a, float_status *s)
2890 {
2891     FloatParts64 p64;
2892     FloatParts128 p128;
2893 
2894     if (floatx80_unpack_canonical(&p128, a, s)) {
2895         parts_float_to_float_narrow(&p64, &p128, s);
2896     } else {
2897         parts_default_nan(&p64, s);
2898     }
2899     return float32_round_pack_canonical(&p64, s);
2900 }
2901 
2902 float64 floatx80_to_float64(floatx80 a, float_status *s)
2903 {
2904     FloatParts64 p64;
2905     FloatParts128 p128;
2906 
2907     if (floatx80_unpack_canonical(&p128, a, s)) {
2908         parts_float_to_float_narrow(&p64, &p128, s);
2909     } else {
2910         parts_default_nan(&p64, s);
2911     }
2912     return float64_round_pack_canonical(&p64, s);
2913 }
2914 
2915 float128 floatx80_to_float128(floatx80 a, float_status *s)
2916 {
2917     FloatParts128 p;
2918 
2919     if (floatx80_unpack_canonical(&p, a, s)) {
2920         parts_float_to_float(&p, s);
2921     } else {
2922         parts_default_nan(&p, s);
2923     }
2924     return float128_round_pack_canonical(&p, s);
2925 }
2926 
2927 floatx80 float32_to_floatx80(float32 a, float_status *s)
2928 {
2929     FloatParts64 p64;
2930     FloatParts128 p128;
2931 
2932     float32_unpack_canonical(&p64, a, s);
2933     parts_float_to_float_widen(&p128, &p64, s);
2934     return floatx80_round_pack_canonical(&p128, s);
2935 }
2936 
2937 floatx80 float64_to_floatx80(float64 a, float_status *s)
2938 {
2939     FloatParts64 p64;
2940     FloatParts128 p128;
2941 
2942     float64_unpack_canonical(&p64, a, s);
2943     parts_float_to_float_widen(&p128, &p64, s);
2944     return floatx80_round_pack_canonical(&p128, s);
2945 }
2946 
2947 floatx80 float128_to_floatx80(float128 a, float_status *s)
2948 {
2949     FloatParts128 p;
2950 
2951     float128_unpack_canonical(&p, a, s);
2952     parts_float_to_float(&p, s);
2953     return floatx80_round_pack_canonical(&p, s);
2954 }
2955 
2956 /*
2957  * Round to integral value
2958  */
2959 
2960 float16 float16_round_to_int(float16 a, float_status *s)
2961 {
2962     FloatParts64 p;
2963 
2964     float16_unpack_canonical(&p, a, s);
2965     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2966     return float16_round_pack_canonical(&p, s);
2967 }
2968 
2969 float32 float32_round_to_int(float32 a, float_status *s)
2970 {
2971     FloatParts64 p;
2972 
2973     float32_unpack_canonical(&p, a, s);
2974     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2975     return float32_round_pack_canonical(&p, s);
2976 }
2977 
2978 float64 float64_round_to_int(float64 a, float_status *s)
2979 {
2980     FloatParts64 p;
2981 
2982     float64_unpack_canonical(&p, a, s);
2983     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2984     return float64_round_pack_canonical(&p, s);
2985 }
2986 
2987 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2988 {
2989     FloatParts64 p;
2990 
2991     bfloat16_unpack_canonical(&p, a, s);
2992     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2993     return bfloat16_round_pack_canonical(&p, s);
2994 }
2995 
2996 float128 float128_round_to_int(float128 a, float_status *s)
2997 {
2998     FloatParts128 p;
2999 
3000     float128_unpack_canonical(&p, a, s);
3001     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3002     return float128_round_pack_canonical(&p, s);
3003 }
3004 
3005 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3006 {
3007     FloatParts128 p;
3008 
3009     if (!floatx80_unpack_canonical(&p, a, status)) {
3010         return floatx80_default_nan(status);
3011     }
3012 
3013     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3014                        &floatx80_params[status->floatx80_rounding_precision]);
3015     return floatx80_round_pack_canonical(&p, status);
3016 }
3017 
3018 /*
3019  * Floating-point to signed integer conversions
3020  */
3021 
3022 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3023                               float_status *s)
3024 {
3025     FloatParts64 p;
3026 
3027     float16_unpack_canonical(&p, a, s);
3028     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3029 }
3030 
3031 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3032                                 float_status *s)
3033 {
3034     FloatParts64 p;
3035 
3036     float16_unpack_canonical(&p, a, s);
3037     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3038 }
3039 
3040 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3041                                 float_status *s)
3042 {
3043     FloatParts64 p;
3044 
3045     float16_unpack_canonical(&p, a, s);
3046     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3047 }
3048 
3049 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3050                                 float_status *s)
3051 {
3052     FloatParts64 p;
3053 
3054     float16_unpack_canonical(&p, a, s);
3055     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3056 }
3057 
3058 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3059                                 float_status *s)
3060 {
3061     FloatParts64 p;
3062 
3063     float32_unpack_canonical(&p, a, s);
3064     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3065 }
3066 
3067 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3068                                 float_status *s)
3069 {
3070     FloatParts64 p;
3071 
3072     float32_unpack_canonical(&p, a, s);
3073     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3074 }
3075 
3076 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3077                                 float_status *s)
3078 {
3079     FloatParts64 p;
3080 
3081     float32_unpack_canonical(&p, a, s);
3082     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3083 }
3084 
3085 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3086                                 float_status *s)
3087 {
3088     FloatParts64 p;
3089 
3090     float64_unpack_canonical(&p, a, s);
3091     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3092 }
3093 
3094 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3095                                 float_status *s)
3096 {
3097     FloatParts64 p;
3098 
3099     float64_unpack_canonical(&p, a, s);
3100     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3101 }
3102 
3103 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3104                                 float_status *s)
3105 {
3106     FloatParts64 p;
3107 
3108     float64_unpack_canonical(&p, a, s);
3109     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3110 }
3111 
3112 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3113                                  float_status *s)
3114 {
3115     FloatParts64 p;
3116 
3117     bfloat16_unpack_canonical(&p, a, s);
3118     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3119 }
3120 
3121 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3122                                  float_status *s)
3123 {
3124     FloatParts64 p;
3125 
3126     bfloat16_unpack_canonical(&p, a, s);
3127     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3128 }
3129 
3130 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3131                                  float_status *s)
3132 {
3133     FloatParts64 p;
3134 
3135     bfloat16_unpack_canonical(&p, a, s);
3136     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3137 }
3138 
3139 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3140                                         int scale, float_status *s)
3141 {
3142     FloatParts128 p;
3143 
3144     float128_unpack_canonical(&p, a, s);
3145     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3146 }
3147 
3148 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3149                                         int scale, float_status *s)
3150 {
3151     FloatParts128 p;
3152 
3153     float128_unpack_canonical(&p, a, s);
3154     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3155 }
3156 
3157 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3158                                         int scale, float_status *s)
3159 {
3160     FloatParts128 p;
3161 
3162     if (!floatx80_unpack_canonical(&p, a, s)) {
3163         parts_default_nan(&p, s);
3164     }
3165     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3166 }
3167 
3168 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3169                                         int scale, float_status *s)
3170 {
3171     FloatParts128 p;
3172 
3173     if (!floatx80_unpack_canonical(&p, a, s)) {
3174         parts_default_nan(&p, s);
3175     }
3176     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3177 }
3178 
3179 int8_t float16_to_int8(float16 a, float_status *s)
3180 {
3181     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3182 }
3183 
3184 int16_t float16_to_int16(float16 a, float_status *s)
3185 {
3186     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3187 }
3188 
3189 int32_t float16_to_int32(float16 a, float_status *s)
3190 {
3191     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3192 }
3193 
3194 int64_t float16_to_int64(float16 a, float_status *s)
3195 {
3196     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3197 }
3198 
3199 int16_t float32_to_int16(float32 a, float_status *s)
3200 {
3201     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3202 }
3203 
3204 int32_t float32_to_int32(float32 a, float_status *s)
3205 {
3206     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3207 }
3208 
3209 int64_t float32_to_int64(float32 a, float_status *s)
3210 {
3211     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3212 }
3213 
3214 int16_t float64_to_int16(float64 a, float_status *s)
3215 {
3216     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3217 }
3218 
3219 int32_t float64_to_int32(float64 a, float_status *s)
3220 {
3221     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3222 }
3223 
3224 int64_t float64_to_int64(float64 a, float_status *s)
3225 {
3226     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3227 }
3228 
3229 int32_t float128_to_int32(float128 a, float_status *s)
3230 {
3231     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3232 }
3233 
3234 int64_t float128_to_int64(float128 a, float_status *s)
3235 {
3236     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3237 }
3238 
3239 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3240 {
3241     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3242 }
3243 
3244 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3245 {
3246     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3247 }
3248 
3249 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3250 {
3251     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3252 }
3253 
3254 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3255 {
3256     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3257 }
3258 
3259 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3260 {
3261     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3262 }
3263 
3264 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3265 {
3266     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3267 }
3268 
3269 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3270 {
3271     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3272 }
3273 
3274 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3275 {
3276     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3277 }
3278 
3279 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3280 {
3281     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3282 }
3283 
3284 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3285 {
3286     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3287 }
3288 
3289 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3290 {
3291     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3292 }
3293 
3294 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3295 {
3296     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3297 }
3298 
3299 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3300 {
3301     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3302 }
3303 
3304 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3305 {
3306     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3307 }
3308 
3309 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3310 {
3311     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3312 }
3313 
3314 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3315 {
3316     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3317 }
3318 
3319 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3320 {
3321     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3322 }
3323 
3324 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3325 {
3326     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3327 }
3328 
3329 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3330 {
3331     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3332 }
3333 
3334 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3335 {
3336     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3337 }
3338 
3339 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3340 {
3341     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3342 }
3343 
3344 /*
3345  * Floating-point to unsigned integer conversions
3346  */
3347 
3348 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3349                                 float_status *s)
3350 {
3351     FloatParts64 p;
3352 
3353     float16_unpack_canonical(&p, a, s);
3354     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3355 }
3356 
3357 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3358                                   float_status *s)
3359 {
3360     FloatParts64 p;
3361 
3362     float16_unpack_canonical(&p, a, s);
3363     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3364 }
3365 
3366 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3367                                   float_status *s)
3368 {
3369     FloatParts64 p;
3370 
3371     float16_unpack_canonical(&p, a, s);
3372     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3373 }
3374 
3375 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3376                                   float_status *s)
3377 {
3378     FloatParts64 p;
3379 
3380     float16_unpack_canonical(&p, a, s);
3381     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3382 }
3383 
3384 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3385                                   float_status *s)
3386 {
3387     FloatParts64 p;
3388 
3389     float32_unpack_canonical(&p, a, s);
3390     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3391 }
3392 
3393 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3394                                   float_status *s)
3395 {
3396     FloatParts64 p;
3397 
3398     float32_unpack_canonical(&p, a, s);
3399     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3400 }
3401 
3402 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3403                                   float_status *s)
3404 {
3405     FloatParts64 p;
3406 
3407     float32_unpack_canonical(&p, a, s);
3408     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3409 }
3410 
3411 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3412                                   float_status *s)
3413 {
3414     FloatParts64 p;
3415 
3416     float64_unpack_canonical(&p, a, s);
3417     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3418 }
3419 
3420 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3421                                   float_status *s)
3422 {
3423     FloatParts64 p;
3424 
3425     float64_unpack_canonical(&p, a, s);
3426     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3427 }
3428 
3429 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3430                                   float_status *s)
3431 {
3432     FloatParts64 p;
3433 
3434     float64_unpack_canonical(&p, a, s);
3435     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3436 }
3437 
3438 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3439                                    int scale, float_status *s)
3440 {
3441     FloatParts64 p;
3442 
3443     bfloat16_unpack_canonical(&p, a, s);
3444     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3445 }
3446 
3447 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3448                                    int scale, float_status *s)
3449 {
3450     FloatParts64 p;
3451 
3452     bfloat16_unpack_canonical(&p, a, s);
3453     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3454 }
3455 
3456 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3457                                    int scale, float_status *s)
3458 {
3459     FloatParts64 p;
3460 
3461     bfloat16_unpack_canonical(&p, a, s);
3462     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3463 }
3464 
3465 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3466                                           int scale, float_status *s)
3467 {
3468     FloatParts128 p;
3469 
3470     float128_unpack_canonical(&p, a, s);
3471     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3472 }
3473 
3474 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3475                                           int scale, float_status *s)
3476 {
3477     FloatParts128 p;
3478 
3479     float128_unpack_canonical(&p, a, s);
3480     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3481 }
3482 
3483 uint8_t float16_to_uint8(float16 a, float_status *s)
3484 {
3485     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3486 }
3487 
3488 uint16_t float16_to_uint16(float16 a, float_status *s)
3489 {
3490     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3491 }
3492 
3493 uint32_t float16_to_uint32(float16 a, float_status *s)
3494 {
3495     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3496 }
3497 
3498 uint64_t float16_to_uint64(float16 a, float_status *s)
3499 {
3500     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3501 }
3502 
3503 uint16_t float32_to_uint16(float32 a, float_status *s)
3504 {
3505     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3506 }
3507 
3508 uint32_t float32_to_uint32(float32 a, float_status *s)
3509 {
3510     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3511 }
3512 
3513 uint64_t float32_to_uint64(float32 a, float_status *s)
3514 {
3515     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3516 }
3517 
3518 uint16_t float64_to_uint16(float64 a, float_status *s)
3519 {
3520     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3521 }
3522 
3523 uint32_t float64_to_uint32(float64 a, float_status *s)
3524 {
3525     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3526 }
3527 
3528 uint64_t float64_to_uint64(float64 a, float_status *s)
3529 {
3530     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3531 }
3532 
3533 uint32_t float128_to_uint32(float128 a, float_status *s)
3534 {
3535     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3536 }
3537 
3538 uint64_t float128_to_uint64(float128 a, float_status *s)
3539 {
3540     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3541 }
3542 
3543 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3544 {
3545     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3546 }
3547 
3548 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3549 {
3550     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3551 }
3552 
3553 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3554 {
3555     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3556 }
3557 
3558 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3559 {
3560     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3561 }
3562 
3563 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3564 {
3565     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3566 }
3567 
3568 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3569 {
3570     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3571 }
3572 
3573 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3574 {
3575     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3576 }
3577 
3578 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3579 {
3580     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3581 }
3582 
3583 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3584 {
3585     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3586 }
3587 
3588 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3589 {
3590     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3591 }
3592 
3593 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3594 {
3595     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3596 }
3597 
3598 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3599 {
3600     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3601 }
3602 
3603 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3604 {
3605     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3606 }
3607 
3608 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3609 {
3610     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3611 }
3612 
3613 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3614 {
3615     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3616 }
3617 
3618 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3619 {
3620     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3621 }
3622 
3623 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3624 {
3625     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3626 }
3627 
3628 /*
3629  * Signed integer to floating-point conversions
3630  */
3631 
3632 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3633 {
3634     FloatParts64 p;
3635 
3636     parts_sint_to_float(&p, a, scale, status);
3637     return float16_round_pack_canonical(&p, status);
3638 }
3639 
3640 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3641 {
3642     return int64_to_float16_scalbn(a, scale, status);
3643 }
3644 
3645 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3646 {
3647     return int64_to_float16_scalbn(a, scale, status);
3648 }
3649 
3650 float16 int64_to_float16(int64_t a, float_status *status)
3651 {
3652     return int64_to_float16_scalbn(a, 0, status);
3653 }
3654 
3655 float16 int32_to_float16(int32_t a, float_status *status)
3656 {
3657     return int64_to_float16_scalbn(a, 0, status);
3658 }
3659 
3660 float16 int16_to_float16(int16_t a, float_status *status)
3661 {
3662     return int64_to_float16_scalbn(a, 0, status);
3663 }
3664 
3665 float16 int8_to_float16(int8_t a, float_status *status)
3666 {
3667     return int64_to_float16_scalbn(a, 0, status);
3668 }
3669 
3670 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3671 {
3672     FloatParts64 p;
3673 
3674     /* Without scaling, there are no overflow concerns. */
3675     if (likely(scale == 0) && can_use_fpu(status)) {
3676         union_float32 ur;
3677         ur.h = a;
3678         return ur.s;
3679     }
3680 
3681     parts64_sint_to_float(&p, a, scale, status);
3682     return float32_round_pack_canonical(&p, status);
3683 }
3684 
3685 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3686 {
3687     return int64_to_float32_scalbn(a, scale, status);
3688 }
3689 
3690 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3691 {
3692     return int64_to_float32_scalbn(a, scale, status);
3693 }
3694 
3695 float32 int64_to_float32(int64_t a, float_status *status)
3696 {
3697     return int64_to_float32_scalbn(a, 0, status);
3698 }
3699 
3700 float32 int32_to_float32(int32_t a, float_status *status)
3701 {
3702     return int64_to_float32_scalbn(a, 0, status);
3703 }
3704 
3705 float32 int16_to_float32(int16_t a, float_status *status)
3706 {
3707     return int64_to_float32_scalbn(a, 0, status);
3708 }
3709 
3710 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3711 {
3712     FloatParts64 p;
3713 
3714     /* Without scaling, there are no overflow concerns. */
3715     if (likely(scale == 0) && can_use_fpu(status)) {
3716         union_float64 ur;
3717         ur.h = a;
3718         return ur.s;
3719     }
3720 
3721     parts_sint_to_float(&p, a, scale, status);
3722     return float64_round_pack_canonical(&p, status);
3723 }
3724 
3725 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3726 {
3727     return int64_to_float64_scalbn(a, scale, status);
3728 }
3729 
3730 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3731 {
3732     return int64_to_float64_scalbn(a, scale, status);
3733 }
3734 
3735 float64 int64_to_float64(int64_t a, float_status *status)
3736 {
3737     return int64_to_float64_scalbn(a, 0, status);
3738 }
3739 
3740 float64 int32_to_float64(int32_t a, float_status *status)
3741 {
3742     return int64_to_float64_scalbn(a, 0, status);
3743 }
3744 
3745 float64 int16_to_float64(int16_t a, float_status *status)
3746 {
3747     return int64_to_float64_scalbn(a, 0, status);
3748 }
3749 
3750 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3751 {
3752     FloatParts64 p;
3753 
3754     parts_sint_to_float(&p, a, scale, status);
3755     return bfloat16_round_pack_canonical(&p, status);
3756 }
3757 
3758 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3759 {
3760     return int64_to_bfloat16_scalbn(a, scale, status);
3761 }
3762 
3763 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3764 {
3765     return int64_to_bfloat16_scalbn(a, scale, status);
3766 }
3767 
3768 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3769 {
3770     return int64_to_bfloat16_scalbn(a, 0, status);
3771 }
3772 
3773 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3774 {
3775     return int64_to_bfloat16_scalbn(a, 0, status);
3776 }
3777 
3778 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3779 {
3780     return int64_to_bfloat16_scalbn(a, 0, status);
3781 }
3782 
3783 float128 int64_to_float128(int64_t a, float_status *status)
3784 {
3785     FloatParts128 p;
3786 
3787     parts_sint_to_float(&p, a, 0, status);
3788     return float128_round_pack_canonical(&p, status);
3789 }
3790 
3791 float128 int32_to_float128(int32_t a, float_status *status)
3792 {
3793     return int64_to_float128(a, status);
3794 }
3795 
3796 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3797 {
3798     FloatParts128 p;
3799 
3800     parts_sint_to_float(&p, a, 0, status);
3801     return floatx80_round_pack_canonical(&p, status);
3802 }
3803 
3804 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3805 {
3806     return int64_to_floatx80(a, status);
3807 }
3808 
3809 /*
3810  * Unsigned Integer to floating-point conversions
3811  */
3812 
3813 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3814 {
3815     FloatParts64 p;
3816 
3817     parts_uint_to_float(&p, a, scale, status);
3818     return float16_round_pack_canonical(&p, status);
3819 }
3820 
3821 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3822 {
3823     return uint64_to_float16_scalbn(a, scale, status);
3824 }
3825 
3826 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3827 {
3828     return uint64_to_float16_scalbn(a, scale, status);
3829 }
3830 
3831 float16 uint64_to_float16(uint64_t a, float_status *status)
3832 {
3833     return uint64_to_float16_scalbn(a, 0, status);
3834 }
3835 
3836 float16 uint32_to_float16(uint32_t a, float_status *status)
3837 {
3838     return uint64_to_float16_scalbn(a, 0, status);
3839 }
3840 
3841 float16 uint16_to_float16(uint16_t a, float_status *status)
3842 {
3843     return uint64_to_float16_scalbn(a, 0, status);
3844 }
3845 
3846 float16 uint8_to_float16(uint8_t a, float_status *status)
3847 {
3848     return uint64_to_float16_scalbn(a, 0, status);
3849 }
3850 
3851 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3852 {
3853     FloatParts64 p;
3854 
3855     /* Without scaling, there are no overflow concerns. */
3856     if (likely(scale == 0) && can_use_fpu(status)) {
3857         union_float32 ur;
3858         ur.h = a;
3859         return ur.s;
3860     }
3861 
3862     parts_uint_to_float(&p, a, scale, status);
3863     return float32_round_pack_canonical(&p, status);
3864 }
3865 
3866 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3867 {
3868     return uint64_to_float32_scalbn(a, scale, status);
3869 }
3870 
3871 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3872 {
3873     return uint64_to_float32_scalbn(a, scale, status);
3874 }
3875 
3876 float32 uint64_to_float32(uint64_t a, float_status *status)
3877 {
3878     return uint64_to_float32_scalbn(a, 0, status);
3879 }
3880 
3881 float32 uint32_to_float32(uint32_t a, float_status *status)
3882 {
3883     return uint64_to_float32_scalbn(a, 0, status);
3884 }
3885 
3886 float32 uint16_to_float32(uint16_t a, float_status *status)
3887 {
3888     return uint64_to_float32_scalbn(a, 0, status);
3889 }
3890 
3891 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3892 {
3893     FloatParts64 p;
3894 
3895     /* Without scaling, there are no overflow concerns. */
3896     if (likely(scale == 0) && can_use_fpu(status)) {
3897         union_float64 ur;
3898         ur.h = a;
3899         return ur.s;
3900     }
3901 
3902     parts_uint_to_float(&p, a, scale, status);
3903     return float64_round_pack_canonical(&p, status);
3904 }
3905 
3906 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3907 {
3908     return uint64_to_float64_scalbn(a, scale, status);
3909 }
3910 
3911 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3912 {
3913     return uint64_to_float64_scalbn(a, scale, status);
3914 }
3915 
3916 float64 uint64_to_float64(uint64_t a, float_status *status)
3917 {
3918     return uint64_to_float64_scalbn(a, 0, status);
3919 }
3920 
3921 float64 uint32_to_float64(uint32_t a, float_status *status)
3922 {
3923     return uint64_to_float64_scalbn(a, 0, status);
3924 }
3925 
3926 float64 uint16_to_float64(uint16_t a, float_status *status)
3927 {
3928     return uint64_to_float64_scalbn(a, 0, status);
3929 }
3930 
3931 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3932 {
3933     FloatParts64 p;
3934 
3935     parts_uint_to_float(&p, a, scale, status);
3936     return bfloat16_round_pack_canonical(&p, status);
3937 }
3938 
3939 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3940 {
3941     return uint64_to_bfloat16_scalbn(a, scale, status);
3942 }
3943 
3944 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3945 {
3946     return uint64_to_bfloat16_scalbn(a, scale, status);
3947 }
3948 
3949 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3950 {
3951     return uint64_to_bfloat16_scalbn(a, 0, status);
3952 }
3953 
3954 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3955 {
3956     return uint64_to_bfloat16_scalbn(a, 0, status);
3957 }
3958 
3959 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3960 {
3961     return uint64_to_bfloat16_scalbn(a, 0, status);
3962 }
3963 
3964 float128 uint64_to_float128(uint64_t a, float_status *status)
3965 {
3966     FloatParts128 p;
3967 
3968     parts_uint_to_float(&p, a, 0, status);
3969     return float128_round_pack_canonical(&p, status);
3970 }
3971 
3972 /*
3973  * Minimum and maximum
3974  */
3975 
3976 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3977 {
3978     FloatParts64 pa, pb, *pr;
3979 
3980     float16_unpack_canonical(&pa, a, s);
3981     float16_unpack_canonical(&pb, b, s);
3982     pr = parts_minmax(&pa, &pb, s, flags);
3983 
3984     return float16_round_pack_canonical(pr, s);
3985 }
3986 
3987 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3988                                 float_status *s, int flags)
3989 {
3990     FloatParts64 pa, pb, *pr;
3991 
3992     bfloat16_unpack_canonical(&pa, a, s);
3993     bfloat16_unpack_canonical(&pb, b, s);
3994     pr = parts_minmax(&pa, &pb, s, flags);
3995 
3996     return bfloat16_round_pack_canonical(pr, s);
3997 }
3998 
3999 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4000 {
4001     FloatParts64 pa, pb, *pr;
4002 
4003     float32_unpack_canonical(&pa, a, s);
4004     float32_unpack_canonical(&pb, b, s);
4005     pr = parts_minmax(&pa, &pb, s, flags);
4006 
4007     return float32_round_pack_canonical(pr, s);
4008 }
4009 
4010 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4011 {
4012     FloatParts64 pa, pb, *pr;
4013 
4014     float64_unpack_canonical(&pa, a, s);
4015     float64_unpack_canonical(&pb, b, s);
4016     pr = parts_minmax(&pa, &pb, s, flags);
4017 
4018     return float64_round_pack_canonical(pr, s);
4019 }
4020 
4021 static float128 float128_minmax(float128 a, float128 b,
4022                                 float_status *s, int flags)
4023 {
4024     FloatParts128 pa, pb, *pr;
4025 
4026     float128_unpack_canonical(&pa, a, s);
4027     float128_unpack_canonical(&pb, b, s);
4028     pr = parts_minmax(&pa, &pb, s, flags);
4029 
4030     return float128_round_pack_canonical(pr, s);
4031 }
4032 
4033 #define MINMAX_1(type, name, flags) \
4034     type type##_##name(type a, type b, float_status *s) \
4035     { return type##_minmax(a, b, s, flags); }
4036 
4037 #define MINMAX_2(type) \
4038     MINMAX_1(type, max, 0)                                                \
4039     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4040     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4041     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4042     MINMAX_1(type, min, minmax_ismin)                                     \
4043     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4044     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4045     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4046 
4047 MINMAX_2(float16)
4048 MINMAX_2(bfloat16)
4049 MINMAX_2(float32)
4050 MINMAX_2(float64)
4051 MINMAX_2(float128)
4052 
4053 #undef MINMAX_1
4054 #undef MINMAX_2
4055 
4056 /*
4057  * Floating point compare
4058  */
4059 
4060 static FloatRelation QEMU_FLATTEN
4061 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4062 {
4063     FloatParts64 pa, pb;
4064 
4065     float16_unpack_canonical(&pa, a, s);
4066     float16_unpack_canonical(&pb, b, s);
4067     return parts_compare(&pa, &pb, s, is_quiet);
4068 }
4069 
4070 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4071 {
4072     return float16_do_compare(a, b, s, false);
4073 }
4074 
4075 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4076 {
4077     return float16_do_compare(a, b, s, true);
4078 }
4079 
4080 static FloatRelation QEMU_SOFTFLOAT_ATTR
4081 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4082 {
4083     FloatParts64 pa, pb;
4084 
4085     float32_unpack_canonical(&pa, a, s);
4086     float32_unpack_canonical(&pb, b, s);
4087     return parts_compare(&pa, &pb, s, is_quiet);
4088 }
4089 
4090 static FloatRelation QEMU_FLATTEN
4091 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4092 {
4093     union_float32 ua, ub;
4094 
4095     ua.s = xa;
4096     ub.s = xb;
4097 
4098     if (QEMU_NO_HARDFLOAT) {
4099         goto soft;
4100     }
4101 
4102     float32_input_flush2(&ua.s, &ub.s, s);
4103     if (isgreaterequal(ua.h, ub.h)) {
4104         if (isgreater(ua.h, ub.h)) {
4105             return float_relation_greater;
4106         }
4107         return float_relation_equal;
4108     }
4109     if (likely(isless(ua.h, ub.h))) {
4110         return float_relation_less;
4111     }
4112     /*
4113      * The only condition remaining is unordered.
4114      * Fall through to set flags.
4115      */
4116  soft:
4117     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4118 }
4119 
4120 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4121 {
4122     return float32_hs_compare(a, b, s, false);
4123 }
4124 
4125 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4126 {
4127     return float32_hs_compare(a, b, s, true);
4128 }
4129 
4130 static FloatRelation QEMU_SOFTFLOAT_ATTR
4131 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4132 {
4133     FloatParts64 pa, pb;
4134 
4135     float64_unpack_canonical(&pa, a, s);
4136     float64_unpack_canonical(&pb, b, s);
4137     return parts_compare(&pa, &pb, s, is_quiet);
4138 }
4139 
4140 static FloatRelation QEMU_FLATTEN
4141 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4142 {
4143     union_float64 ua, ub;
4144 
4145     ua.s = xa;
4146     ub.s = xb;
4147 
4148     if (QEMU_NO_HARDFLOAT) {
4149         goto soft;
4150     }
4151 
4152     float64_input_flush2(&ua.s, &ub.s, s);
4153     if (isgreaterequal(ua.h, ub.h)) {
4154         if (isgreater(ua.h, ub.h)) {
4155             return float_relation_greater;
4156         }
4157         return float_relation_equal;
4158     }
4159     if (likely(isless(ua.h, ub.h))) {
4160         return float_relation_less;
4161     }
4162     /*
4163      * The only condition remaining is unordered.
4164      * Fall through to set flags.
4165      */
4166  soft:
4167     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4168 }
4169 
4170 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4171 {
4172     return float64_hs_compare(a, b, s, false);
4173 }
4174 
4175 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4176 {
4177     return float64_hs_compare(a, b, s, true);
4178 }
4179 
4180 static FloatRelation QEMU_FLATTEN
4181 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4182 {
4183     FloatParts64 pa, pb;
4184 
4185     bfloat16_unpack_canonical(&pa, a, s);
4186     bfloat16_unpack_canonical(&pb, b, s);
4187     return parts_compare(&pa, &pb, s, is_quiet);
4188 }
4189 
4190 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4191 {
4192     return bfloat16_do_compare(a, b, s, false);
4193 }
4194 
4195 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4196 {
4197     return bfloat16_do_compare(a, b, s, true);
4198 }
4199 
4200 static FloatRelation QEMU_FLATTEN
4201 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4202 {
4203     FloatParts128 pa, pb;
4204 
4205     float128_unpack_canonical(&pa, a, s);
4206     float128_unpack_canonical(&pb, b, s);
4207     return parts_compare(&pa, &pb, s, is_quiet);
4208 }
4209 
4210 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4211 {
4212     return float128_do_compare(a, b, s, false);
4213 }
4214 
4215 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4216 {
4217     return float128_do_compare(a, b, s, true);
4218 }
4219 
4220 static FloatRelation QEMU_FLATTEN
4221 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4222 {
4223     FloatParts128 pa, pb;
4224 
4225     if (!floatx80_unpack_canonical(&pa, a, s) ||
4226         !floatx80_unpack_canonical(&pb, b, s)) {
4227         return float_relation_unordered;
4228     }
4229     return parts_compare(&pa, &pb, s, is_quiet);
4230 }
4231 
4232 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4233 {
4234     return floatx80_do_compare(a, b, s, false);
4235 }
4236 
4237 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4238 {
4239     return floatx80_do_compare(a, b, s, true);
4240 }
4241 
4242 /*
4243  * Scale by 2**N
4244  */
4245 
4246 float16 float16_scalbn(float16 a, int n, float_status *status)
4247 {
4248     FloatParts64 p;
4249 
4250     float16_unpack_canonical(&p, a, status);
4251     parts_scalbn(&p, n, status);
4252     return float16_round_pack_canonical(&p, status);
4253 }
4254 
4255 float32 float32_scalbn(float32 a, int n, float_status *status)
4256 {
4257     FloatParts64 p;
4258 
4259     float32_unpack_canonical(&p, a, status);
4260     parts_scalbn(&p, n, status);
4261     return float32_round_pack_canonical(&p, status);
4262 }
4263 
4264 float64 float64_scalbn(float64 a, int n, float_status *status)
4265 {
4266     FloatParts64 p;
4267 
4268     float64_unpack_canonical(&p, a, status);
4269     parts_scalbn(&p, n, status);
4270     return float64_round_pack_canonical(&p, status);
4271 }
4272 
4273 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4274 {
4275     FloatParts64 p;
4276 
4277     bfloat16_unpack_canonical(&p, a, status);
4278     parts_scalbn(&p, n, status);
4279     return bfloat16_round_pack_canonical(&p, status);
4280 }
4281 
4282 float128 float128_scalbn(float128 a, int n, float_status *status)
4283 {
4284     FloatParts128 p;
4285 
4286     float128_unpack_canonical(&p, a, status);
4287     parts_scalbn(&p, n, status);
4288     return float128_round_pack_canonical(&p, status);
4289 }
4290 
4291 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4292 {
4293     FloatParts128 p;
4294 
4295     if (!floatx80_unpack_canonical(&p, a, status)) {
4296         return floatx80_default_nan(status);
4297     }
4298     parts_scalbn(&p, n, status);
4299     return floatx80_round_pack_canonical(&p, status);
4300 }
4301 
4302 /*
4303  * Square Root
4304  */
4305 
4306 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4307 {
4308     FloatParts64 p;
4309 
4310     float16_unpack_canonical(&p, a, status);
4311     parts_sqrt(&p, status, &float16_params);
4312     return float16_round_pack_canonical(&p, status);
4313 }
4314 
4315 static float32 QEMU_SOFTFLOAT_ATTR
4316 soft_f32_sqrt(float32 a, float_status *status)
4317 {
4318     FloatParts64 p;
4319 
4320     float32_unpack_canonical(&p, a, status);
4321     parts_sqrt(&p, status, &float32_params);
4322     return float32_round_pack_canonical(&p, status);
4323 }
4324 
4325 static float64 QEMU_SOFTFLOAT_ATTR
4326 soft_f64_sqrt(float64 a, float_status *status)
4327 {
4328     FloatParts64 p;
4329 
4330     float64_unpack_canonical(&p, a, status);
4331     parts_sqrt(&p, status, &float64_params);
4332     return float64_round_pack_canonical(&p, status);
4333 }
4334 
4335 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4336 {
4337     union_float32 ua, ur;
4338 
4339     ua.s = xa;
4340     if (unlikely(!can_use_fpu(s))) {
4341         goto soft;
4342     }
4343 
4344     float32_input_flush1(&ua.s, s);
4345     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4346         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4347                        fpclassify(ua.h) == FP_ZERO) ||
4348                      signbit(ua.h))) {
4349             goto soft;
4350         }
4351     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4352                         float32_is_neg(ua.s))) {
4353         goto soft;
4354     }
4355     ur.h = sqrtf(ua.h);
4356     return ur.s;
4357 
4358  soft:
4359     return soft_f32_sqrt(ua.s, s);
4360 }
4361 
4362 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4363 {
4364     union_float64 ua, ur;
4365 
4366     ua.s = xa;
4367     if (unlikely(!can_use_fpu(s))) {
4368         goto soft;
4369     }
4370 
4371     float64_input_flush1(&ua.s, s);
4372     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4373         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4374                        fpclassify(ua.h) == FP_ZERO) ||
4375                      signbit(ua.h))) {
4376             goto soft;
4377         }
4378     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4379                         float64_is_neg(ua.s))) {
4380         goto soft;
4381     }
4382     ur.h = sqrt(ua.h);
4383     return ur.s;
4384 
4385  soft:
4386     return soft_f64_sqrt(ua.s, s);
4387 }
4388 
4389 float64 float64r32_sqrt(float64 a, float_status *status)
4390 {
4391     FloatParts64 p;
4392 
4393     float64_unpack_canonical(&p, a, status);
4394     parts_sqrt(&p, status, &float64_params);
4395     return float64r32_round_pack_canonical(&p, status);
4396 }
4397 
4398 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4399 {
4400     FloatParts64 p;
4401 
4402     bfloat16_unpack_canonical(&p, a, status);
4403     parts_sqrt(&p, status, &bfloat16_params);
4404     return bfloat16_round_pack_canonical(&p, status);
4405 }
4406 
4407 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4408 {
4409     FloatParts128 p;
4410 
4411     float128_unpack_canonical(&p, a, status);
4412     parts_sqrt(&p, status, &float128_params);
4413     return float128_round_pack_canonical(&p, status);
4414 }
4415 
4416 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4417 {
4418     FloatParts128 p;
4419 
4420     if (!floatx80_unpack_canonical(&p, a, s)) {
4421         return floatx80_default_nan(s);
4422     }
4423     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4424     return floatx80_round_pack_canonical(&p, s);
4425 }
4426 
4427 /*
4428  * log2
4429  */
4430 float32 float32_log2(float32 a, float_status *status)
4431 {
4432     FloatParts64 p;
4433 
4434     float32_unpack_canonical(&p, a, status);
4435     parts_log2(&p, status, &float32_params);
4436     return float32_round_pack_canonical(&p, status);
4437 }
4438 
4439 float64 float64_log2(float64 a, float_status *status)
4440 {
4441     FloatParts64 p;
4442 
4443     float64_unpack_canonical(&p, a, status);
4444     parts_log2(&p, status, &float64_params);
4445     return float64_round_pack_canonical(&p, status);
4446 }
4447 
4448 /*----------------------------------------------------------------------------
4449 | The pattern for a default generated NaN.
4450 *----------------------------------------------------------------------------*/
4451 
4452 float16 float16_default_nan(float_status *status)
4453 {
4454     FloatParts64 p;
4455 
4456     parts_default_nan(&p, status);
4457     p.frac >>= float16_params.frac_shift;
4458     return float16_pack_raw(&p);
4459 }
4460 
4461 float32 float32_default_nan(float_status *status)
4462 {
4463     FloatParts64 p;
4464 
4465     parts_default_nan(&p, status);
4466     p.frac >>= float32_params.frac_shift;
4467     return float32_pack_raw(&p);
4468 }
4469 
4470 float64 float64_default_nan(float_status *status)
4471 {
4472     FloatParts64 p;
4473 
4474     parts_default_nan(&p, status);
4475     p.frac >>= float64_params.frac_shift;
4476     return float64_pack_raw(&p);
4477 }
4478 
4479 float128 float128_default_nan(float_status *status)
4480 {
4481     FloatParts128 p;
4482 
4483     parts_default_nan(&p, status);
4484     frac_shr(&p, float128_params.frac_shift);
4485     return float128_pack_raw(&p);
4486 }
4487 
4488 bfloat16 bfloat16_default_nan(float_status *status)
4489 {
4490     FloatParts64 p;
4491 
4492     parts_default_nan(&p, status);
4493     p.frac >>= bfloat16_params.frac_shift;
4494     return bfloat16_pack_raw(&p);
4495 }
4496 
4497 /*----------------------------------------------------------------------------
4498 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4499 *----------------------------------------------------------------------------*/
4500 
4501 float16 float16_silence_nan(float16 a, float_status *status)
4502 {
4503     FloatParts64 p;
4504 
4505     float16_unpack_raw(&p, a);
4506     p.frac <<= float16_params.frac_shift;
4507     parts_silence_nan(&p, status);
4508     p.frac >>= float16_params.frac_shift;
4509     return float16_pack_raw(&p);
4510 }
4511 
4512 float32 float32_silence_nan(float32 a, float_status *status)
4513 {
4514     FloatParts64 p;
4515 
4516     float32_unpack_raw(&p, a);
4517     p.frac <<= float32_params.frac_shift;
4518     parts_silence_nan(&p, status);
4519     p.frac >>= float32_params.frac_shift;
4520     return float32_pack_raw(&p);
4521 }
4522 
4523 float64 float64_silence_nan(float64 a, float_status *status)
4524 {
4525     FloatParts64 p;
4526 
4527     float64_unpack_raw(&p, a);
4528     p.frac <<= float64_params.frac_shift;
4529     parts_silence_nan(&p, status);
4530     p.frac >>= float64_params.frac_shift;
4531     return float64_pack_raw(&p);
4532 }
4533 
4534 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4535 {
4536     FloatParts64 p;
4537 
4538     bfloat16_unpack_raw(&p, a);
4539     p.frac <<= bfloat16_params.frac_shift;
4540     parts_silence_nan(&p, status);
4541     p.frac >>= bfloat16_params.frac_shift;
4542     return bfloat16_pack_raw(&p);
4543 }
4544 
4545 float128 float128_silence_nan(float128 a, float_status *status)
4546 {
4547     FloatParts128 p;
4548 
4549     float128_unpack_raw(&p, a);
4550     frac_shl(&p, float128_params.frac_shift);
4551     parts_silence_nan(&p, status);
4552     frac_shr(&p, float128_params.frac_shift);
4553     return float128_pack_raw(&p);
4554 }
4555 
4556 /*----------------------------------------------------------------------------
4557 | If `a' is denormal and we are in flush-to-zero mode then set the
4558 | input-denormal exception and return zero. Otherwise just return the value.
4559 *----------------------------------------------------------------------------*/
4560 
4561 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4562 {
4563     if (p.exp == 0 && p.frac != 0) {
4564         float_raise(float_flag_input_denormal, status);
4565         return true;
4566     }
4567 
4568     return false;
4569 }
4570 
4571 float16 float16_squash_input_denormal(float16 a, float_status *status)
4572 {
4573     if (status->flush_inputs_to_zero) {
4574         FloatParts64 p;
4575 
4576         float16_unpack_raw(&p, a);
4577         if (parts_squash_denormal(p, status)) {
4578             return float16_set_sign(float16_zero, p.sign);
4579         }
4580     }
4581     return a;
4582 }
4583 
4584 float32 float32_squash_input_denormal(float32 a, float_status *status)
4585 {
4586     if (status->flush_inputs_to_zero) {
4587         FloatParts64 p;
4588 
4589         float32_unpack_raw(&p, a);
4590         if (parts_squash_denormal(p, status)) {
4591             return float32_set_sign(float32_zero, p.sign);
4592         }
4593     }
4594     return a;
4595 }
4596 
4597 float64 float64_squash_input_denormal(float64 a, float_status *status)
4598 {
4599     if (status->flush_inputs_to_zero) {
4600         FloatParts64 p;
4601 
4602         float64_unpack_raw(&p, a);
4603         if (parts_squash_denormal(p, status)) {
4604             return float64_set_sign(float64_zero, p.sign);
4605         }
4606     }
4607     return a;
4608 }
4609 
4610 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4611 {
4612     if (status->flush_inputs_to_zero) {
4613         FloatParts64 p;
4614 
4615         bfloat16_unpack_raw(&p, a);
4616         if (parts_squash_denormal(p, status)) {
4617             return bfloat16_set_sign(bfloat16_zero, p.sign);
4618         }
4619     }
4620     return a;
4621 }
4622 
4623 /*----------------------------------------------------------------------------
4624 | Normalizes the subnormal extended double-precision floating-point value
4625 | represented by the denormalized significand `aSig'.  The normalized exponent
4626 | and significand are stored at the locations pointed to by `zExpPtr' and
4627 | `zSigPtr', respectively.
4628 *----------------------------------------------------------------------------*/
4629 
4630 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4631                                 uint64_t *zSigPtr)
4632 {
4633     int8_t shiftCount;
4634 
4635     shiftCount = clz64(aSig);
4636     *zSigPtr = aSig<<shiftCount;
4637     *zExpPtr = 1 - shiftCount;
4638 }
4639 
4640 /*----------------------------------------------------------------------------
4641 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4642 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4643 | and returns the proper extended double-precision floating-point value
4644 | corresponding to the abstract input.  Ordinarily, the abstract value is
4645 | rounded and packed into the extended double-precision format, with the
4646 | inexact exception raised if the abstract input cannot be represented
4647 | exactly.  However, if the abstract value is too large, the overflow and
4648 | inexact exceptions are raised and an infinity or maximal finite value is
4649 | returned.  If the abstract value is too small, the input value is rounded to
4650 | a subnormal number, and the underflow and inexact exceptions are raised if
4651 | the abstract input cannot be represented exactly as a subnormal extended
4652 | double-precision floating-point number.
4653 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4654 | the result is rounded to the same number of bits as single or double
4655 | precision, respectively.  Otherwise, the result is rounded to the full
4656 | precision of the extended double-precision format.
4657 |     The input significand must be normalized or smaller.  If the input
4658 | significand is not normalized, `zExp' must be 0; in that case, the result
4659 | returned is a subnormal number, and it must not require rounding.  The
4660 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4661 | Floating-Point Arithmetic.
4662 *----------------------------------------------------------------------------*/
4663 
4664 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4665                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4666                               float_status *status)
4667 {
4668     FloatRoundMode roundingMode;
4669     bool roundNearestEven, increment, isTiny;
4670     int64_t roundIncrement, roundMask, roundBits;
4671 
4672     roundingMode = status->float_rounding_mode;
4673     roundNearestEven = ( roundingMode == float_round_nearest_even );
4674     switch (roundingPrecision) {
4675     case floatx80_precision_x:
4676         goto precision80;
4677     case floatx80_precision_d:
4678         roundIncrement = UINT64_C(0x0000000000000400);
4679         roundMask = UINT64_C(0x00000000000007FF);
4680         break;
4681     case floatx80_precision_s:
4682         roundIncrement = UINT64_C(0x0000008000000000);
4683         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4684         break;
4685     default:
4686         g_assert_not_reached();
4687     }
4688     zSig0 |= ( zSig1 != 0 );
4689     switch (roundingMode) {
4690     case float_round_nearest_even:
4691     case float_round_ties_away:
4692         break;
4693     case float_round_to_zero:
4694         roundIncrement = 0;
4695         break;
4696     case float_round_up:
4697         roundIncrement = zSign ? 0 : roundMask;
4698         break;
4699     case float_round_down:
4700         roundIncrement = zSign ? roundMask : 0;
4701         break;
4702     default:
4703         abort();
4704     }
4705     roundBits = zSig0 & roundMask;
4706     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4707         if (    ( 0x7FFE < zExp )
4708              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4709            ) {
4710             goto overflow;
4711         }
4712         if ( zExp <= 0 ) {
4713             if (status->flush_to_zero) {
4714                 float_raise(float_flag_output_denormal, status);
4715                 return packFloatx80(zSign, 0, 0);
4716             }
4717             isTiny = status->tininess_before_rounding
4718                   || (zExp < 0 )
4719                   || (zSig0 <= zSig0 + roundIncrement);
4720             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4721             zExp = 0;
4722             roundBits = zSig0 & roundMask;
4723             if (isTiny && roundBits) {
4724                 float_raise(float_flag_underflow, status);
4725             }
4726             if (roundBits) {
4727                 float_raise(float_flag_inexact, status);
4728             }
4729             zSig0 += roundIncrement;
4730             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4731             roundIncrement = roundMask + 1;
4732             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4733                 roundMask |= roundIncrement;
4734             }
4735             zSig0 &= ~ roundMask;
4736             return packFloatx80( zSign, zExp, zSig0 );
4737         }
4738     }
4739     if (roundBits) {
4740         float_raise(float_flag_inexact, status);
4741     }
4742     zSig0 += roundIncrement;
4743     if ( zSig0 < roundIncrement ) {
4744         ++zExp;
4745         zSig0 = UINT64_C(0x8000000000000000);
4746     }
4747     roundIncrement = roundMask + 1;
4748     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4749         roundMask |= roundIncrement;
4750     }
4751     zSig0 &= ~ roundMask;
4752     if ( zSig0 == 0 ) zExp = 0;
4753     return packFloatx80( zSign, zExp, zSig0 );
4754  precision80:
4755     switch (roundingMode) {
4756     case float_round_nearest_even:
4757     case float_round_ties_away:
4758         increment = ((int64_t)zSig1 < 0);
4759         break;
4760     case float_round_to_zero:
4761         increment = 0;
4762         break;
4763     case float_round_up:
4764         increment = !zSign && zSig1;
4765         break;
4766     case float_round_down:
4767         increment = zSign && zSig1;
4768         break;
4769     default:
4770         abort();
4771     }
4772     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4773         if (    ( 0x7FFE < zExp )
4774              || (    ( zExp == 0x7FFE )
4775                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4776                   && increment
4777                 )
4778            ) {
4779             roundMask = 0;
4780  overflow:
4781             float_raise(float_flag_overflow | float_flag_inexact, status);
4782             if (    ( roundingMode == float_round_to_zero )
4783                  || ( zSign && ( roundingMode == float_round_up ) )
4784                  || ( ! zSign && ( roundingMode == float_round_down ) )
4785                ) {
4786                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4787             }
4788             return packFloatx80(zSign,
4789                                 floatx80_infinity_high,
4790                                 floatx80_infinity_low);
4791         }
4792         if ( zExp <= 0 ) {
4793             isTiny = status->tininess_before_rounding
4794                   || (zExp < 0)
4795                   || !increment
4796                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4797             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4798             zExp = 0;
4799             if (isTiny && zSig1) {
4800                 float_raise(float_flag_underflow, status);
4801             }
4802             if (zSig1) {
4803                 float_raise(float_flag_inexact, status);
4804             }
4805             switch (roundingMode) {
4806             case float_round_nearest_even:
4807             case float_round_ties_away:
4808                 increment = ((int64_t)zSig1 < 0);
4809                 break;
4810             case float_round_to_zero:
4811                 increment = 0;
4812                 break;
4813             case float_round_up:
4814                 increment = !zSign && zSig1;
4815                 break;
4816             case float_round_down:
4817                 increment = zSign && zSig1;
4818                 break;
4819             default:
4820                 abort();
4821             }
4822             if ( increment ) {
4823                 ++zSig0;
4824                 if (!(zSig1 << 1) && roundNearestEven) {
4825                     zSig0 &= ~1;
4826                 }
4827                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4828             }
4829             return packFloatx80( zSign, zExp, zSig0 );
4830         }
4831     }
4832     if (zSig1) {
4833         float_raise(float_flag_inexact, status);
4834     }
4835     if ( increment ) {
4836         ++zSig0;
4837         if ( zSig0 == 0 ) {
4838             ++zExp;
4839             zSig0 = UINT64_C(0x8000000000000000);
4840         }
4841         else {
4842             if (!(zSig1 << 1) && roundNearestEven) {
4843                 zSig0 &= ~1;
4844             }
4845         }
4846     }
4847     else {
4848         if ( zSig0 == 0 ) zExp = 0;
4849     }
4850     return packFloatx80( zSign, zExp, zSig0 );
4851 
4852 }
4853 
4854 /*----------------------------------------------------------------------------
4855 | Takes an abstract floating-point value having sign `zSign', exponent
4856 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4857 | and returns the proper extended double-precision floating-point value
4858 | corresponding to the abstract input.  This routine is just like
4859 | `roundAndPackFloatx80' except that the input significand does not have to be
4860 | normalized.
4861 *----------------------------------------------------------------------------*/
4862 
4863 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4864                                        bool zSign, int32_t zExp,
4865                                        uint64_t zSig0, uint64_t zSig1,
4866                                        float_status *status)
4867 {
4868     int8_t shiftCount;
4869 
4870     if ( zSig0 == 0 ) {
4871         zSig0 = zSig1;
4872         zSig1 = 0;
4873         zExp -= 64;
4874     }
4875     shiftCount = clz64(zSig0);
4876     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4877     zExp -= shiftCount;
4878     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4879                                 zSig0, zSig1, status);
4880 
4881 }
4882 
4883 /*----------------------------------------------------------------------------
4884 | Returns the binary exponential of the single-precision floating-point value
4885 | `a'. The operation is performed according to the IEC/IEEE Standard for
4886 | Binary Floating-Point Arithmetic.
4887 |
4888 | Uses the following identities:
4889 |
4890 | 1. -------------------------------------------------------------------------
4891 |      x    x*ln(2)
4892 |     2  = e
4893 |
4894 | 2. -------------------------------------------------------------------------
4895 |                      2     3     4     5           n
4896 |      x        x     x     x     x     x           x
4897 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4898 |               1!    2!    3!    4!    5!          n!
4899 *----------------------------------------------------------------------------*/
4900 
4901 static const float64 float32_exp2_coefficients[15] =
4902 {
4903     const_float64( 0x3ff0000000000000ll ), /*  1 */
4904     const_float64( 0x3fe0000000000000ll ), /*  2 */
4905     const_float64( 0x3fc5555555555555ll ), /*  3 */
4906     const_float64( 0x3fa5555555555555ll ), /*  4 */
4907     const_float64( 0x3f81111111111111ll ), /*  5 */
4908     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4909     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4910     const_float64( 0x3efa01a01a01a01all ), /*  8 */
4911     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4912     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4913     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4914     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4915     const_float64( 0x3de6124613a86d09ll ), /* 13 */
4916     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4917     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4918 };
4919 
4920 float32 float32_exp2(float32 a, float_status *status)
4921 {
4922     FloatParts64 xp, xnp, tp, rp;
4923     int i;
4924 
4925     float32_unpack_canonical(&xp, a, status);
4926     if (unlikely(xp.cls != float_class_normal)) {
4927         switch (xp.cls) {
4928         case float_class_snan:
4929         case float_class_qnan:
4930             parts_return_nan(&xp, status);
4931             return float32_round_pack_canonical(&xp, status);
4932         case float_class_inf:
4933             return xp.sign ? float32_zero : a;
4934         case float_class_zero:
4935             return float32_one;
4936         default:
4937             break;
4938         }
4939         g_assert_not_reached();
4940     }
4941 
4942     float_raise(float_flag_inexact, status);
4943 
4944     float64_unpack_canonical(&tp, float64_ln2, status);
4945     xp = *parts_mul(&xp, &tp, status);
4946     xnp = xp;
4947 
4948     float64_unpack_canonical(&rp, float64_one, status);
4949     for (i = 0 ; i < 15 ; i++) {
4950         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4951         rp = *parts_muladd(&tp, &xp, &rp, 0, status);
4952         xnp = *parts_mul(&xnp, &xp, status);
4953     }
4954 
4955     return float32_round_pack_canonical(&rp, status);
4956 }
4957 
4958 /*----------------------------------------------------------------------------
4959 | Rounds the extended double-precision floating-point value `a'
4960 | to the precision provided by floatx80_rounding_precision and returns the
4961 | result as an extended double-precision floating-point value.
4962 | The operation is performed according to the IEC/IEEE Standard for Binary
4963 | Floating-Point Arithmetic.
4964 *----------------------------------------------------------------------------*/
4965 
4966 floatx80 floatx80_round(floatx80 a, float_status *status)
4967 {
4968     FloatParts128 p;
4969 
4970     if (!floatx80_unpack_canonical(&p, a, status)) {
4971         return floatx80_default_nan(status);
4972     }
4973     return floatx80_round_pack_canonical(&p, status);
4974 }
4975 
4976 static void __attribute__((constructor)) softfloat_init(void)
4977 {
4978     union_float64 ua, ub, uc, ur;
4979 
4980     if (QEMU_NO_HARDFLOAT) {
4981         return;
4982     }
4983     /*
4984      * Test that the host's FMA is not obviously broken. For example,
4985      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4986      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4987      */
4988     ua.s = 0x0020000000000001ULL;
4989     ub.s = 0x3ca0000000000000ULL;
4990     uc.s = 0x0020000000000000ULL;
4991     ur.h = fma(ua.h, ub.h, uc.h);
4992     if (ur.s != 0x0020000000000001ULL) {
4993         force_soft_fma = true;
4994     }
4995 }
4996