xref: /openbmc/qemu/fpu/softfloat.c (revision 96916f36)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal, s);                  \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
372 
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
404 /*
405  * Classify a floating point number. Everything above float_class_qnan
406  * is a NaN so cls >= float_class_qnan is any NaN.
407  */
408 
409 typedef enum __attribute__ ((__packed__)) {
410     float_class_unclassified,
411     float_class_zero,
412     float_class_normal,
413     float_class_inf,
414     float_class_qnan,  /* all NaNs from here */
415     float_class_snan,
416 } FloatClass;
417 
418 #define float_cmask(bit)  (1u << (bit))
419 
420 enum {
421     float_cmask_zero    = float_cmask(float_class_zero),
422     float_cmask_normal  = float_cmask(float_class_normal),
423     float_cmask_inf     = float_cmask(float_class_inf),
424     float_cmask_qnan    = float_cmask(float_class_qnan),
425     float_cmask_snan    = float_cmask(float_class_snan),
426 
427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
429 };
430 
431 /* Flags for parts_minmax. */
432 enum {
433     /* Set for minimum; clear for maximum. */
434     minmax_ismin = 1,
435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
436     minmax_isnum = 2,
437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
438     minmax_ismag = 4,
439 };
440 
441 /* Simple helpers for checking if, or what kind of, NaN we have */
442 static inline __attribute__((unused)) bool is_nan(FloatClass c)
443 {
444     return unlikely(c >= float_class_qnan);
445 }
446 
447 static inline __attribute__((unused)) bool is_snan(FloatClass c)
448 {
449     return c == float_class_snan;
450 }
451 
452 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
453 {
454     return c == float_class_qnan;
455 }
456 
457 /*
458  * Structure holding all of the decomposed parts of a float.
459  * The exponent is unbiased and the fraction is normalized.
460  *
461  * The fraction words are stored in big-endian word ordering,
462  * so that truncation from a larger format to a smaller format
463  * can be done simply by ignoring subsequent elements.
464  */
465 
466 typedef struct {
467     FloatClass cls;
468     bool sign;
469     int32_t exp;
470     union {
471         /* Routines that know the structure may reference the singular name. */
472         uint64_t frac;
473         /*
474          * Routines expanded with multiple structures reference "hi" and "lo"
475          * depending on the operation.  In FloatParts64, "hi" and "lo" are
476          * both the same word and aliased here.
477          */
478         uint64_t frac_hi;
479         uint64_t frac_lo;
480     };
481 } FloatParts64;
482 
483 typedef struct {
484     FloatClass cls;
485     bool sign;
486     int32_t exp;
487     uint64_t frac_hi;
488     uint64_t frac_lo;
489 } FloatParts128;
490 
491 typedef struct {
492     FloatClass cls;
493     bool sign;
494     int32_t exp;
495     uint64_t frac_hi;
496     uint64_t frac_hm;  /* high-middle */
497     uint64_t frac_lm;  /* low-middle */
498     uint64_t frac_lo;
499 } FloatParts256;
500 
501 /* These apply to the most significant word of each FloatPartsN. */
502 #define DECOMPOSED_BINARY_POINT    63
503 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
504 
505 /* Structure holding all of the relevant parameters for a format.
506  *   exp_size: the size of the exponent field
507  *   exp_bias: the offset applied to the exponent field
508  *   exp_max: the maximum normalised exponent
509  *   frac_size: the size of the fraction field
510  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
511  * The following are computed based the size of fraction
512  *   round_mask: bits below lsb which must be rounded
513  * The following optional modifiers are available:
514  *   arm_althp: handle ARM Alternative Half Precision
515  */
516 typedef struct {
517     int exp_size;
518     int exp_bias;
519     int exp_max;
520     int frac_size;
521     int frac_shift;
522     bool arm_althp;
523     uint64_t round_mask;
524 } FloatFmt;
525 
526 /* Expand fields based on the size of exponent and fraction */
527 #define FLOAT_PARAMS_(E)                                \
528     .exp_size       = E,                                \
529     .exp_bias       = ((1 << E) - 1) >> 1,              \
530     .exp_max        = (1 << E) - 1
531 
532 #define FLOAT_PARAMS(E, F)                              \
533     FLOAT_PARAMS_(E),                                   \
534     .frac_size      = F,                                \
535     .frac_shift     = (-F - 1) & 63,                    \
536     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
537 
538 static const FloatFmt float16_params = {
539     FLOAT_PARAMS(5, 10)
540 };
541 
542 static const FloatFmt float16_params_ahp = {
543     FLOAT_PARAMS(5, 10),
544     .arm_althp = true
545 };
546 
547 static const FloatFmt bfloat16_params = {
548     FLOAT_PARAMS(8, 7)
549 };
550 
551 static const FloatFmt float32_params = {
552     FLOAT_PARAMS(8, 23)
553 };
554 
555 static const FloatFmt float64_params = {
556     FLOAT_PARAMS(11, 52)
557 };
558 
559 static const FloatFmt float128_params = {
560     FLOAT_PARAMS(15, 112)
561 };
562 
563 #define FLOATX80_PARAMS(R)              \
564     FLOAT_PARAMS_(15),                  \
565     .frac_size = R == 64 ? 63 : R,      \
566     .frac_shift = 0,                    \
567     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
568 
569 static const FloatFmt floatx80_params[3] = {
570     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
571     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
572     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
573 };
574 
575 /* Unpack a float to parts, but do not canonicalize.  */
576 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
577 {
578     const int f_size = fmt->frac_size;
579     const int e_size = fmt->exp_size;
580 
581     *r = (FloatParts64) {
582         .cls = float_class_unclassified,
583         .sign = extract64(raw, f_size + e_size, 1),
584         .exp = extract64(raw, f_size, e_size),
585         .frac = extract64(raw, 0, f_size)
586     };
587 }
588 
589 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
590 {
591     unpack_raw64(p, &float16_params, f);
592 }
593 
594 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
595 {
596     unpack_raw64(p, &bfloat16_params, f);
597 }
598 
599 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
600 {
601     unpack_raw64(p, &float32_params, f);
602 }
603 
604 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
605 {
606     unpack_raw64(p, &float64_params, f);
607 }
608 
609 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
610 {
611     *p = (FloatParts128) {
612         .cls = float_class_unclassified,
613         .sign = extract32(f.high, 15, 1),
614         .exp = extract32(f.high, 0, 15),
615         .frac_hi = f.low
616     };
617 }
618 
619 static void float128_unpack_raw(FloatParts128 *p, float128 f)
620 {
621     const int f_size = float128_params.frac_size - 64;
622     const int e_size = float128_params.exp_size;
623 
624     *p = (FloatParts128) {
625         .cls = float_class_unclassified,
626         .sign = extract64(f.high, f_size + e_size, 1),
627         .exp = extract64(f.high, f_size, e_size),
628         .frac_hi = extract64(f.high, 0, f_size),
629         .frac_lo = f.low,
630     };
631 }
632 
633 /* Pack a float from parts, but do not canonicalize.  */
634 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
635 {
636     const int f_size = fmt->frac_size;
637     const int e_size = fmt->exp_size;
638     uint64_t ret;
639 
640     ret = (uint64_t)p->sign << (f_size + e_size);
641     ret = deposit64(ret, f_size, e_size, p->exp);
642     ret = deposit64(ret, 0, f_size, p->frac);
643     return ret;
644 }
645 
646 static inline float16 float16_pack_raw(const FloatParts64 *p)
647 {
648     return make_float16(pack_raw64(p, &float16_params));
649 }
650 
651 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
652 {
653     return pack_raw64(p, &bfloat16_params);
654 }
655 
656 static inline float32 float32_pack_raw(const FloatParts64 *p)
657 {
658     return make_float32(pack_raw64(p, &float32_params));
659 }
660 
661 static inline float64 float64_pack_raw(const FloatParts64 *p)
662 {
663     return make_float64(pack_raw64(p, &float64_params));
664 }
665 
666 static float128 float128_pack_raw(const FloatParts128 *p)
667 {
668     const int f_size = float128_params.frac_size - 64;
669     const int e_size = float128_params.exp_size;
670     uint64_t hi;
671 
672     hi = (uint64_t)p->sign << (f_size + e_size);
673     hi = deposit64(hi, f_size, e_size, p->exp);
674     hi = deposit64(hi, 0, f_size, p->frac_hi);
675     return make_float128(hi, p->frac_lo);
676 }
677 
678 /*----------------------------------------------------------------------------
679 | Functions and definitions to determine:  (1) whether tininess for underflow
680 | is detected before or after rounding by default, (2) what (if anything)
681 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
682 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
683 | are propagated from function inputs to output.  These details are target-
684 | specific.
685 *----------------------------------------------------------------------------*/
686 #include "softfloat-specialize.c.inc"
687 
688 #define PARTS_GENERIC_64_128(NAME, P) \
689     _Generic((P), FloatParts64 *: parts64_##NAME, \
690                   FloatParts128 *: parts128_##NAME)
691 
692 #define PARTS_GENERIC_64_128_256(NAME, P) \
693     _Generic((P), FloatParts64 *: parts64_##NAME, \
694                   FloatParts128 *: parts128_##NAME, \
695                   FloatParts256 *: parts256_##NAME)
696 
697 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
698 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
699 
700 static void parts64_return_nan(FloatParts64 *a, float_status *s);
701 static void parts128_return_nan(FloatParts128 *a, float_status *s);
702 
703 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
704 
705 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
706                                       float_status *s);
707 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
708                                         float_status *s);
709 
710 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
711 
712 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
713                                              FloatParts64 *c, float_status *s,
714                                              int ab_mask, int abc_mask);
715 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
716                                                FloatParts128 *b,
717                                                FloatParts128 *c,
718                                                float_status *s,
719                                                int ab_mask, int abc_mask);
720 
721 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
722     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
723 
724 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
725                                  const FloatFmt *fmt);
726 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
727                                   const FloatFmt *fmt);
728 
729 #define parts_canonicalize(A, S, F) \
730     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
731 
732 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
733                                    const FloatFmt *fmt);
734 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
735                                     const FloatFmt *fmt);
736 
737 #define parts_uncanon_normal(A, S, F) \
738     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
739 
740 static void parts64_uncanon(FloatParts64 *p, float_status *status,
741                             const FloatFmt *fmt);
742 static void parts128_uncanon(FloatParts128 *p, float_status *status,
743                              const FloatFmt *fmt);
744 
745 #define parts_uncanon(A, S, F) \
746     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
747 
748 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
749 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
750 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
751 
752 #define parts_add_normal(A, B) \
753     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
754 
755 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
756 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
757 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
758 
759 #define parts_sub_normal(A, B) \
760     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
761 
762 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
763                                     float_status *s, bool subtract);
764 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
765                                       float_status *s, bool subtract);
766 
767 #define parts_addsub(A, B, S, Z) \
768     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
769 
770 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
771                                  float_status *s);
772 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
773                                    float_status *s);
774 
775 #define parts_mul(A, B, S) \
776     PARTS_GENERIC_64_128(mul, A)(A, B, S)
777 
778 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
779                                     FloatParts64 *c, int flags,
780                                     float_status *s);
781 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
782                                       FloatParts128 *c, int flags,
783                                       float_status *s);
784 
785 #define parts_muladd(A, B, C, Z, S) \
786     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
787 
788 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
789                                  float_status *s);
790 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
791                                    float_status *s);
792 
793 #define parts_div(A, B, S) \
794     PARTS_GENERIC_64_128(div, A)(A, B, S)
795 
796 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
797                                     uint64_t *mod_quot, float_status *s);
798 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
799                                       uint64_t *mod_quot, float_status *s);
800 
801 #define parts_modrem(A, B, Q, S) \
802     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
803 
804 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
805 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
806 
807 #define parts_sqrt(A, S, F) \
808     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
809 
810 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
811                                         int scale, int frac_size);
812 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
813                                          int scale, int frac_size);
814 
815 #define parts_round_to_int_normal(A, R, C, F) \
816     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
817 
818 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
819                                  int scale, float_status *s,
820                                  const FloatFmt *fmt);
821 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
822                                   int scale, float_status *s,
823                                   const FloatFmt *fmt);
824 
825 #define parts_round_to_int(A, R, C, S, F) \
826     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
827 
828 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
829                                      int scale, int64_t min, int64_t max,
830                                      float_status *s);
831 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
832                                      int scale, int64_t min, int64_t max,
833                                      float_status *s);
834 
835 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
836     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
837 
838 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
839                                       int scale, uint64_t max,
840                                       float_status *s);
841 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
842                                        int scale, uint64_t max,
843                                        float_status *s);
844 
845 #define parts_float_to_uint(P, R, Z, M, S) \
846     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
847 
848 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
849                                   int scale, float_status *s);
850 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
851                                    int scale, float_status *s);
852 
853 #define parts_sint_to_float(P, I, Z, S) \
854     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
855 
856 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
857                                   int scale, float_status *s);
858 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
859                                    int scale, float_status *s);
860 
861 #define parts_uint_to_float(P, I, Z, S) \
862     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
863 
864 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
865                                     float_status *s, int flags);
866 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
867                                       float_status *s, int flags);
868 
869 #define parts_minmax(A, B, S, F) \
870     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
871 
872 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
873                            float_status *s, bool q);
874 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
875                             float_status *s, bool q);
876 
877 #define parts_compare(A, B, S, Q) \
878     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
879 
880 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
881 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
882 
883 #define parts_scalbn(A, N, S) \
884     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
885 
886 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
887 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
888 
889 #define parts_log2(A, S, F) \
890     PARTS_GENERIC_64_128(log2, A)(A, S, F)
891 
892 /*
893  * Helper functions for softfloat-parts.c.inc, per-size operations.
894  */
895 
896 #define FRAC_GENERIC_64_128(NAME, P) \
897     _Generic((P), FloatParts64 *: frac64_##NAME, \
898                   FloatParts128 *: frac128_##NAME)
899 
900 #define FRAC_GENERIC_64_128_256(NAME, P) \
901     _Generic((P), FloatParts64 *: frac64_##NAME, \
902                   FloatParts128 *: frac128_##NAME, \
903                   FloatParts256 *: frac256_##NAME)
904 
905 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
906 {
907     return uadd64_overflow(a->frac, b->frac, &r->frac);
908 }
909 
910 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
911 {
912     bool c = 0;
913     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
914     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
915     return c;
916 }
917 
918 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
919 {
920     bool c = 0;
921     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
922     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
923     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
924     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
925     return c;
926 }
927 
928 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
929 
930 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
931 {
932     return uadd64_overflow(a->frac, c, &r->frac);
933 }
934 
935 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
936 {
937     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
938     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
939 }
940 
941 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
942 
943 static void frac64_allones(FloatParts64 *a)
944 {
945     a->frac = -1;
946 }
947 
948 static void frac128_allones(FloatParts128 *a)
949 {
950     a->frac_hi = a->frac_lo = -1;
951 }
952 
953 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
954 
955 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
956 {
957     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
958 }
959 
960 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
961 {
962     uint64_t ta = a->frac_hi, tb = b->frac_hi;
963     if (ta == tb) {
964         ta = a->frac_lo, tb = b->frac_lo;
965         if (ta == tb) {
966             return 0;
967         }
968     }
969     return ta < tb ? -1 : 1;
970 }
971 
972 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
973 
974 static void frac64_clear(FloatParts64 *a)
975 {
976     a->frac = 0;
977 }
978 
979 static void frac128_clear(FloatParts128 *a)
980 {
981     a->frac_hi = a->frac_lo = 0;
982 }
983 
984 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
985 
986 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
987 {
988     uint64_t n1, n0, r, q;
989     bool ret;
990 
991     /*
992      * We want a 2*N / N-bit division to produce exactly an N-bit
993      * result, so that we do not lose any precision and so that we
994      * do not have to renormalize afterward.  If A.frac < B.frac,
995      * then division would produce an (N-1)-bit result; shift A left
996      * by one to produce the an N-bit result, and return true to
997      * decrement the exponent to match.
998      *
999      * The udiv_qrnnd algorithm that we're using requires normalization,
1000      * i.e. the msb of the denominator must be set, which is already true.
1001      */
1002     ret = a->frac < b->frac;
1003     if (ret) {
1004         n0 = a->frac;
1005         n1 = 0;
1006     } else {
1007         n0 = a->frac >> 1;
1008         n1 = a->frac << 63;
1009     }
1010     q = udiv_qrnnd(&r, n0, n1, b->frac);
1011 
1012     /* Set lsb if there is a remainder, to set inexact. */
1013     a->frac = q | (r != 0);
1014 
1015     return ret;
1016 }
1017 
1018 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1019 {
1020     uint64_t q0, q1, a0, a1, b0, b1;
1021     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1022     bool ret = false;
1023 
1024     a0 = a->frac_hi, a1 = a->frac_lo;
1025     b0 = b->frac_hi, b1 = b->frac_lo;
1026 
1027     ret = lt128(a0, a1, b0, b1);
1028     if (!ret) {
1029         a1 = shr_double(a0, a1, 1);
1030         a0 = a0 >> 1;
1031     }
1032 
1033     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1034     q0 = estimateDiv128To64(a0, a1, b0);
1035 
1036     /*
1037      * Estimate is high because B1 was not included (unless B1 == 0).
1038      * Reduce quotient and increase remainder until remainder is non-negative.
1039      * This loop will execute 0 to 2 times.
1040      */
1041     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1042     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1043     while (r0 != 0) {
1044         q0--;
1045         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1046     }
1047 
1048     /* Repeat using the remainder, producing a second word of quotient. */
1049     q1 = estimateDiv128To64(r1, r2, b0);
1050     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1051     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1052     while (r1 != 0) {
1053         q1--;
1054         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1055     }
1056 
1057     /* Any remainder indicates inexact; set sticky bit. */
1058     q1 |= (r2 | r3) != 0;
1059 
1060     a->frac_hi = q0;
1061     a->frac_lo = q1;
1062     return ret;
1063 }
1064 
1065 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1066 
1067 static bool frac64_eqz(FloatParts64 *a)
1068 {
1069     return a->frac == 0;
1070 }
1071 
1072 static bool frac128_eqz(FloatParts128 *a)
1073 {
1074     return (a->frac_hi | a->frac_lo) == 0;
1075 }
1076 
1077 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1078 
1079 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1080 {
1081     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1082 }
1083 
1084 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1085 {
1086     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1087                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1088 }
1089 
1090 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1091 
1092 static void frac64_neg(FloatParts64 *a)
1093 {
1094     a->frac = -a->frac;
1095 }
1096 
1097 static void frac128_neg(FloatParts128 *a)
1098 {
1099     bool c = 0;
1100     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1101     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1102 }
1103 
1104 static void frac256_neg(FloatParts256 *a)
1105 {
1106     bool c = 0;
1107     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1108     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1109     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1110     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1111 }
1112 
1113 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1114 
1115 static int frac64_normalize(FloatParts64 *a)
1116 {
1117     if (a->frac) {
1118         int shift = clz64(a->frac);
1119         a->frac <<= shift;
1120         return shift;
1121     }
1122     return 64;
1123 }
1124 
1125 static int frac128_normalize(FloatParts128 *a)
1126 {
1127     if (a->frac_hi) {
1128         int shl = clz64(a->frac_hi);
1129         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1130         a->frac_lo <<= shl;
1131         return shl;
1132     } else if (a->frac_lo) {
1133         int shl = clz64(a->frac_lo);
1134         a->frac_hi = a->frac_lo << shl;
1135         a->frac_lo = 0;
1136         return shl + 64;
1137     }
1138     return 128;
1139 }
1140 
1141 static int frac256_normalize(FloatParts256 *a)
1142 {
1143     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1144     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1145     int ret, shl;
1146 
1147     if (likely(a0)) {
1148         shl = clz64(a0);
1149         if (shl == 0) {
1150             return 0;
1151         }
1152         ret = shl;
1153     } else {
1154         if (a1) {
1155             ret = 64;
1156             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1157         } else if (a2) {
1158             ret = 128;
1159             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1160         } else if (a3) {
1161             ret = 192;
1162             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1163         } else {
1164             ret = 256;
1165             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1166             goto done;
1167         }
1168         shl = clz64(a0);
1169         if (shl == 0) {
1170             goto done;
1171         }
1172         ret += shl;
1173     }
1174 
1175     a0 = shl_double(a0, a1, shl);
1176     a1 = shl_double(a1, a2, shl);
1177     a2 = shl_double(a2, a3, shl);
1178     a3 <<= shl;
1179 
1180  done:
1181     a->frac_hi = a0;
1182     a->frac_hm = a1;
1183     a->frac_lm = a2;
1184     a->frac_lo = a3;
1185     return ret;
1186 }
1187 
1188 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1189 
1190 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1191 {
1192     uint64_t a0, a1, b0, t0, t1, q, quot;
1193     int exp_diff = a->exp - b->exp;
1194     int shift;
1195 
1196     a0 = a->frac;
1197     a1 = 0;
1198 
1199     if (exp_diff < -1) {
1200         if (mod_quot) {
1201             *mod_quot = 0;
1202         }
1203         return;
1204     }
1205     if (exp_diff == -1) {
1206         a0 >>= 1;
1207         exp_diff = 0;
1208     }
1209 
1210     b0 = b->frac;
1211     quot = q = b0 <= a0;
1212     if (q) {
1213         a0 -= b0;
1214     }
1215 
1216     exp_diff -= 64;
1217     while (exp_diff > 0) {
1218         q = estimateDiv128To64(a0, a1, b0);
1219         q = q > 2 ? q - 2 : 0;
1220         mul64To128(b0, q, &t0, &t1);
1221         sub128(a0, a1, t0, t1, &a0, &a1);
1222         shortShift128Left(a0, a1, 62, &a0, &a1);
1223         exp_diff -= 62;
1224         quot = (quot << 62) + q;
1225     }
1226 
1227     exp_diff += 64;
1228     if (exp_diff > 0) {
1229         q = estimateDiv128To64(a0, a1, b0);
1230         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1231         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1232         sub128(a0, a1, t0, t1, &a0, &a1);
1233         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1234         while (le128(t0, t1, a0, a1)) {
1235             ++q;
1236             sub128(a0, a1, t0, t1, &a0, &a1);
1237         }
1238         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1239     } else {
1240         t0 = b0;
1241         t1 = 0;
1242     }
1243 
1244     if (mod_quot) {
1245         *mod_quot = quot;
1246     } else {
1247         sub128(t0, t1, a0, a1, &t0, &t1);
1248         if (lt128(t0, t1, a0, a1) ||
1249             (eq128(t0, t1, a0, a1) && (q & 1))) {
1250             a0 = t0;
1251             a1 = t1;
1252             a->sign = !a->sign;
1253         }
1254     }
1255 
1256     if (likely(a0)) {
1257         shift = clz64(a0);
1258         shortShift128Left(a0, a1, shift, &a0, &a1);
1259     } else if (likely(a1)) {
1260         shift = clz64(a1);
1261         a0 = a1 << shift;
1262         a1 = 0;
1263         shift += 64;
1264     } else {
1265         a->cls = float_class_zero;
1266         return;
1267     }
1268 
1269     a->exp = b->exp + exp_diff - shift;
1270     a->frac = a0 | (a1 != 0);
1271 }
1272 
1273 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1274                            uint64_t *mod_quot)
1275 {
1276     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1277     int exp_diff = a->exp - b->exp;
1278     int shift;
1279 
1280     a0 = a->frac_hi;
1281     a1 = a->frac_lo;
1282     a2 = 0;
1283 
1284     if (exp_diff < -1) {
1285         if (mod_quot) {
1286             *mod_quot = 0;
1287         }
1288         return;
1289     }
1290     if (exp_diff == -1) {
1291         shift128Right(a0, a1, 1, &a0, &a1);
1292         exp_diff = 0;
1293     }
1294 
1295     b0 = b->frac_hi;
1296     b1 = b->frac_lo;
1297 
1298     quot = q = le128(b0, b1, a0, a1);
1299     if (q) {
1300         sub128(a0, a1, b0, b1, &a0, &a1);
1301     }
1302 
1303     exp_diff -= 64;
1304     while (exp_diff > 0) {
1305         q = estimateDiv128To64(a0, a1, b0);
1306         q = q > 4 ? q - 4 : 0;
1307         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1308         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1309         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1310         exp_diff -= 61;
1311         quot = (quot << 61) + q;
1312     }
1313 
1314     exp_diff += 64;
1315     if (exp_diff > 0) {
1316         q = estimateDiv128To64(a0, a1, b0);
1317         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1318         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1319         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1320         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1321         while (le192(t0, t1, t2, a0, a1, a2)) {
1322             ++q;
1323             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1324         }
1325         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1326     } else {
1327         t0 = b0;
1328         t1 = b1;
1329         t2 = 0;
1330     }
1331 
1332     if (mod_quot) {
1333         *mod_quot = quot;
1334     } else {
1335         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1336         if (lt192(t0, t1, t2, a0, a1, a2) ||
1337             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1338             a0 = t0;
1339             a1 = t1;
1340             a2 = t2;
1341             a->sign = !a->sign;
1342         }
1343     }
1344 
1345     if (likely(a0)) {
1346         shift = clz64(a0);
1347         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1348     } else if (likely(a1)) {
1349         shift = clz64(a1);
1350         shortShift128Left(a1, a2, shift, &a0, &a1);
1351         a2 = 0;
1352         shift += 64;
1353     } else if (likely(a2)) {
1354         shift = clz64(a2);
1355         a0 = a2 << shift;
1356         a1 = a2 = 0;
1357         shift += 128;
1358     } else {
1359         a->cls = float_class_zero;
1360         return;
1361     }
1362 
1363     a->exp = b->exp + exp_diff - shift;
1364     a->frac_hi = a0;
1365     a->frac_lo = a1 | (a2 != 0);
1366 }
1367 
1368 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1369 
1370 static void frac64_shl(FloatParts64 *a, int c)
1371 {
1372     a->frac <<= c;
1373 }
1374 
1375 static void frac128_shl(FloatParts128 *a, int c)
1376 {
1377     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1378 
1379     if (c & 64) {
1380         a0 = a1, a1 = 0;
1381     }
1382 
1383     c &= 63;
1384     if (c) {
1385         a0 = shl_double(a0, a1, c);
1386         a1 = a1 << c;
1387     }
1388 
1389     a->frac_hi = a0;
1390     a->frac_lo = a1;
1391 }
1392 
1393 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1394 
1395 static void frac64_shr(FloatParts64 *a, int c)
1396 {
1397     a->frac >>= c;
1398 }
1399 
1400 static void frac128_shr(FloatParts128 *a, int c)
1401 {
1402     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1403 
1404     if (c & 64) {
1405         a1 = a0, a0 = 0;
1406     }
1407 
1408     c &= 63;
1409     if (c) {
1410         a1 = shr_double(a0, a1, c);
1411         a0 = a0 >> c;
1412     }
1413 
1414     a->frac_hi = a0;
1415     a->frac_lo = a1;
1416 }
1417 
1418 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1419 
1420 static void frac64_shrjam(FloatParts64 *a, int c)
1421 {
1422     uint64_t a0 = a->frac;
1423 
1424     if (likely(c != 0)) {
1425         if (likely(c < 64)) {
1426             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1427         } else {
1428             a0 = a0 != 0;
1429         }
1430         a->frac = a0;
1431     }
1432 }
1433 
1434 static void frac128_shrjam(FloatParts128 *a, int c)
1435 {
1436     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1437     uint64_t sticky = 0;
1438 
1439     if (unlikely(c == 0)) {
1440         return;
1441     } else if (likely(c < 64)) {
1442         /* nothing */
1443     } else if (likely(c < 128)) {
1444         sticky = a1;
1445         a1 = a0;
1446         a0 = 0;
1447         c &= 63;
1448         if (c == 0) {
1449             goto done;
1450         }
1451     } else {
1452         sticky = a0 | a1;
1453         a0 = a1 = 0;
1454         goto done;
1455     }
1456 
1457     sticky |= shr_double(a1, 0, c);
1458     a1 = shr_double(a0, a1, c);
1459     a0 = a0 >> c;
1460 
1461  done:
1462     a->frac_lo = a1 | (sticky != 0);
1463     a->frac_hi = a0;
1464 }
1465 
1466 static void frac256_shrjam(FloatParts256 *a, int c)
1467 {
1468     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1469     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1470     uint64_t sticky = 0;
1471 
1472     if (unlikely(c == 0)) {
1473         return;
1474     } else if (likely(c < 64)) {
1475         /* nothing */
1476     } else if (likely(c < 256)) {
1477         if (unlikely(c & 128)) {
1478             sticky |= a2 | a3;
1479             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1480         }
1481         if (unlikely(c & 64)) {
1482             sticky |= a3;
1483             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1484         }
1485         c &= 63;
1486         if (c == 0) {
1487             goto done;
1488         }
1489     } else {
1490         sticky = a0 | a1 | a2 | a3;
1491         a0 = a1 = a2 = a3 = 0;
1492         goto done;
1493     }
1494 
1495     sticky |= shr_double(a3, 0, c);
1496     a3 = shr_double(a2, a3, c);
1497     a2 = shr_double(a1, a2, c);
1498     a1 = shr_double(a0, a1, c);
1499     a0 = a0 >> c;
1500 
1501  done:
1502     a->frac_lo = a3 | (sticky != 0);
1503     a->frac_lm = a2;
1504     a->frac_hm = a1;
1505     a->frac_hi = a0;
1506 }
1507 
1508 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1509 
1510 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1511 {
1512     return usub64_overflow(a->frac, b->frac, &r->frac);
1513 }
1514 
1515 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1516 {
1517     bool c = 0;
1518     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1519     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1520     return c;
1521 }
1522 
1523 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1524 {
1525     bool c = 0;
1526     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1527     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1528     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1529     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1530     return c;
1531 }
1532 
1533 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1534 
1535 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1536 {
1537     r->frac = a->frac_hi | (a->frac_lo != 0);
1538 }
1539 
1540 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1541 {
1542     r->frac_hi = a->frac_hi;
1543     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1544 }
1545 
1546 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1547 
1548 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1549 {
1550     r->frac_hi = a->frac;
1551     r->frac_lo = 0;
1552 }
1553 
1554 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1555 {
1556     r->frac_hi = a->frac_hi;
1557     r->frac_hm = a->frac_lo;
1558     r->frac_lm = 0;
1559     r->frac_lo = 0;
1560 }
1561 
1562 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1563 
1564 /*
1565  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1566  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1567  * and thus MIT licenced.
1568  */
1569 static const uint16_t rsqrt_tab[128] = {
1570     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1571     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1572     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1573     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1574     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1575     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1576     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1577     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1578     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1579     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1580     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1581     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1582     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1583     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1584     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1585     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1586 };
1587 
1588 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1589 #define FloatPartsN    glue(FloatParts,N)
1590 #define FloatPartsW    glue(FloatParts,W)
1591 
1592 #define N 64
1593 #define W 128
1594 
1595 #include "softfloat-parts-addsub.c.inc"
1596 #include "softfloat-parts.c.inc"
1597 
1598 #undef  N
1599 #undef  W
1600 #define N 128
1601 #define W 256
1602 
1603 #include "softfloat-parts-addsub.c.inc"
1604 #include "softfloat-parts.c.inc"
1605 
1606 #undef  N
1607 #undef  W
1608 #define N            256
1609 
1610 #include "softfloat-parts-addsub.c.inc"
1611 
1612 #undef  N
1613 #undef  W
1614 #undef  partsN
1615 #undef  FloatPartsN
1616 #undef  FloatPartsW
1617 
1618 /*
1619  * Pack/unpack routines with a specific FloatFmt.
1620  */
1621 
1622 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1623                                       float_status *s, const FloatFmt *params)
1624 {
1625     float16_unpack_raw(p, f);
1626     parts_canonicalize(p, s, params);
1627 }
1628 
1629 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1630                                      float_status *s)
1631 {
1632     float16a_unpack_canonical(p, f, s, &float16_params);
1633 }
1634 
1635 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1636                                       float_status *s)
1637 {
1638     bfloat16_unpack_raw(p, f);
1639     parts_canonicalize(p, s, &bfloat16_params);
1640 }
1641 
1642 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1643                                              float_status *s,
1644                                              const FloatFmt *params)
1645 {
1646     parts_uncanon(p, s, params);
1647     return float16_pack_raw(p);
1648 }
1649 
1650 static float16 float16_round_pack_canonical(FloatParts64 *p,
1651                                             float_status *s)
1652 {
1653     return float16a_round_pack_canonical(p, s, &float16_params);
1654 }
1655 
1656 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1657                                               float_status *s)
1658 {
1659     parts_uncanon(p, s, &bfloat16_params);
1660     return bfloat16_pack_raw(p);
1661 }
1662 
1663 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1664                                      float_status *s)
1665 {
1666     float32_unpack_raw(p, f);
1667     parts_canonicalize(p, s, &float32_params);
1668 }
1669 
1670 static float32 float32_round_pack_canonical(FloatParts64 *p,
1671                                             float_status *s)
1672 {
1673     parts_uncanon(p, s, &float32_params);
1674     return float32_pack_raw(p);
1675 }
1676 
1677 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1678                                      float_status *s)
1679 {
1680     float64_unpack_raw(p, f);
1681     parts_canonicalize(p, s, &float64_params);
1682 }
1683 
1684 static float64 float64_round_pack_canonical(FloatParts64 *p,
1685                                             float_status *s)
1686 {
1687     parts_uncanon(p, s, &float64_params);
1688     return float64_pack_raw(p);
1689 }
1690 
1691 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1692                                       float_status *s)
1693 {
1694     float128_unpack_raw(p, f);
1695     parts_canonicalize(p, s, &float128_params);
1696 }
1697 
1698 static float128 float128_round_pack_canonical(FloatParts128 *p,
1699                                               float_status *s)
1700 {
1701     parts_uncanon(p, s, &float128_params);
1702     return float128_pack_raw(p);
1703 }
1704 
1705 /* Returns false if the encoding is invalid. */
1706 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1707                                       float_status *s)
1708 {
1709     /* Ensure rounding precision is set before beginning. */
1710     switch (s->floatx80_rounding_precision) {
1711     case floatx80_precision_x:
1712     case floatx80_precision_d:
1713     case floatx80_precision_s:
1714         break;
1715     default:
1716         g_assert_not_reached();
1717     }
1718 
1719     if (unlikely(floatx80_invalid_encoding(f))) {
1720         float_raise(float_flag_invalid, s);
1721         return false;
1722     }
1723 
1724     floatx80_unpack_raw(p, f);
1725 
1726     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1727         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1728     } else {
1729         /* The explicit integer bit is ignored, after invalid checks. */
1730         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1731         p->cls = (p->frac_hi == 0 ? float_class_inf
1732                   : parts_is_snan_frac(p->frac_hi, s)
1733                   ? float_class_snan : float_class_qnan);
1734     }
1735     return true;
1736 }
1737 
1738 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1739                                               float_status *s)
1740 {
1741     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1742     uint64_t frac;
1743     int exp;
1744 
1745     switch (p->cls) {
1746     case float_class_normal:
1747         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1748             parts_uncanon_normal(p, s, fmt);
1749             frac = p->frac_hi;
1750             exp = p->exp;
1751         } else {
1752             FloatParts64 p64;
1753 
1754             p64.sign = p->sign;
1755             p64.exp = p->exp;
1756             frac_truncjam(&p64, p);
1757             parts_uncanon_normal(&p64, s, fmt);
1758             frac = p64.frac;
1759             exp = p64.exp;
1760         }
1761         if (exp != fmt->exp_max) {
1762             break;
1763         }
1764         /* rounded to inf -- fall through to set frac correctly */
1765 
1766     case float_class_inf:
1767         /* x86 and m68k differ in the setting of the integer bit. */
1768         frac = floatx80_infinity_low;
1769         exp = fmt->exp_max;
1770         break;
1771 
1772     case float_class_zero:
1773         frac = 0;
1774         exp = 0;
1775         break;
1776 
1777     case float_class_snan:
1778     case float_class_qnan:
1779         /* NaNs have the integer bit set. */
1780         frac = p->frac_hi | (1ull << 63);
1781         exp = fmt->exp_max;
1782         break;
1783 
1784     default:
1785         g_assert_not_reached();
1786     }
1787 
1788     return packFloatx80(p->sign, exp, frac);
1789 }
1790 
1791 /*
1792  * Addition and subtraction
1793  */
1794 
1795 static float16 QEMU_FLATTEN
1796 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1797 {
1798     FloatParts64 pa, pb, *pr;
1799 
1800     float16_unpack_canonical(&pa, a, status);
1801     float16_unpack_canonical(&pb, b, status);
1802     pr = parts_addsub(&pa, &pb, status, subtract);
1803 
1804     return float16_round_pack_canonical(pr, status);
1805 }
1806 
1807 float16 float16_add(float16 a, float16 b, float_status *status)
1808 {
1809     return float16_addsub(a, b, status, false);
1810 }
1811 
1812 float16 float16_sub(float16 a, float16 b, float_status *status)
1813 {
1814     return float16_addsub(a, b, status, true);
1815 }
1816 
1817 static float32 QEMU_SOFTFLOAT_ATTR
1818 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1819 {
1820     FloatParts64 pa, pb, *pr;
1821 
1822     float32_unpack_canonical(&pa, a, status);
1823     float32_unpack_canonical(&pb, b, status);
1824     pr = parts_addsub(&pa, &pb, status, subtract);
1825 
1826     return float32_round_pack_canonical(pr, status);
1827 }
1828 
1829 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1830 {
1831     return soft_f32_addsub(a, b, status, false);
1832 }
1833 
1834 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1835 {
1836     return soft_f32_addsub(a, b, status, true);
1837 }
1838 
1839 static float64 QEMU_SOFTFLOAT_ATTR
1840 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1841 {
1842     FloatParts64 pa, pb, *pr;
1843 
1844     float64_unpack_canonical(&pa, a, status);
1845     float64_unpack_canonical(&pb, b, status);
1846     pr = parts_addsub(&pa, &pb, status, subtract);
1847 
1848     return float64_round_pack_canonical(pr, status);
1849 }
1850 
1851 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1852 {
1853     return soft_f64_addsub(a, b, status, false);
1854 }
1855 
1856 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1857 {
1858     return soft_f64_addsub(a, b, status, true);
1859 }
1860 
1861 static float hard_f32_add(float a, float b)
1862 {
1863     return a + b;
1864 }
1865 
1866 static float hard_f32_sub(float a, float b)
1867 {
1868     return a - b;
1869 }
1870 
1871 static double hard_f64_add(double a, double b)
1872 {
1873     return a + b;
1874 }
1875 
1876 static double hard_f64_sub(double a, double b)
1877 {
1878     return a - b;
1879 }
1880 
1881 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1882 {
1883     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1884         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1885     }
1886     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1887 }
1888 
1889 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1890 {
1891     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1892         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1893     } else {
1894         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1895     }
1896 }
1897 
1898 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1899                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1900 {
1901     return float32_gen2(a, b, s, hard, soft,
1902                         f32_is_zon2, f32_addsubmul_post);
1903 }
1904 
1905 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1906                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1907 {
1908     return float64_gen2(a, b, s, hard, soft,
1909                         f64_is_zon2, f64_addsubmul_post);
1910 }
1911 
1912 float32 QEMU_FLATTEN
1913 float32_add(float32 a, float32 b, float_status *s)
1914 {
1915     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1916 }
1917 
1918 float32 QEMU_FLATTEN
1919 float32_sub(float32 a, float32 b, float_status *s)
1920 {
1921     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1922 }
1923 
1924 float64 QEMU_FLATTEN
1925 float64_add(float64 a, float64 b, float_status *s)
1926 {
1927     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1928 }
1929 
1930 float64 QEMU_FLATTEN
1931 float64_sub(float64 a, float64 b, float_status *s)
1932 {
1933     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1934 }
1935 
1936 static bfloat16 QEMU_FLATTEN
1937 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1938 {
1939     FloatParts64 pa, pb, *pr;
1940 
1941     bfloat16_unpack_canonical(&pa, a, status);
1942     bfloat16_unpack_canonical(&pb, b, status);
1943     pr = parts_addsub(&pa, &pb, status, subtract);
1944 
1945     return bfloat16_round_pack_canonical(pr, status);
1946 }
1947 
1948 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1949 {
1950     return bfloat16_addsub(a, b, status, false);
1951 }
1952 
1953 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1954 {
1955     return bfloat16_addsub(a, b, status, true);
1956 }
1957 
1958 static float128 QEMU_FLATTEN
1959 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1960 {
1961     FloatParts128 pa, pb, *pr;
1962 
1963     float128_unpack_canonical(&pa, a, status);
1964     float128_unpack_canonical(&pb, b, status);
1965     pr = parts_addsub(&pa, &pb, status, subtract);
1966 
1967     return float128_round_pack_canonical(pr, status);
1968 }
1969 
1970 float128 float128_add(float128 a, float128 b, float_status *status)
1971 {
1972     return float128_addsub(a, b, status, false);
1973 }
1974 
1975 float128 float128_sub(float128 a, float128 b, float_status *status)
1976 {
1977     return float128_addsub(a, b, status, true);
1978 }
1979 
1980 static floatx80 QEMU_FLATTEN
1981 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1982 {
1983     FloatParts128 pa, pb, *pr;
1984 
1985     if (!floatx80_unpack_canonical(&pa, a, status) ||
1986         !floatx80_unpack_canonical(&pb, b, status)) {
1987         return floatx80_default_nan(status);
1988     }
1989 
1990     pr = parts_addsub(&pa, &pb, status, subtract);
1991     return floatx80_round_pack_canonical(pr, status);
1992 }
1993 
1994 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1995 {
1996     return floatx80_addsub(a, b, status, false);
1997 }
1998 
1999 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2000 {
2001     return floatx80_addsub(a, b, status, true);
2002 }
2003 
2004 /*
2005  * Multiplication
2006  */
2007 
2008 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2009 {
2010     FloatParts64 pa, pb, *pr;
2011 
2012     float16_unpack_canonical(&pa, a, status);
2013     float16_unpack_canonical(&pb, b, status);
2014     pr = parts_mul(&pa, &pb, status);
2015 
2016     return float16_round_pack_canonical(pr, status);
2017 }
2018 
2019 static float32 QEMU_SOFTFLOAT_ATTR
2020 soft_f32_mul(float32 a, float32 b, float_status *status)
2021 {
2022     FloatParts64 pa, pb, *pr;
2023 
2024     float32_unpack_canonical(&pa, a, status);
2025     float32_unpack_canonical(&pb, b, status);
2026     pr = parts_mul(&pa, &pb, status);
2027 
2028     return float32_round_pack_canonical(pr, status);
2029 }
2030 
2031 static float64 QEMU_SOFTFLOAT_ATTR
2032 soft_f64_mul(float64 a, float64 b, float_status *status)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035 
2036     float64_unpack_canonical(&pa, a, status);
2037     float64_unpack_canonical(&pb, b, status);
2038     pr = parts_mul(&pa, &pb, status);
2039 
2040     return float64_round_pack_canonical(pr, status);
2041 }
2042 
2043 static float hard_f32_mul(float a, float b)
2044 {
2045     return a * b;
2046 }
2047 
2048 static double hard_f64_mul(double a, double b)
2049 {
2050     return a * b;
2051 }
2052 
2053 float32 QEMU_FLATTEN
2054 float32_mul(float32 a, float32 b, float_status *s)
2055 {
2056     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2057                         f32_is_zon2, f32_addsubmul_post);
2058 }
2059 
2060 float64 QEMU_FLATTEN
2061 float64_mul(float64 a, float64 b, float_status *s)
2062 {
2063     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2064                         f64_is_zon2, f64_addsubmul_post);
2065 }
2066 
2067 bfloat16 QEMU_FLATTEN
2068 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2069 {
2070     FloatParts64 pa, pb, *pr;
2071 
2072     bfloat16_unpack_canonical(&pa, a, status);
2073     bfloat16_unpack_canonical(&pb, b, status);
2074     pr = parts_mul(&pa, &pb, status);
2075 
2076     return bfloat16_round_pack_canonical(pr, status);
2077 }
2078 
2079 float128 QEMU_FLATTEN
2080 float128_mul(float128 a, float128 b, float_status *status)
2081 {
2082     FloatParts128 pa, pb, *pr;
2083 
2084     float128_unpack_canonical(&pa, a, status);
2085     float128_unpack_canonical(&pb, b, status);
2086     pr = parts_mul(&pa, &pb, status);
2087 
2088     return float128_round_pack_canonical(pr, status);
2089 }
2090 
2091 floatx80 QEMU_FLATTEN
2092 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2093 {
2094     FloatParts128 pa, pb, *pr;
2095 
2096     if (!floatx80_unpack_canonical(&pa, a, status) ||
2097         !floatx80_unpack_canonical(&pb, b, status)) {
2098         return floatx80_default_nan(status);
2099     }
2100 
2101     pr = parts_mul(&pa, &pb, status);
2102     return floatx80_round_pack_canonical(pr, status);
2103 }
2104 
2105 /*
2106  * Fused multiply-add
2107  */
2108 
2109 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2110                                     int flags, float_status *status)
2111 {
2112     FloatParts64 pa, pb, pc, *pr;
2113 
2114     float16_unpack_canonical(&pa, a, status);
2115     float16_unpack_canonical(&pb, b, status);
2116     float16_unpack_canonical(&pc, c, status);
2117     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2118 
2119     return float16_round_pack_canonical(pr, status);
2120 }
2121 
2122 static float32 QEMU_SOFTFLOAT_ATTR
2123 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2124                 float_status *status)
2125 {
2126     FloatParts64 pa, pb, pc, *pr;
2127 
2128     float32_unpack_canonical(&pa, a, status);
2129     float32_unpack_canonical(&pb, b, status);
2130     float32_unpack_canonical(&pc, c, status);
2131     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2132 
2133     return float32_round_pack_canonical(pr, status);
2134 }
2135 
2136 static float64 QEMU_SOFTFLOAT_ATTR
2137 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2138                 float_status *status)
2139 {
2140     FloatParts64 pa, pb, pc, *pr;
2141 
2142     float64_unpack_canonical(&pa, a, status);
2143     float64_unpack_canonical(&pb, b, status);
2144     float64_unpack_canonical(&pc, c, status);
2145     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2146 
2147     return float64_round_pack_canonical(pr, status);
2148 }
2149 
2150 static bool force_soft_fma;
2151 
2152 float32 QEMU_FLATTEN
2153 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2154 {
2155     union_float32 ua, ub, uc, ur;
2156 
2157     ua.s = xa;
2158     ub.s = xb;
2159     uc.s = xc;
2160 
2161     if (unlikely(!can_use_fpu(s))) {
2162         goto soft;
2163     }
2164     if (unlikely(flags & float_muladd_halve_result)) {
2165         goto soft;
2166     }
2167 
2168     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2169     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2170         goto soft;
2171     }
2172 
2173     if (unlikely(force_soft_fma)) {
2174         goto soft;
2175     }
2176 
2177     /*
2178      * When (a || b) == 0, there's no need to check for under/over flow,
2179      * since we know the addend is (normal || 0) and the product is 0.
2180      */
2181     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2182         union_float32 up;
2183         bool prod_sign;
2184 
2185         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2186         prod_sign ^= !!(flags & float_muladd_negate_product);
2187         up.s = float32_set_sign(float32_zero, prod_sign);
2188 
2189         if (flags & float_muladd_negate_c) {
2190             uc.h = -uc.h;
2191         }
2192         ur.h = up.h + uc.h;
2193     } else {
2194         union_float32 ua_orig = ua;
2195         union_float32 uc_orig = uc;
2196 
2197         if (flags & float_muladd_negate_product) {
2198             ua.h = -ua.h;
2199         }
2200         if (flags & float_muladd_negate_c) {
2201             uc.h = -uc.h;
2202         }
2203 
2204         ur.h = fmaf(ua.h, ub.h, uc.h);
2205 
2206         if (unlikely(f32_is_inf(ur))) {
2207             float_raise(float_flag_overflow, s);
2208         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2209             ua = ua_orig;
2210             uc = uc_orig;
2211             goto soft;
2212         }
2213     }
2214     if (flags & float_muladd_negate_result) {
2215         return float32_chs(ur.s);
2216     }
2217     return ur.s;
2218 
2219  soft:
2220     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2221 }
2222 
2223 float64 QEMU_FLATTEN
2224 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2225 {
2226     union_float64 ua, ub, uc, ur;
2227 
2228     ua.s = xa;
2229     ub.s = xb;
2230     uc.s = xc;
2231 
2232     if (unlikely(!can_use_fpu(s))) {
2233         goto soft;
2234     }
2235     if (unlikely(flags & float_muladd_halve_result)) {
2236         goto soft;
2237     }
2238 
2239     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2240     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2241         goto soft;
2242     }
2243 
2244     if (unlikely(force_soft_fma)) {
2245         goto soft;
2246     }
2247 
2248     /*
2249      * When (a || b) == 0, there's no need to check for under/over flow,
2250      * since we know the addend is (normal || 0) and the product is 0.
2251      */
2252     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2253         union_float64 up;
2254         bool prod_sign;
2255 
2256         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2257         prod_sign ^= !!(flags & float_muladd_negate_product);
2258         up.s = float64_set_sign(float64_zero, prod_sign);
2259 
2260         if (flags & float_muladd_negate_c) {
2261             uc.h = -uc.h;
2262         }
2263         ur.h = up.h + uc.h;
2264     } else {
2265         union_float64 ua_orig = ua;
2266         union_float64 uc_orig = uc;
2267 
2268         if (flags & float_muladd_negate_product) {
2269             ua.h = -ua.h;
2270         }
2271         if (flags & float_muladd_negate_c) {
2272             uc.h = -uc.h;
2273         }
2274 
2275         ur.h = fma(ua.h, ub.h, uc.h);
2276 
2277         if (unlikely(f64_is_inf(ur))) {
2278             float_raise(float_flag_overflow, s);
2279         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2280             ua = ua_orig;
2281             uc = uc_orig;
2282             goto soft;
2283         }
2284     }
2285     if (flags & float_muladd_negate_result) {
2286         return float64_chs(ur.s);
2287     }
2288     return ur.s;
2289 
2290  soft:
2291     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2292 }
2293 
2294 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2295                                       int flags, float_status *status)
2296 {
2297     FloatParts64 pa, pb, pc, *pr;
2298 
2299     bfloat16_unpack_canonical(&pa, a, status);
2300     bfloat16_unpack_canonical(&pb, b, status);
2301     bfloat16_unpack_canonical(&pc, c, status);
2302     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2303 
2304     return bfloat16_round_pack_canonical(pr, status);
2305 }
2306 
2307 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2308                                       int flags, float_status *status)
2309 {
2310     FloatParts128 pa, pb, pc, *pr;
2311 
2312     float128_unpack_canonical(&pa, a, status);
2313     float128_unpack_canonical(&pb, b, status);
2314     float128_unpack_canonical(&pc, c, status);
2315     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2316 
2317     return float128_round_pack_canonical(pr, status);
2318 }
2319 
2320 /*
2321  * Division
2322  */
2323 
2324 float16 float16_div(float16 a, float16 b, float_status *status)
2325 {
2326     FloatParts64 pa, pb, *pr;
2327 
2328     float16_unpack_canonical(&pa, a, status);
2329     float16_unpack_canonical(&pb, b, status);
2330     pr = parts_div(&pa, &pb, status);
2331 
2332     return float16_round_pack_canonical(pr, status);
2333 }
2334 
2335 static float32 QEMU_SOFTFLOAT_ATTR
2336 soft_f32_div(float32 a, float32 b, float_status *status)
2337 {
2338     FloatParts64 pa, pb, *pr;
2339 
2340     float32_unpack_canonical(&pa, a, status);
2341     float32_unpack_canonical(&pb, b, status);
2342     pr = parts_div(&pa, &pb, status);
2343 
2344     return float32_round_pack_canonical(pr, status);
2345 }
2346 
2347 static float64 QEMU_SOFTFLOAT_ATTR
2348 soft_f64_div(float64 a, float64 b, float_status *status)
2349 {
2350     FloatParts64 pa, pb, *pr;
2351 
2352     float64_unpack_canonical(&pa, a, status);
2353     float64_unpack_canonical(&pb, b, status);
2354     pr = parts_div(&pa, &pb, status);
2355 
2356     return float64_round_pack_canonical(pr, status);
2357 }
2358 
2359 static float hard_f32_div(float a, float b)
2360 {
2361     return a / b;
2362 }
2363 
2364 static double hard_f64_div(double a, double b)
2365 {
2366     return a / b;
2367 }
2368 
2369 static bool f32_div_pre(union_float32 a, union_float32 b)
2370 {
2371     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2372         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2373                fpclassify(b.h) == FP_NORMAL;
2374     }
2375     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2376 }
2377 
2378 static bool f64_div_pre(union_float64 a, union_float64 b)
2379 {
2380     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2381         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2382                fpclassify(b.h) == FP_NORMAL;
2383     }
2384     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2385 }
2386 
2387 static bool f32_div_post(union_float32 a, union_float32 b)
2388 {
2389     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2390         return fpclassify(a.h) != FP_ZERO;
2391     }
2392     return !float32_is_zero(a.s);
2393 }
2394 
2395 static bool f64_div_post(union_float64 a, union_float64 b)
2396 {
2397     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2398         return fpclassify(a.h) != FP_ZERO;
2399     }
2400     return !float64_is_zero(a.s);
2401 }
2402 
2403 float32 QEMU_FLATTEN
2404 float32_div(float32 a, float32 b, float_status *s)
2405 {
2406     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2407                         f32_div_pre, f32_div_post);
2408 }
2409 
2410 float64 QEMU_FLATTEN
2411 float64_div(float64 a, float64 b, float_status *s)
2412 {
2413     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2414                         f64_div_pre, f64_div_post);
2415 }
2416 
2417 bfloat16 QEMU_FLATTEN
2418 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2419 {
2420     FloatParts64 pa, pb, *pr;
2421 
2422     bfloat16_unpack_canonical(&pa, a, status);
2423     bfloat16_unpack_canonical(&pb, b, status);
2424     pr = parts_div(&pa, &pb, status);
2425 
2426     return bfloat16_round_pack_canonical(pr, status);
2427 }
2428 
2429 float128 QEMU_FLATTEN
2430 float128_div(float128 a, float128 b, float_status *status)
2431 {
2432     FloatParts128 pa, pb, *pr;
2433 
2434     float128_unpack_canonical(&pa, a, status);
2435     float128_unpack_canonical(&pb, b, status);
2436     pr = parts_div(&pa, &pb, status);
2437 
2438     return float128_round_pack_canonical(pr, status);
2439 }
2440 
2441 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2442 {
2443     FloatParts128 pa, pb, *pr;
2444 
2445     if (!floatx80_unpack_canonical(&pa, a, status) ||
2446         !floatx80_unpack_canonical(&pb, b, status)) {
2447         return floatx80_default_nan(status);
2448     }
2449 
2450     pr = parts_div(&pa, &pb, status);
2451     return floatx80_round_pack_canonical(pr, status);
2452 }
2453 
2454 /*
2455  * Remainder
2456  */
2457 
2458 float32 float32_rem(float32 a, float32 b, float_status *status)
2459 {
2460     FloatParts64 pa, pb, *pr;
2461 
2462     float32_unpack_canonical(&pa, a, status);
2463     float32_unpack_canonical(&pb, b, status);
2464     pr = parts_modrem(&pa, &pb, NULL, status);
2465 
2466     return float32_round_pack_canonical(pr, status);
2467 }
2468 
2469 float64 float64_rem(float64 a, float64 b, float_status *status)
2470 {
2471     FloatParts64 pa, pb, *pr;
2472 
2473     float64_unpack_canonical(&pa, a, status);
2474     float64_unpack_canonical(&pb, b, status);
2475     pr = parts_modrem(&pa, &pb, NULL, status);
2476 
2477     return float64_round_pack_canonical(pr, status);
2478 }
2479 
2480 float128 float128_rem(float128 a, float128 b, float_status *status)
2481 {
2482     FloatParts128 pa, pb, *pr;
2483 
2484     float128_unpack_canonical(&pa, a, status);
2485     float128_unpack_canonical(&pb, b, status);
2486     pr = parts_modrem(&pa, &pb, NULL, status);
2487 
2488     return float128_round_pack_canonical(pr, status);
2489 }
2490 
2491 /*
2492  * Returns the remainder of the extended double-precision floating-point value
2493  * `a' with respect to the corresponding value `b'.
2494  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2495  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2496  * the remainder based on truncating the quotient toward zero instead and
2497  * *quotient is set to the low 64 bits of the absolute value of the integer
2498  * quotient.
2499  */
2500 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2501                          uint64_t *quotient, float_status *status)
2502 {
2503     FloatParts128 pa, pb, *pr;
2504 
2505     *quotient = 0;
2506     if (!floatx80_unpack_canonical(&pa, a, status) ||
2507         !floatx80_unpack_canonical(&pb, b, status)) {
2508         return floatx80_default_nan(status);
2509     }
2510     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2511 
2512     return floatx80_round_pack_canonical(pr, status);
2513 }
2514 
2515 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2516 {
2517     uint64_t quotient;
2518     return floatx80_modrem(a, b, false, &quotient, status);
2519 }
2520 
2521 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2522 {
2523     uint64_t quotient;
2524     return floatx80_modrem(a, b, true, &quotient, status);
2525 }
2526 
2527 /*
2528  * Float to Float conversions
2529  *
2530  * Returns the result of converting one float format to another. The
2531  * conversion is performed according to the IEC/IEEE Standard for
2532  * Binary Floating-Point Arithmetic.
2533  *
2534  * Usually this only needs to take care of raising invalid exceptions
2535  * and handling the conversion on NaNs.
2536  */
2537 
2538 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2539 {
2540     switch (a->cls) {
2541     case float_class_qnan:
2542     case float_class_snan:
2543         /*
2544          * There is no NaN in the destination format.  Raise Invalid
2545          * and return a zero with the sign of the input NaN.
2546          */
2547         float_raise(float_flag_invalid, s);
2548         a->cls = float_class_zero;
2549         break;
2550 
2551     case float_class_inf:
2552         /*
2553          * There is no Inf in the destination format.  Raise Invalid
2554          * and return the maximum normal with the correct sign.
2555          */
2556         float_raise(float_flag_invalid, s);
2557         a->cls = float_class_normal;
2558         a->exp = float16_params_ahp.exp_max;
2559         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2560                                   float16_params_ahp.frac_size + 1);
2561         break;
2562 
2563     case float_class_normal:
2564     case float_class_zero:
2565         break;
2566 
2567     default:
2568         g_assert_not_reached();
2569     }
2570 }
2571 
2572 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2573 {
2574     if (is_nan(a->cls)) {
2575         parts_return_nan(a, s);
2576     }
2577 }
2578 
2579 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2580 {
2581     if (is_nan(a->cls)) {
2582         parts_return_nan(a, s);
2583     }
2584 }
2585 
2586 #define parts_float_to_float(P, S) \
2587     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2588 
2589 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2590                                         float_status *s)
2591 {
2592     a->cls = b->cls;
2593     a->sign = b->sign;
2594     a->exp = b->exp;
2595 
2596     if (a->cls == float_class_normal) {
2597         frac_truncjam(a, b);
2598     } else if (is_nan(a->cls)) {
2599         /* Discard the low bits of the NaN. */
2600         a->frac = b->frac_hi;
2601         parts_return_nan(a, s);
2602     }
2603 }
2604 
2605 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2606                                        float_status *s)
2607 {
2608     a->cls = b->cls;
2609     a->sign = b->sign;
2610     a->exp = b->exp;
2611     frac_widen(a, b);
2612 
2613     if (is_nan(a->cls)) {
2614         parts_return_nan(a, s);
2615     }
2616 }
2617 
2618 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2619 {
2620     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2621     FloatParts64 p;
2622 
2623     float16a_unpack_canonical(&p, a, s, fmt16);
2624     parts_float_to_float(&p, s);
2625     return float32_round_pack_canonical(&p, s);
2626 }
2627 
2628 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2629 {
2630     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2631     FloatParts64 p;
2632 
2633     float16a_unpack_canonical(&p, a, s, fmt16);
2634     parts_float_to_float(&p, s);
2635     return float64_round_pack_canonical(&p, s);
2636 }
2637 
2638 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2639 {
2640     FloatParts64 p;
2641     const FloatFmt *fmt;
2642 
2643     float32_unpack_canonical(&p, a, s);
2644     if (ieee) {
2645         parts_float_to_float(&p, s);
2646         fmt = &float16_params;
2647     } else {
2648         parts_float_to_ahp(&p, s);
2649         fmt = &float16_params_ahp;
2650     }
2651     return float16a_round_pack_canonical(&p, s, fmt);
2652 }
2653 
2654 static float64 QEMU_SOFTFLOAT_ATTR
2655 soft_float32_to_float64(float32 a, float_status *s)
2656 {
2657     FloatParts64 p;
2658 
2659     float32_unpack_canonical(&p, a, s);
2660     parts_float_to_float(&p, s);
2661     return float64_round_pack_canonical(&p, s);
2662 }
2663 
2664 float64 float32_to_float64(float32 a, float_status *s)
2665 {
2666     if (likely(float32_is_normal(a))) {
2667         /* Widening conversion can never produce inexact results.  */
2668         union_float32 uf;
2669         union_float64 ud;
2670         uf.s = a;
2671         ud.h = uf.h;
2672         return ud.s;
2673     } else if (float32_is_zero(a)) {
2674         return float64_set_sign(float64_zero, float32_is_neg(a));
2675     } else {
2676         return soft_float32_to_float64(a, s);
2677     }
2678 }
2679 
2680 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2681 {
2682     FloatParts64 p;
2683     const FloatFmt *fmt;
2684 
2685     float64_unpack_canonical(&p, a, s);
2686     if (ieee) {
2687         parts_float_to_float(&p, s);
2688         fmt = &float16_params;
2689     } else {
2690         parts_float_to_ahp(&p, s);
2691         fmt = &float16_params_ahp;
2692     }
2693     return float16a_round_pack_canonical(&p, s, fmt);
2694 }
2695 
2696 float32 float64_to_float32(float64 a, float_status *s)
2697 {
2698     FloatParts64 p;
2699 
2700     float64_unpack_canonical(&p, a, s);
2701     parts_float_to_float(&p, s);
2702     return float32_round_pack_canonical(&p, s);
2703 }
2704 
2705 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2706 {
2707     FloatParts64 p;
2708 
2709     bfloat16_unpack_canonical(&p, a, s);
2710     parts_float_to_float(&p, s);
2711     return float32_round_pack_canonical(&p, s);
2712 }
2713 
2714 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2715 {
2716     FloatParts64 p;
2717 
2718     bfloat16_unpack_canonical(&p, a, s);
2719     parts_float_to_float(&p, s);
2720     return float64_round_pack_canonical(&p, s);
2721 }
2722 
2723 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2724 {
2725     FloatParts64 p;
2726 
2727     float32_unpack_canonical(&p, a, s);
2728     parts_float_to_float(&p, s);
2729     return bfloat16_round_pack_canonical(&p, s);
2730 }
2731 
2732 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2733 {
2734     FloatParts64 p;
2735 
2736     float64_unpack_canonical(&p, a, s);
2737     parts_float_to_float(&p, s);
2738     return bfloat16_round_pack_canonical(&p, s);
2739 }
2740 
2741 float32 float128_to_float32(float128 a, float_status *s)
2742 {
2743     FloatParts64 p64;
2744     FloatParts128 p128;
2745 
2746     float128_unpack_canonical(&p128, a, s);
2747     parts_float_to_float_narrow(&p64, &p128, s);
2748     return float32_round_pack_canonical(&p64, s);
2749 }
2750 
2751 float64 float128_to_float64(float128 a, float_status *s)
2752 {
2753     FloatParts64 p64;
2754     FloatParts128 p128;
2755 
2756     float128_unpack_canonical(&p128, a, s);
2757     parts_float_to_float_narrow(&p64, &p128, s);
2758     return float64_round_pack_canonical(&p64, s);
2759 }
2760 
2761 float128 float32_to_float128(float32 a, float_status *s)
2762 {
2763     FloatParts64 p64;
2764     FloatParts128 p128;
2765 
2766     float32_unpack_canonical(&p64, a, s);
2767     parts_float_to_float_widen(&p128, &p64, s);
2768     return float128_round_pack_canonical(&p128, s);
2769 }
2770 
2771 float128 float64_to_float128(float64 a, float_status *s)
2772 {
2773     FloatParts64 p64;
2774     FloatParts128 p128;
2775 
2776     float64_unpack_canonical(&p64, a, s);
2777     parts_float_to_float_widen(&p128, &p64, s);
2778     return float128_round_pack_canonical(&p128, s);
2779 }
2780 
2781 float32 floatx80_to_float32(floatx80 a, float_status *s)
2782 {
2783     FloatParts64 p64;
2784     FloatParts128 p128;
2785 
2786     if (floatx80_unpack_canonical(&p128, a, s)) {
2787         parts_float_to_float_narrow(&p64, &p128, s);
2788     } else {
2789         parts_default_nan(&p64, s);
2790     }
2791     return float32_round_pack_canonical(&p64, s);
2792 }
2793 
2794 float64 floatx80_to_float64(floatx80 a, float_status *s)
2795 {
2796     FloatParts64 p64;
2797     FloatParts128 p128;
2798 
2799     if (floatx80_unpack_canonical(&p128, a, s)) {
2800         parts_float_to_float_narrow(&p64, &p128, s);
2801     } else {
2802         parts_default_nan(&p64, s);
2803     }
2804     return float64_round_pack_canonical(&p64, s);
2805 }
2806 
2807 float128 floatx80_to_float128(floatx80 a, float_status *s)
2808 {
2809     FloatParts128 p;
2810 
2811     if (floatx80_unpack_canonical(&p, a, s)) {
2812         parts_float_to_float(&p, s);
2813     } else {
2814         parts_default_nan(&p, s);
2815     }
2816     return float128_round_pack_canonical(&p, s);
2817 }
2818 
2819 floatx80 float32_to_floatx80(float32 a, float_status *s)
2820 {
2821     FloatParts64 p64;
2822     FloatParts128 p128;
2823 
2824     float32_unpack_canonical(&p64, a, s);
2825     parts_float_to_float_widen(&p128, &p64, s);
2826     return floatx80_round_pack_canonical(&p128, s);
2827 }
2828 
2829 floatx80 float64_to_floatx80(float64 a, float_status *s)
2830 {
2831     FloatParts64 p64;
2832     FloatParts128 p128;
2833 
2834     float64_unpack_canonical(&p64, a, s);
2835     parts_float_to_float_widen(&p128, &p64, s);
2836     return floatx80_round_pack_canonical(&p128, s);
2837 }
2838 
2839 floatx80 float128_to_floatx80(float128 a, float_status *s)
2840 {
2841     FloatParts128 p;
2842 
2843     float128_unpack_canonical(&p, a, s);
2844     parts_float_to_float(&p, s);
2845     return floatx80_round_pack_canonical(&p, s);
2846 }
2847 
2848 /*
2849  * Round to integral value
2850  */
2851 
2852 float16 float16_round_to_int(float16 a, float_status *s)
2853 {
2854     FloatParts64 p;
2855 
2856     float16_unpack_canonical(&p, a, s);
2857     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2858     return float16_round_pack_canonical(&p, s);
2859 }
2860 
2861 float32 float32_round_to_int(float32 a, float_status *s)
2862 {
2863     FloatParts64 p;
2864 
2865     float32_unpack_canonical(&p, a, s);
2866     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2867     return float32_round_pack_canonical(&p, s);
2868 }
2869 
2870 float64 float64_round_to_int(float64 a, float_status *s)
2871 {
2872     FloatParts64 p;
2873 
2874     float64_unpack_canonical(&p, a, s);
2875     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2876     return float64_round_pack_canonical(&p, s);
2877 }
2878 
2879 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2880 {
2881     FloatParts64 p;
2882 
2883     bfloat16_unpack_canonical(&p, a, s);
2884     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2885     return bfloat16_round_pack_canonical(&p, s);
2886 }
2887 
2888 float128 float128_round_to_int(float128 a, float_status *s)
2889 {
2890     FloatParts128 p;
2891 
2892     float128_unpack_canonical(&p, a, s);
2893     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2894     return float128_round_pack_canonical(&p, s);
2895 }
2896 
2897 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2898 {
2899     FloatParts128 p;
2900 
2901     if (!floatx80_unpack_canonical(&p, a, status)) {
2902         return floatx80_default_nan(status);
2903     }
2904 
2905     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2906                        &floatx80_params[status->floatx80_rounding_precision]);
2907     return floatx80_round_pack_canonical(&p, status);
2908 }
2909 
2910 /*
2911  * Floating-point to signed integer conversions
2912  */
2913 
2914 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2915                               float_status *s)
2916 {
2917     FloatParts64 p;
2918 
2919     float16_unpack_canonical(&p, a, s);
2920     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2921 }
2922 
2923 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2924                                 float_status *s)
2925 {
2926     FloatParts64 p;
2927 
2928     float16_unpack_canonical(&p, a, s);
2929     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2930 }
2931 
2932 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2933                                 float_status *s)
2934 {
2935     FloatParts64 p;
2936 
2937     float16_unpack_canonical(&p, a, s);
2938     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2939 }
2940 
2941 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2942                                 float_status *s)
2943 {
2944     FloatParts64 p;
2945 
2946     float16_unpack_canonical(&p, a, s);
2947     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2948 }
2949 
2950 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2951                                 float_status *s)
2952 {
2953     FloatParts64 p;
2954 
2955     float32_unpack_canonical(&p, a, s);
2956     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2957 }
2958 
2959 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2960                                 float_status *s)
2961 {
2962     FloatParts64 p;
2963 
2964     float32_unpack_canonical(&p, a, s);
2965     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2966 }
2967 
2968 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2969                                 float_status *s)
2970 {
2971     FloatParts64 p;
2972 
2973     float32_unpack_canonical(&p, a, s);
2974     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2975 }
2976 
2977 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2978                                 float_status *s)
2979 {
2980     FloatParts64 p;
2981 
2982     float64_unpack_canonical(&p, a, s);
2983     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2984 }
2985 
2986 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2987                                 float_status *s)
2988 {
2989     FloatParts64 p;
2990 
2991     float64_unpack_canonical(&p, a, s);
2992     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2993 }
2994 
2995 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2996                                 float_status *s)
2997 {
2998     FloatParts64 p;
2999 
3000     float64_unpack_canonical(&p, a, s);
3001     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3002 }
3003 
3004 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3005                                  float_status *s)
3006 {
3007     FloatParts64 p;
3008 
3009     bfloat16_unpack_canonical(&p, a, s);
3010     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3011 }
3012 
3013 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3014                                  float_status *s)
3015 {
3016     FloatParts64 p;
3017 
3018     bfloat16_unpack_canonical(&p, a, s);
3019     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3020 }
3021 
3022 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3023                                  float_status *s)
3024 {
3025     FloatParts64 p;
3026 
3027     bfloat16_unpack_canonical(&p, a, s);
3028     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3029 }
3030 
3031 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3032                                         int scale, float_status *s)
3033 {
3034     FloatParts128 p;
3035 
3036     float128_unpack_canonical(&p, a, s);
3037     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3038 }
3039 
3040 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3041                                         int scale, float_status *s)
3042 {
3043     FloatParts128 p;
3044 
3045     float128_unpack_canonical(&p, a, s);
3046     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3047 }
3048 
3049 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3050                                         int scale, float_status *s)
3051 {
3052     FloatParts128 p;
3053 
3054     if (!floatx80_unpack_canonical(&p, a, s)) {
3055         parts_default_nan(&p, s);
3056     }
3057     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3058 }
3059 
3060 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3061                                         int scale, float_status *s)
3062 {
3063     FloatParts128 p;
3064 
3065     if (!floatx80_unpack_canonical(&p, a, s)) {
3066         parts_default_nan(&p, s);
3067     }
3068     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3069 }
3070 
3071 int8_t float16_to_int8(float16 a, float_status *s)
3072 {
3073     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3074 }
3075 
3076 int16_t float16_to_int16(float16 a, float_status *s)
3077 {
3078     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3079 }
3080 
3081 int32_t float16_to_int32(float16 a, float_status *s)
3082 {
3083     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3084 }
3085 
3086 int64_t float16_to_int64(float16 a, float_status *s)
3087 {
3088     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3089 }
3090 
3091 int16_t float32_to_int16(float32 a, float_status *s)
3092 {
3093     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3094 }
3095 
3096 int32_t float32_to_int32(float32 a, float_status *s)
3097 {
3098     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3099 }
3100 
3101 int64_t float32_to_int64(float32 a, float_status *s)
3102 {
3103     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3104 }
3105 
3106 int16_t float64_to_int16(float64 a, float_status *s)
3107 {
3108     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3109 }
3110 
3111 int32_t float64_to_int32(float64 a, float_status *s)
3112 {
3113     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3114 }
3115 
3116 int64_t float64_to_int64(float64 a, float_status *s)
3117 {
3118     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3119 }
3120 
3121 int32_t float128_to_int32(float128 a, float_status *s)
3122 {
3123     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3124 }
3125 
3126 int64_t float128_to_int64(float128 a, float_status *s)
3127 {
3128     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3129 }
3130 
3131 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3132 {
3133     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3134 }
3135 
3136 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3137 {
3138     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3139 }
3140 
3141 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3142 {
3143     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3144 }
3145 
3146 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3147 {
3148     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3149 }
3150 
3151 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3152 {
3153     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3154 }
3155 
3156 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3157 {
3158     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3159 }
3160 
3161 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3162 {
3163     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3164 }
3165 
3166 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3167 {
3168     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3169 }
3170 
3171 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3172 {
3173     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3174 }
3175 
3176 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3177 {
3178     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3179 }
3180 
3181 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3182 {
3183     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3184 }
3185 
3186 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3187 {
3188     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3189 }
3190 
3191 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3192 {
3193     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3194 }
3195 
3196 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3197 {
3198     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3199 }
3200 
3201 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3202 {
3203     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3204 }
3205 
3206 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3207 {
3208     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3209 }
3210 
3211 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3212 {
3213     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3214 }
3215 
3216 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3217 {
3218     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3219 }
3220 
3221 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3222 {
3223     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3224 }
3225 
3226 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3227 {
3228     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3229 }
3230 
3231 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3232 {
3233     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3234 }
3235 
3236 /*
3237  * Floating-point to unsigned integer conversions
3238  */
3239 
3240 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3241                                 float_status *s)
3242 {
3243     FloatParts64 p;
3244 
3245     float16_unpack_canonical(&p, a, s);
3246     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3247 }
3248 
3249 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3250                                   float_status *s)
3251 {
3252     FloatParts64 p;
3253 
3254     float16_unpack_canonical(&p, a, s);
3255     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3256 }
3257 
3258 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3259                                   float_status *s)
3260 {
3261     FloatParts64 p;
3262 
3263     float16_unpack_canonical(&p, a, s);
3264     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3265 }
3266 
3267 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3268                                   float_status *s)
3269 {
3270     FloatParts64 p;
3271 
3272     float16_unpack_canonical(&p, a, s);
3273     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3274 }
3275 
3276 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3277                                   float_status *s)
3278 {
3279     FloatParts64 p;
3280 
3281     float32_unpack_canonical(&p, a, s);
3282     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3283 }
3284 
3285 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3286                                   float_status *s)
3287 {
3288     FloatParts64 p;
3289 
3290     float32_unpack_canonical(&p, a, s);
3291     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3292 }
3293 
3294 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3295                                   float_status *s)
3296 {
3297     FloatParts64 p;
3298 
3299     float32_unpack_canonical(&p, a, s);
3300     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3301 }
3302 
3303 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3304                                   float_status *s)
3305 {
3306     FloatParts64 p;
3307 
3308     float64_unpack_canonical(&p, a, s);
3309     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3310 }
3311 
3312 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3313                                   float_status *s)
3314 {
3315     FloatParts64 p;
3316 
3317     float64_unpack_canonical(&p, a, s);
3318     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3319 }
3320 
3321 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3322                                   float_status *s)
3323 {
3324     FloatParts64 p;
3325 
3326     float64_unpack_canonical(&p, a, s);
3327     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3328 }
3329 
3330 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3331                                    int scale, float_status *s)
3332 {
3333     FloatParts64 p;
3334 
3335     bfloat16_unpack_canonical(&p, a, s);
3336     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3337 }
3338 
3339 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3340                                    int scale, float_status *s)
3341 {
3342     FloatParts64 p;
3343 
3344     bfloat16_unpack_canonical(&p, a, s);
3345     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3346 }
3347 
3348 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3349                                    int scale, float_status *s)
3350 {
3351     FloatParts64 p;
3352 
3353     bfloat16_unpack_canonical(&p, a, s);
3354     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3355 }
3356 
3357 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3358                                           int scale, float_status *s)
3359 {
3360     FloatParts128 p;
3361 
3362     float128_unpack_canonical(&p, a, s);
3363     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3364 }
3365 
3366 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3367                                           int scale, float_status *s)
3368 {
3369     FloatParts128 p;
3370 
3371     float128_unpack_canonical(&p, a, s);
3372     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3373 }
3374 
3375 uint8_t float16_to_uint8(float16 a, float_status *s)
3376 {
3377     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3378 }
3379 
3380 uint16_t float16_to_uint16(float16 a, float_status *s)
3381 {
3382     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3383 }
3384 
3385 uint32_t float16_to_uint32(float16 a, float_status *s)
3386 {
3387     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3388 }
3389 
3390 uint64_t float16_to_uint64(float16 a, float_status *s)
3391 {
3392     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3393 }
3394 
3395 uint16_t float32_to_uint16(float32 a, float_status *s)
3396 {
3397     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3398 }
3399 
3400 uint32_t float32_to_uint32(float32 a, float_status *s)
3401 {
3402     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3403 }
3404 
3405 uint64_t float32_to_uint64(float32 a, float_status *s)
3406 {
3407     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3408 }
3409 
3410 uint16_t float64_to_uint16(float64 a, float_status *s)
3411 {
3412     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3413 }
3414 
3415 uint32_t float64_to_uint32(float64 a, float_status *s)
3416 {
3417     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3418 }
3419 
3420 uint64_t float64_to_uint64(float64 a, float_status *s)
3421 {
3422     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3423 }
3424 
3425 uint32_t float128_to_uint32(float128 a, float_status *s)
3426 {
3427     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3428 }
3429 
3430 uint64_t float128_to_uint64(float128 a, float_status *s)
3431 {
3432     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3433 }
3434 
3435 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3436 {
3437     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3438 }
3439 
3440 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3441 {
3442     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3443 }
3444 
3445 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3446 {
3447     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3448 }
3449 
3450 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3451 {
3452     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3453 }
3454 
3455 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3456 {
3457     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3458 }
3459 
3460 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3461 {
3462     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3463 }
3464 
3465 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3466 {
3467     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3468 }
3469 
3470 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3471 {
3472     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3473 }
3474 
3475 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3476 {
3477     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3478 }
3479 
3480 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3481 {
3482     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3483 }
3484 
3485 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3486 {
3487     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3488 }
3489 
3490 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3491 {
3492     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3493 }
3494 
3495 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3496 {
3497     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3498 }
3499 
3500 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3501 {
3502     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3503 }
3504 
3505 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3506 {
3507     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3508 }
3509 
3510 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3511 {
3512     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3513 }
3514 
3515 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3516 {
3517     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3518 }
3519 
3520 /*
3521  * Signed integer to floating-point conversions
3522  */
3523 
3524 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3525 {
3526     FloatParts64 p;
3527 
3528     parts_sint_to_float(&p, a, scale, status);
3529     return float16_round_pack_canonical(&p, status);
3530 }
3531 
3532 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3533 {
3534     return int64_to_float16_scalbn(a, scale, status);
3535 }
3536 
3537 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3538 {
3539     return int64_to_float16_scalbn(a, scale, status);
3540 }
3541 
3542 float16 int64_to_float16(int64_t a, float_status *status)
3543 {
3544     return int64_to_float16_scalbn(a, 0, status);
3545 }
3546 
3547 float16 int32_to_float16(int32_t a, float_status *status)
3548 {
3549     return int64_to_float16_scalbn(a, 0, status);
3550 }
3551 
3552 float16 int16_to_float16(int16_t a, float_status *status)
3553 {
3554     return int64_to_float16_scalbn(a, 0, status);
3555 }
3556 
3557 float16 int8_to_float16(int8_t a, float_status *status)
3558 {
3559     return int64_to_float16_scalbn(a, 0, status);
3560 }
3561 
3562 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3563 {
3564     FloatParts64 p;
3565 
3566     /* Without scaling, there are no overflow concerns. */
3567     if (likely(scale == 0) && can_use_fpu(status)) {
3568         union_float32 ur;
3569         ur.h = a;
3570         return ur.s;
3571     }
3572 
3573     parts64_sint_to_float(&p, a, scale, status);
3574     return float32_round_pack_canonical(&p, status);
3575 }
3576 
3577 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3578 {
3579     return int64_to_float32_scalbn(a, scale, status);
3580 }
3581 
3582 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3583 {
3584     return int64_to_float32_scalbn(a, scale, status);
3585 }
3586 
3587 float32 int64_to_float32(int64_t a, float_status *status)
3588 {
3589     return int64_to_float32_scalbn(a, 0, status);
3590 }
3591 
3592 float32 int32_to_float32(int32_t a, float_status *status)
3593 {
3594     return int64_to_float32_scalbn(a, 0, status);
3595 }
3596 
3597 float32 int16_to_float32(int16_t a, float_status *status)
3598 {
3599     return int64_to_float32_scalbn(a, 0, status);
3600 }
3601 
3602 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3603 {
3604     FloatParts64 p;
3605 
3606     /* Without scaling, there are no overflow concerns. */
3607     if (likely(scale == 0) && can_use_fpu(status)) {
3608         union_float64 ur;
3609         ur.h = a;
3610         return ur.s;
3611     }
3612 
3613     parts_sint_to_float(&p, a, scale, status);
3614     return float64_round_pack_canonical(&p, status);
3615 }
3616 
3617 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3618 {
3619     return int64_to_float64_scalbn(a, scale, status);
3620 }
3621 
3622 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3623 {
3624     return int64_to_float64_scalbn(a, scale, status);
3625 }
3626 
3627 float64 int64_to_float64(int64_t a, float_status *status)
3628 {
3629     return int64_to_float64_scalbn(a, 0, status);
3630 }
3631 
3632 float64 int32_to_float64(int32_t a, float_status *status)
3633 {
3634     return int64_to_float64_scalbn(a, 0, status);
3635 }
3636 
3637 float64 int16_to_float64(int16_t a, float_status *status)
3638 {
3639     return int64_to_float64_scalbn(a, 0, status);
3640 }
3641 
3642 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3643 {
3644     FloatParts64 p;
3645 
3646     parts_sint_to_float(&p, a, scale, status);
3647     return bfloat16_round_pack_canonical(&p, status);
3648 }
3649 
3650 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3651 {
3652     return int64_to_bfloat16_scalbn(a, scale, status);
3653 }
3654 
3655 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3656 {
3657     return int64_to_bfloat16_scalbn(a, scale, status);
3658 }
3659 
3660 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3661 {
3662     return int64_to_bfloat16_scalbn(a, 0, status);
3663 }
3664 
3665 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3666 {
3667     return int64_to_bfloat16_scalbn(a, 0, status);
3668 }
3669 
3670 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3671 {
3672     return int64_to_bfloat16_scalbn(a, 0, status);
3673 }
3674 
3675 float128 int64_to_float128(int64_t a, float_status *status)
3676 {
3677     FloatParts128 p;
3678 
3679     parts_sint_to_float(&p, a, 0, status);
3680     return float128_round_pack_canonical(&p, status);
3681 }
3682 
3683 float128 int32_to_float128(int32_t a, float_status *status)
3684 {
3685     return int64_to_float128(a, status);
3686 }
3687 
3688 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3689 {
3690     FloatParts128 p;
3691 
3692     parts_sint_to_float(&p, a, 0, status);
3693     return floatx80_round_pack_canonical(&p, status);
3694 }
3695 
3696 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3697 {
3698     return int64_to_floatx80(a, status);
3699 }
3700 
3701 /*
3702  * Unsigned Integer to floating-point conversions
3703  */
3704 
3705 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3706 {
3707     FloatParts64 p;
3708 
3709     parts_uint_to_float(&p, a, scale, status);
3710     return float16_round_pack_canonical(&p, status);
3711 }
3712 
3713 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3714 {
3715     return uint64_to_float16_scalbn(a, scale, status);
3716 }
3717 
3718 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3719 {
3720     return uint64_to_float16_scalbn(a, scale, status);
3721 }
3722 
3723 float16 uint64_to_float16(uint64_t a, float_status *status)
3724 {
3725     return uint64_to_float16_scalbn(a, 0, status);
3726 }
3727 
3728 float16 uint32_to_float16(uint32_t a, float_status *status)
3729 {
3730     return uint64_to_float16_scalbn(a, 0, status);
3731 }
3732 
3733 float16 uint16_to_float16(uint16_t a, float_status *status)
3734 {
3735     return uint64_to_float16_scalbn(a, 0, status);
3736 }
3737 
3738 float16 uint8_to_float16(uint8_t a, float_status *status)
3739 {
3740     return uint64_to_float16_scalbn(a, 0, status);
3741 }
3742 
3743 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3744 {
3745     FloatParts64 p;
3746 
3747     /* Without scaling, there are no overflow concerns. */
3748     if (likely(scale == 0) && can_use_fpu(status)) {
3749         union_float32 ur;
3750         ur.h = a;
3751         return ur.s;
3752     }
3753 
3754     parts_uint_to_float(&p, a, scale, status);
3755     return float32_round_pack_canonical(&p, status);
3756 }
3757 
3758 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3759 {
3760     return uint64_to_float32_scalbn(a, scale, status);
3761 }
3762 
3763 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3764 {
3765     return uint64_to_float32_scalbn(a, scale, status);
3766 }
3767 
3768 float32 uint64_to_float32(uint64_t a, float_status *status)
3769 {
3770     return uint64_to_float32_scalbn(a, 0, status);
3771 }
3772 
3773 float32 uint32_to_float32(uint32_t a, float_status *status)
3774 {
3775     return uint64_to_float32_scalbn(a, 0, status);
3776 }
3777 
3778 float32 uint16_to_float32(uint16_t a, float_status *status)
3779 {
3780     return uint64_to_float32_scalbn(a, 0, status);
3781 }
3782 
3783 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3784 {
3785     FloatParts64 p;
3786 
3787     /* Without scaling, there are no overflow concerns. */
3788     if (likely(scale == 0) && can_use_fpu(status)) {
3789         union_float64 ur;
3790         ur.h = a;
3791         return ur.s;
3792     }
3793 
3794     parts_uint_to_float(&p, a, scale, status);
3795     return float64_round_pack_canonical(&p, status);
3796 }
3797 
3798 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3799 {
3800     return uint64_to_float64_scalbn(a, scale, status);
3801 }
3802 
3803 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3804 {
3805     return uint64_to_float64_scalbn(a, scale, status);
3806 }
3807 
3808 float64 uint64_to_float64(uint64_t a, float_status *status)
3809 {
3810     return uint64_to_float64_scalbn(a, 0, status);
3811 }
3812 
3813 float64 uint32_to_float64(uint32_t a, float_status *status)
3814 {
3815     return uint64_to_float64_scalbn(a, 0, status);
3816 }
3817 
3818 float64 uint16_to_float64(uint16_t a, float_status *status)
3819 {
3820     return uint64_to_float64_scalbn(a, 0, status);
3821 }
3822 
3823 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3824 {
3825     FloatParts64 p;
3826 
3827     parts_uint_to_float(&p, a, scale, status);
3828     return bfloat16_round_pack_canonical(&p, status);
3829 }
3830 
3831 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3832 {
3833     return uint64_to_bfloat16_scalbn(a, scale, status);
3834 }
3835 
3836 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3837 {
3838     return uint64_to_bfloat16_scalbn(a, scale, status);
3839 }
3840 
3841 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3842 {
3843     return uint64_to_bfloat16_scalbn(a, 0, status);
3844 }
3845 
3846 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3847 {
3848     return uint64_to_bfloat16_scalbn(a, 0, status);
3849 }
3850 
3851 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3852 {
3853     return uint64_to_bfloat16_scalbn(a, 0, status);
3854 }
3855 
3856 float128 uint64_to_float128(uint64_t a, float_status *status)
3857 {
3858     FloatParts128 p;
3859 
3860     parts_uint_to_float(&p, a, 0, status);
3861     return float128_round_pack_canonical(&p, status);
3862 }
3863 
3864 /*
3865  * Minimum and maximum
3866  */
3867 
3868 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3869 {
3870     FloatParts64 pa, pb, *pr;
3871 
3872     float16_unpack_canonical(&pa, a, s);
3873     float16_unpack_canonical(&pb, b, s);
3874     pr = parts_minmax(&pa, &pb, s, flags);
3875 
3876     return float16_round_pack_canonical(pr, s);
3877 }
3878 
3879 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3880                                 float_status *s, int flags)
3881 {
3882     FloatParts64 pa, pb, *pr;
3883 
3884     bfloat16_unpack_canonical(&pa, a, s);
3885     bfloat16_unpack_canonical(&pb, b, s);
3886     pr = parts_minmax(&pa, &pb, s, flags);
3887 
3888     return bfloat16_round_pack_canonical(pr, s);
3889 }
3890 
3891 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3892 {
3893     FloatParts64 pa, pb, *pr;
3894 
3895     float32_unpack_canonical(&pa, a, s);
3896     float32_unpack_canonical(&pb, b, s);
3897     pr = parts_minmax(&pa, &pb, s, flags);
3898 
3899     return float32_round_pack_canonical(pr, s);
3900 }
3901 
3902 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3903 {
3904     FloatParts64 pa, pb, *pr;
3905 
3906     float64_unpack_canonical(&pa, a, s);
3907     float64_unpack_canonical(&pb, b, s);
3908     pr = parts_minmax(&pa, &pb, s, flags);
3909 
3910     return float64_round_pack_canonical(pr, s);
3911 }
3912 
3913 static float128 float128_minmax(float128 a, float128 b,
3914                                 float_status *s, int flags)
3915 {
3916     FloatParts128 pa, pb, *pr;
3917 
3918     float128_unpack_canonical(&pa, a, s);
3919     float128_unpack_canonical(&pb, b, s);
3920     pr = parts_minmax(&pa, &pb, s, flags);
3921 
3922     return float128_round_pack_canonical(pr, s);
3923 }
3924 
3925 #define MINMAX_1(type, name, flags) \
3926     type type##_##name(type a, type b, float_status *s) \
3927     { return type##_minmax(a, b, s, flags); }
3928 
3929 #define MINMAX_2(type) \
3930     MINMAX_1(type, max, 0)                                      \
3931     MINMAX_1(type, maxnum, minmax_isnum)                        \
3932     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
3933     MINMAX_1(type, min, minmax_ismin)                           \
3934     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
3935     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3936 
3937 MINMAX_2(float16)
3938 MINMAX_2(bfloat16)
3939 MINMAX_2(float32)
3940 MINMAX_2(float64)
3941 MINMAX_2(float128)
3942 
3943 #undef MINMAX_1
3944 #undef MINMAX_2
3945 
3946 /*
3947  * Floating point compare
3948  */
3949 
3950 static FloatRelation QEMU_FLATTEN
3951 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3952 {
3953     FloatParts64 pa, pb;
3954 
3955     float16_unpack_canonical(&pa, a, s);
3956     float16_unpack_canonical(&pb, b, s);
3957     return parts_compare(&pa, &pb, s, is_quiet);
3958 }
3959 
3960 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3961 {
3962     return float16_do_compare(a, b, s, false);
3963 }
3964 
3965 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3966 {
3967     return float16_do_compare(a, b, s, true);
3968 }
3969 
3970 static FloatRelation QEMU_SOFTFLOAT_ATTR
3971 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3972 {
3973     FloatParts64 pa, pb;
3974 
3975     float32_unpack_canonical(&pa, a, s);
3976     float32_unpack_canonical(&pb, b, s);
3977     return parts_compare(&pa, &pb, s, is_quiet);
3978 }
3979 
3980 static FloatRelation QEMU_FLATTEN
3981 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3982 {
3983     union_float32 ua, ub;
3984 
3985     ua.s = xa;
3986     ub.s = xb;
3987 
3988     if (QEMU_NO_HARDFLOAT) {
3989         goto soft;
3990     }
3991 
3992     float32_input_flush2(&ua.s, &ub.s, s);
3993     if (isgreaterequal(ua.h, ub.h)) {
3994         if (isgreater(ua.h, ub.h)) {
3995             return float_relation_greater;
3996         }
3997         return float_relation_equal;
3998     }
3999     if (likely(isless(ua.h, ub.h))) {
4000         return float_relation_less;
4001     }
4002     /*
4003      * The only condition remaining is unordered.
4004      * Fall through to set flags.
4005      */
4006  soft:
4007     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4008 }
4009 
4010 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4011 {
4012     return float32_hs_compare(a, b, s, false);
4013 }
4014 
4015 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4016 {
4017     return float32_hs_compare(a, b, s, true);
4018 }
4019 
4020 static FloatRelation QEMU_SOFTFLOAT_ATTR
4021 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4022 {
4023     FloatParts64 pa, pb;
4024 
4025     float64_unpack_canonical(&pa, a, s);
4026     float64_unpack_canonical(&pb, b, s);
4027     return parts_compare(&pa, &pb, s, is_quiet);
4028 }
4029 
4030 static FloatRelation QEMU_FLATTEN
4031 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4032 {
4033     union_float64 ua, ub;
4034 
4035     ua.s = xa;
4036     ub.s = xb;
4037 
4038     if (QEMU_NO_HARDFLOAT) {
4039         goto soft;
4040     }
4041 
4042     float64_input_flush2(&ua.s, &ub.s, s);
4043     if (isgreaterequal(ua.h, ub.h)) {
4044         if (isgreater(ua.h, ub.h)) {
4045             return float_relation_greater;
4046         }
4047         return float_relation_equal;
4048     }
4049     if (likely(isless(ua.h, ub.h))) {
4050         return float_relation_less;
4051     }
4052     /*
4053      * The only condition remaining is unordered.
4054      * Fall through to set flags.
4055      */
4056  soft:
4057     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4058 }
4059 
4060 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4061 {
4062     return float64_hs_compare(a, b, s, false);
4063 }
4064 
4065 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4066 {
4067     return float64_hs_compare(a, b, s, true);
4068 }
4069 
4070 static FloatRelation QEMU_FLATTEN
4071 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4072 {
4073     FloatParts64 pa, pb;
4074 
4075     bfloat16_unpack_canonical(&pa, a, s);
4076     bfloat16_unpack_canonical(&pb, b, s);
4077     return parts_compare(&pa, &pb, s, is_quiet);
4078 }
4079 
4080 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4081 {
4082     return bfloat16_do_compare(a, b, s, false);
4083 }
4084 
4085 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4086 {
4087     return bfloat16_do_compare(a, b, s, true);
4088 }
4089 
4090 static FloatRelation QEMU_FLATTEN
4091 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4092 {
4093     FloatParts128 pa, pb;
4094 
4095     float128_unpack_canonical(&pa, a, s);
4096     float128_unpack_canonical(&pb, b, s);
4097     return parts_compare(&pa, &pb, s, is_quiet);
4098 }
4099 
4100 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4101 {
4102     return float128_do_compare(a, b, s, false);
4103 }
4104 
4105 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4106 {
4107     return float128_do_compare(a, b, s, true);
4108 }
4109 
4110 static FloatRelation QEMU_FLATTEN
4111 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4112 {
4113     FloatParts128 pa, pb;
4114 
4115     if (!floatx80_unpack_canonical(&pa, a, s) ||
4116         !floatx80_unpack_canonical(&pb, b, s)) {
4117         return float_relation_unordered;
4118     }
4119     return parts_compare(&pa, &pb, s, is_quiet);
4120 }
4121 
4122 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4123 {
4124     return floatx80_do_compare(a, b, s, false);
4125 }
4126 
4127 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4128 {
4129     return floatx80_do_compare(a, b, s, true);
4130 }
4131 
4132 /*
4133  * Scale by 2**N
4134  */
4135 
4136 float16 float16_scalbn(float16 a, int n, float_status *status)
4137 {
4138     FloatParts64 p;
4139 
4140     float16_unpack_canonical(&p, a, status);
4141     parts_scalbn(&p, n, status);
4142     return float16_round_pack_canonical(&p, status);
4143 }
4144 
4145 float32 float32_scalbn(float32 a, int n, float_status *status)
4146 {
4147     FloatParts64 p;
4148 
4149     float32_unpack_canonical(&p, a, status);
4150     parts_scalbn(&p, n, status);
4151     return float32_round_pack_canonical(&p, status);
4152 }
4153 
4154 float64 float64_scalbn(float64 a, int n, float_status *status)
4155 {
4156     FloatParts64 p;
4157 
4158     float64_unpack_canonical(&p, a, status);
4159     parts_scalbn(&p, n, status);
4160     return float64_round_pack_canonical(&p, status);
4161 }
4162 
4163 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4164 {
4165     FloatParts64 p;
4166 
4167     bfloat16_unpack_canonical(&p, a, status);
4168     parts_scalbn(&p, n, status);
4169     return bfloat16_round_pack_canonical(&p, status);
4170 }
4171 
4172 float128 float128_scalbn(float128 a, int n, float_status *status)
4173 {
4174     FloatParts128 p;
4175 
4176     float128_unpack_canonical(&p, a, status);
4177     parts_scalbn(&p, n, status);
4178     return float128_round_pack_canonical(&p, status);
4179 }
4180 
4181 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4182 {
4183     FloatParts128 p;
4184 
4185     if (!floatx80_unpack_canonical(&p, a, status)) {
4186         return floatx80_default_nan(status);
4187     }
4188     parts_scalbn(&p, n, status);
4189     return floatx80_round_pack_canonical(&p, status);
4190 }
4191 
4192 /*
4193  * Square Root
4194  */
4195 
4196 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4197 {
4198     FloatParts64 p;
4199 
4200     float16_unpack_canonical(&p, a, status);
4201     parts_sqrt(&p, status, &float16_params);
4202     return float16_round_pack_canonical(&p, status);
4203 }
4204 
4205 static float32 QEMU_SOFTFLOAT_ATTR
4206 soft_f32_sqrt(float32 a, float_status *status)
4207 {
4208     FloatParts64 p;
4209 
4210     float32_unpack_canonical(&p, a, status);
4211     parts_sqrt(&p, status, &float32_params);
4212     return float32_round_pack_canonical(&p, status);
4213 }
4214 
4215 static float64 QEMU_SOFTFLOAT_ATTR
4216 soft_f64_sqrt(float64 a, float_status *status)
4217 {
4218     FloatParts64 p;
4219 
4220     float64_unpack_canonical(&p, a, status);
4221     parts_sqrt(&p, status, &float64_params);
4222     return float64_round_pack_canonical(&p, status);
4223 }
4224 
4225 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4226 {
4227     union_float32 ua, ur;
4228 
4229     ua.s = xa;
4230     if (unlikely(!can_use_fpu(s))) {
4231         goto soft;
4232     }
4233 
4234     float32_input_flush1(&ua.s, s);
4235     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4236         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4237                        fpclassify(ua.h) == FP_ZERO) ||
4238                      signbit(ua.h))) {
4239             goto soft;
4240         }
4241     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4242                         float32_is_neg(ua.s))) {
4243         goto soft;
4244     }
4245     ur.h = sqrtf(ua.h);
4246     return ur.s;
4247 
4248  soft:
4249     return soft_f32_sqrt(ua.s, s);
4250 }
4251 
4252 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4253 {
4254     union_float64 ua, ur;
4255 
4256     ua.s = xa;
4257     if (unlikely(!can_use_fpu(s))) {
4258         goto soft;
4259     }
4260 
4261     float64_input_flush1(&ua.s, s);
4262     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4263         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4264                        fpclassify(ua.h) == FP_ZERO) ||
4265                      signbit(ua.h))) {
4266             goto soft;
4267         }
4268     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4269                         float64_is_neg(ua.s))) {
4270         goto soft;
4271     }
4272     ur.h = sqrt(ua.h);
4273     return ur.s;
4274 
4275  soft:
4276     return soft_f64_sqrt(ua.s, s);
4277 }
4278 
4279 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4280 {
4281     FloatParts64 p;
4282 
4283     bfloat16_unpack_canonical(&p, a, status);
4284     parts_sqrt(&p, status, &bfloat16_params);
4285     return bfloat16_round_pack_canonical(&p, status);
4286 }
4287 
4288 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4289 {
4290     FloatParts128 p;
4291 
4292     float128_unpack_canonical(&p, a, status);
4293     parts_sqrt(&p, status, &float128_params);
4294     return float128_round_pack_canonical(&p, status);
4295 }
4296 
4297 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4298 {
4299     FloatParts128 p;
4300 
4301     if (!floatx80_unpack_canonical(&p, a, s)) {
4302         return floatx80_default_nan(s);
4303     }
4304     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4305     return floatx80_round_pack_canonical(&p, s);
4306 }
4307 
4308 /*
4309  * log2
4310  */
4311 float32 float32_log2(float32 a, float_status *status)
4312 {
4313     FloatParts64 p;
4314 
4315     float32_unpack_canonical(&p, a, status);
4316     parts_log2(&p, status, &float32_params);
4317     return float32_round_pack_canonical(&p, status);
4318 }
4319 
4320 float64 float64_log2(float64 a, float_status *status)
4321 {
4322     FloatParts64 p;
4323 
4324     float64_unpack_canonical(&p, a, status);
4325     parts_log2(&p, status, &float64_params);
4326     return float64_round_pack_canonical(&p, status);
4327 }
4328 
4329 /*----------------------------------------------------------------------------
4330 | The pattern for a default generated NaN.
4331 *----------------------------------------------------------------------------*/
4332 
4333 float16 float16_default_nan(float_status *status)
4334 {
4335     FloatParts64 p;
4336 
4337     parts_default_nan(&p, status);
4338     p.frac >>= float16_params.frac_shift;
4339     return float16_pack_raw(&p);
4340 }
4341 
4342 float32 float32_default_nan(float_status *status)
4343 {
4344     FloatParts64 p;
4345 
4346     parts_default_nan(&p, status);
4347     p.frac >>= float32_params.frac_shift;
4348     return float32_pack_raw(&p);
4349 }
4350 
4351 float64 float64_default_nan(float_status *status)
4352 {
4353     FloatParts64 p;
4354 
4355     parts_default_nan(&p, status);
4356     p.frac >>= float64_params.frac_shift;
4357     return float64_pack_raw(&p);
4358 }
4359 
4360 float128 float128_default_nan(float_status *status)
4361 {
4362     FloatParts128 p;
4363 
4364     parts_default_nan(&p, status);
4365     frac_shr(&p, float128_params.frac_shift);
4366     return float128_pack_raw(&p);
4367 }
4368 
4369 bfloat16 bfloat16_default_nan(float_status *status)
4370 {
4371     FloatParts64 p;
4372 
4373     parts_default_nan(&p, status);
4374     p.frac >>= bfloat16_params.frac_shift;
4375     return bfloat16_pack_raw(&p);
4376 }
4377 
4378 /*----------------------------------------------------------------------------
4379 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4380 *----------------------------------------------------------------------------*/
4381 
4382 float16 float16_silence_nan(float16 a, float_status *status)
4383 {
4384     FloatParts64 p;
4385 
4386     float16_unpack_raw(&p, a);
4387     p.frac <<= float16_params.frac_shift;
4388     parts_silence_nan(&p, status);
4389     p.frac >>= float16_params.frac_shift;
4390     return float16_pack_raw(&p);
4391 }
4392 
4393 float32 float32_silence_nan(float32 a, float_status *status)
4394 {
4395     FloatParts64 p;
4396 
4397     float32_unpack_raw(&p, a);
4398     p.frac <<= float32_params.frac_shift;
4399     parts_silence_nan(&p, status);
4400     p.frac >>= float32_params.frac_shift;
4401     return float32_pack_raw(&p);
4402 }
4403 
4404 float64 float64_silence_nan(float64 a, float_status *status)
4405 {
4406     FloatParts64 p;
4407 
4408     float64_unpack_raw(&p, a);
4409     p.frac <<= float64_params.frac_shift;
4410     parts_silence_nan(&p, status);
4411     p.frac >>= float64_params.frac_shift;
4412     return float64_pack_raw(&p);
4413 }
4414 
4415 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4416 {
4417     FloatParts64 p;
4418 
4419     bfloat16_unpack_raw(&p, a);
4420     p.frac <<= bfloat16_params.frac_shift;
4421     parts_silence_nan(&p, status);
4422     p.frac >>= bfloat16_params.frac_shift;
4423     return bfloat16_pack_raw(&p);
4424 }
4425 
4426 float128 float128_silence_nan(float128 a, float_status *status)
4427 {
4428     FloatParts128 p;
4429 
4430     float128_unpack_raw(&p, a);
4431     frac_shl(&p, float128_params.frac_shift);
4432     parts_silence_nan(&p, status);
4433     frac_shr(&p, float128_params.frac_shift);
4434     return float128_pack_raw(&p);
4435 }
4436 
4437 /*----------------------------------------------------------------------------
4438 | If `a' is denormal and we are in flush-to-zero mode then set the
4439 | input-denormal exception and return zero. Otherwise just return the value.
4440 *----------------------------------------------------------------------------*/
4441 
4442 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4443 {
4444     if (p.exp == 0 && p.frac != 0) {
4445         float_raise(float_flag_input_denormal, status);
4446         return true;
4447     }
4448 
4449     return false;
4450 }
4451 
4452 float16 float16_squash_input_denormal(float16 a, float_status *status)
4453 {
4454     if (status->flush_inputs_to_zero) {
4455         FloatParts64 p;
4456 
4457         float16_unpack_raw(&p, a);
4458         if (parts_squash_denormal(p, status)) {
4459             return float16_set_sign(float16_zero, p.sign);
4460         }
4461     }
4462     return a;
4463 }
4464 
4465 float32 float32_squash_input_denormal(float32 a, float_status *status)
4466 {
4467     if (status->flush_inputs_to_zero) {
4468         FloatParts64 p;
4469 
4470         float32_unpack_raw(&p, a);
4471         if (parts_squash_denormal(p, status)) {
4472             return float32_set_sign(float32_zero, p.sign);
4473         }
4474     }
4475     return a;
4476 }
4477 
4478 float64 float64_squash_input_denormal(float64 a, float_status *status)
4479 {
4480     if (status->flush_inputs_to_zero) {
4481         FloatParts64 p;
4482 
4483         float64_unpack_raw(&p, a);
4484         if (parts_squash_denormal(p, status)) {
4485             return float64_set_sign(float64_zero, p.sign);
4486         }
4487     }
4488     return a;
4489 }
4490 
4491 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4492 {
4493     if (status->flush_inputs_to_zero) {
4494         FloatParts64 p;
4495 
4496         bfloat16_unpack_raw(&p, a);
4497         if (parts_squash_denormal(p, status)) {
4498             return bfloat16_set_sign(bfloat16_zero, p.sign);
4499         }
4500     }
4501     return a;
4502 }
4503 
4504 /*----------------------------------------------------------------------------
4505 | Normalizes the subnormal extended double-precision floating-point value
4506 | represented by the denormalized significand `aSig'.  The normalized exponent
4507 | and significand are stored at the locations pointed to by `zExpPtr' and
4508 | `zSigPtr', respectively.
4509 *----------------------------------------------------------------------------*/
4510 
4511 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4512                                 uint64_t *zSigPtr)
4513 {
4514     int8_t shiftCount;
4515 
4516     shiftCount = clz64(aSig);
4517     *zSigPtr = aSig<<shiftCount;
4518     *zExpPtr = 1 - shiftCount;
4519 }
4520 
4521 /*----------------------------------------------------------------------------
4522 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4523 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4524 | and returns the proper extended double-precision floating-point value
4525 | corresponding to the abstract input.  Ordinarily, the abstract value is
4526 | rounded and packed into the extended double-precision format, with the
4527 | inexact exception raised if the abstract input cannot be represented
4528 | exactly.  However, if the abstract value is too large, the overflow and
4529 | inexact exceptions are raised and an infinity or maximal finite value is
4530 | returned.  If the abstract value is too small, the input value is rounded to
4531 | a subnormal number, and the underflow and inexact exceptions are raised if
4532 | the abstract input cannot be represented exactly as a subnormal extended
4533 | double-precision floating-point number.
4534 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4535 | the result is rounded to the same number of bits as single or double
4536 | precision, respectively.  Otherwise, the result is rounded to the full
4537 | precision of the extended double-precision format.
4538 |     The input significand must be normalized or smaller.  If the input
4539 | significand is not normalized, `zExp' must be 0; in that case, the result
4540 | returned is a subnormal number, and it must not require rounding.  The
4541 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4542 | Floating-Point Arithmetic.
4543 *----------------------------------------------------------------------------*/
4544 
4545 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4546                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4547                               float_status *status)
4548 {
4549     FloatRoundMode roundingMode;
4550     bool roundNearestEven, increment, isTiny;
4551     int64_t roundIncrement, roundMask, roundBits;
4552 
4553     roundingMode = status->float_rounding_mode;
4554     roundNearestEven = ( roundingMode == float_round_nearest_even );
4555     switch (roundingPrecision) {
4556     case floatx80_precision_x:
4557         goto precision80;
4558     case floatx80_precision_d:
4559         roundIncrement = UINT64_C(0x0000000000000400);
4560         roundMask = UINT64_C(0x00000000000007FF);
4561         break;
4562     case floatx80_precision_s:
4563         roundIncrement = UINT64_C(0x0000008000000000);
4564         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4565         break;
4566     default:
4567         g_assert_not_reached();
4568     }
4569     zSig0 |= ( zSig1 != 0 );
4570     switch (roundingMode) {
4571     case float_round_nearest_even:
4572     case float_round_ties_away:
4573         break;
4574     case float_round_to_zero:
4575         roundIncrement = 0;
4576         break;
4577     case float_round_up:
4578         roundIncrement = zSign ? 0 : roundMask;
4579         break;
4580     case float_round_down:
4581         roundIncrement = zSign ? roundMask : 0;
4582         break;
4583     default:
4584         abort();
4585     }
4586     roundBits = zSig0 & roundMask;
4587     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4588         if (    ( 0x7FFE < zExp )
4589              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4590            ) {
4591             goto overflow;
4592         }
4593         if ( zExp <= 0 ) {
4594             if (status->flush_to_zero) {
4595                 float_raise(float_flag_output_denormal, status);
4596                 return packFloatx80(zSign, 0, 0);
4597             }
4598             isTiny = status->tininess_before_rounding
4599                   || (zExp < 0 )
4600                   || (zSig0 <= zSig0 + roundIncrement);
4601             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4602             zExp = 0;
4603             roundBits = zSig0 & roundMask;
4604             if (isTiny && roundBits) {
4605                 float_raise(float_flag_underflow, status);
4606             }
4607             if (roundBits) {
4608                 float_raise(float_flag_inexact, status);
4609             }
4610             zSig0 += roundIncrement;
4611             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4612             roundIncrement = roundMask + 1;
4613             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4614                 roundMask |= roundIncrement;
4615             }
4616             zSig0 &= ~ roundMask;
4617             return packFloatx80( zSign, zExp, zSig0 );
4618         }
4619     }
4620     if (roundBits) {
4621         float_raise(float_flag_inexact, status);
4622     }
4623     zSig0 += roundIncrement;
4624     if ( zSig0 < roundIncrement ) {
4625         ++zExp;
4626         zSig0 = UINT64_C(0x8000000000000000);
4627     }
4628     roundIncrement = roundMask + 1;
4629     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4630         roundMask |= roundIncrement;
4631     }
4632     zSig0 &= ~ roundMask;
4633     if ( zSig0 == 0 ) zExp = 0;
4634     return packFloatx80( zSign, zExp, zSig0 );
4635  precision80:
4636     switch (roundingMode) {
4637     case float_round_nearest_even:
4638     case float_round_ties_away:
4639         increment = ((int64_t)zSig1 < 0);
4640         break;
4641     case float_round_to_zero:
4642         increment = 0;
4643         break;
4644     case float_round_up:
4645         increment = !zSign && zSig1;
4646         break;
4647     case float_round_down:
4648         increment = zSign && zSig1;
4649         break;
4650     default:
4651         abort();
4652     }
4653     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4654         if (    ( 0x7FFE < zExp )
4655              || (    ( zExp == 0x7FFE )
4656                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4657                   && increment
4658                 )
4659            ) {
4660             roundMask = 0;
4661  overflow:
4662             float_raise(float_flag_overflow | float_flag_inexact, status);
4663             if (    ( roundingMode == float_round_to_zero )
4664                  || ( zSign && ( roundingMode == float_round_up ) )
4665                  || ( ! zSign && ( roundingMode == float_round_down ) )
4666                ) {
4667                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4668             }
4669             return packFloatx80(zSign,
4670                                 floatx80_infinity_high,
4671                                 floatx80_infinity_low);
4672         }
4673         if ( zExp <= 0 ) {
4674             isTiny = status->tininess_before_rounding
4675                   || (zExp < 0)
4676                   || !increment
4677                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4678             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4679             zExp = 0;
4680             if (isTiny && zSig1) {
4681                 float_raise(float_flag_underflow, status);
4682             }
4683             if (zSig1) {
4684                 float_raise(float_flag_inexact, status);
4685             }
4686             switch (roundingMode) {
4687             case float_round_nearest_even:
4688             case float_round_ties_away:
4689                 increment = ((int64_t)zSig1 < 0);
4690                 break;
4691             case float_round_to_zero:
4692                 increment = 0;
4693                 break;
4694             case float_round_up:
4695                 increment = !zSign && zSig1;
4696                 break;
4697             case float_round_down:
4698                 increment = zSign && zSig1;
4699                 break;
4700             default:
4701                 abort();
4702             }
4703             if ( increment ) {
4704                 ++zSig0;
4705                 if (!(zSig1 << 1) && roundNearestEven) {
4706                     zSig0 &= ~1;
4707                 }
4708                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4709             }
4710             return packFloatx80( zSign, zExp, zSig0 );
4711         }
4712     }
4713     if (zSig1) {
4714         float_raise(float_flag_inexact, status);
4715     }
4716     if ( increment ) {
4717         ++zSig0;
4718         if ( zSig0 == 0 ) {
4719             ++zExp;
4720             zSig0 = UINT64_C(0x8000000000000000);
4721         }
4722         else {
4723             if (!(zSig1 << 1) && roundNearestEven) {
4724                 zSig0 &= ~1;
4725             }
4726         }
4727     }
4728     else {
4729         if ( zSig0 == 0 ) zExp = 0;
4730     }
4731     return packFloatx80( zSign, zExp, zSig0 );
4732 
4733 }
4734 
4735 /*----------------------------------------------------------------------------
4736 | Takes an abstract floating-point value having sign `zSign', exponent
4737 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4738 | and returns the proper extended double-precision floating-point value
4739 | corresponding to the abstract input.  This routine is just like
4740 | `roundAndPackFloatx80' except that the input significand does not have to be
4741 | normalized.
4742 *----------------------------------------------------------------------------*/
4743 
4744 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4745                                        bool zSign, int32_t zExp,
4746                                        uint64_t zSig0, uint64_t zSig1,
4747                                        float_status *status)
4748 {
4749     int8_t shiftCount;
4750 
4751     if ( zSig0 == 0 ) {
4752         zSig0 = zSig1;
4753         zSig1 = 0;
4754         zExp -= 64;
4755     }
4756     shiftCount = clz64(zSig0);
4757     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4758     zExp -= shiftCount;
4759     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4760                                 zSig0, zSig1, status);
4761 
4762 }
4763 
4764 /*----------------------------------------------------------------------------
4765 | Returns the binary exponential of the single-precision floating-point value
4766 | `a'. The operation is performed according to the IEC/IEEE Standard for
4767 | Binary Floating-Point Arithmetic.
4768 |
4769 | Uses the following identities:
4770 |
4771 | 1. -------------------------------------------------------------------------
4772 |      x    x*ln(2)
4773 |     2  = e
4774 |
4775 | 2. -------------------------------------------------------------------------
4776 |                      2     3     4     5           n
4777 |      x        x     x     x     x     x           x
4778 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4779 |               1!    2!    3!    4!    5!          n!
4780 *----------------------------------------------------------------------------*/
4781 
4782 static const float64 float32_exp2_coefficients[15] =
4783 {
4784     const_float64( 0x3ff0000000000000ll ), /*  1 */
4785     const_float64( 0x3fe0000000000000ll ), /*  2 */
4786     const_float64( 0x3fc5555555555555ll ), /*  3 */
4787     const_float64( 0x3fa5555555555555ll ), /*  4 */
4788     const_float64( 0x3f81111111111111ll ), /*  5 */
4789     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4790     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4791     const_float64( 0x3efa01a01a01a01all ), /*  8 */
4792     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4793     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4794     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4795     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4796     const_float64( 0x3de6124613a86d09ll ), /* 13 */
4797     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4798     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4799 };
4800 
4801 float32 float32_exp2(float32 a, float_status *status)
4802 {
4803     FloatParts64 xp, xnp, tp, rp;
4804     int i;
4805 
4806     float32_unpack_canonical(&xp, a, status);
4807     if (unlikely(xp.cls != float_class_normal)) {
4808         switch (xp.cls) {
4809         case float_class_snan:
4810         case float_class_qnan:
4811             parts_return_nan(&xp, status);
4812             return float32_round_pack_canonical(&xp, status);
4813         case float_class_inf:
4814             return xp.sign ? float32_zero : a;
4815         case float_class_zero:
4816             return float32_one;
4817         default:
4818             break;
4819         }
4820         g_assert_not_reached();
4821     }
4822 
4823     float_raise(float_flag_inexact, status);
4824 
4825     float64_unpack_canonical(&tp, float64_ln2, status);
4826     xp = *parts_mul(&xp, &tp, status);
4827     xnp = xp;
4828 
4829     float64_unpack_canonical(&rp, float64_one, status);
4830     for (i = 0 ; i < 15 ; i++) {
4831         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4832         rp = *parts_muladd(&tp, &xp, &rp, 0, status);
4833         xnp = *parts_mul(&xnp, &xp, status);
4834     }
4835 
4836     return float32_round_pack_canonical(&rp, status);
4837 }
4838 
4839 /*----------------------------------------------------------------------------
4840 | Rounds the extended double-precision floating-point value `a'
4841 | to the precision provided by floatx80_rounding_precision and returns the
4842 | result as an extended double-precision floating-point value.
4843 | The operation is performed according to the IEC/IEEE Standard for Binary
4844 | Floating-Point Arithmetic.
4845 *----------------------------------------------------------------------------*/
4846 
4847 floatx80 floatx80_round(floatx80 a, float_status *status)
4848 {
4849     FloatParts128 p;
4850 
4851     if (!floatx80_unpack_canonical(&p, a, status)) {
4852         return floatx80_default_nan(status);
4853     }
4854     return floatx80_round_pack_canonical(&p, status);
4855 }
4856 
4857 static void __attribute__((constructor)) softfloat_init(void)
4858 {
4859     union_float64 ua, ub, uc, ur;
4860 
4861     if (QEMU_NO_HARDFLOAT) {
4862         return;
4863     }
4864     /*
4865      * Test that the host's FMA is not obviously broken. For example,
4866      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4867      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4868      */
4869     ua.s = 0x0020000000000001ULL;
4870     ub.s = 0x3ca0000000000000ULL;
4871     uc.s = 0x0020000000000000ULL;
4872     ur.h = fma(ua.h, ub.h, uc.h);
4873     if (ur.s != 0x0020000000000001ULL) {
4874         force_soft_fma = true;
4875     }
4876 }
4877