/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.c.inc) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work
 * with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

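/*
 * Example: with s->flush_inputs_to_zero set, float32_input_flush2
 * rewrites any denormal operand to a signed zero and raises
 * float_flag_input_denormal, so the hardfloat fast paths below never
 * feed a denormal input to the host FPU.
 */
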
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64, however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

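/*
 * Note: this check encodes the paper's observation that guests rarely
 * clear exception flags. Once some earlier operation has left inexact
 * set (and the rounding mode is round-to-nearest-even), inexactness
 * need not be recomputed, so subsequent operations may run on the
 * host FPU.
 */
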
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

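/*
 * Illustrative sketch (hypothetical names, not definitions from this
 * file): a hardfloat binary op is assembled by pairing a native C
 * operation with its softfloat fallback and the pre/post checks above:
 *
 *   static float hard_f32_add_sketch(float a, float b)
 *   {
 *       return a + b;
 *   }
 *
 *   float32 float32_add_sketch(float32 a, float32 b, float_status *s)
 *   {
 *       return float32_gen2(a, b, s, hard_f32_add_sketch,
 *                           soft_f32_add_sketch,
 *                           f32_is_zon2, f32_addsub_post_sketch);
 *   }
 *
 * where soft_f32_add_sketch and f32_addsub_post_sketch stand in for
 * the real softfloat fallback and underflow post-check.
 */
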
/*
 * Classify a floating point number. float_class_qnan and everything
 * above it are NaNs, so cls >= float_class_qnan tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

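/*
 * Example: callers build ab_mask = float_cmask(a->cls) |
 * float_cmask(b->cls); then (ab_mask & float_cmask_anynan) tests
 * "is either operand a NaN" with a single AND, and
 * ab_mask == float_cmask_infzero recognizes the inf * 0 pair for
 * muladd NaN handling.
 */
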
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below the lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1

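/*
 * Worked example: FLOAT_PARAMS(8, 23), used for float32 below, yields
 * exp_bias = 127, exp_max = 255, frac_shift = (-23 - 1) & 63 = 40 and
 * round_mask = (1ull << 40) - 1: the implicit bit lands on
 * DECOMPOSED_BINARY_POINT (bit 63), the 23 fraction bits sit just
 * below it, and the low 40 bits carry rounding information.
 */
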
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}

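/*
 * Example: unpacking the float32 bit pattern 0x40490fdb (pi) with
 * float32_params gives sign = 0, exp = 128 (still biased) and
 * frac = 0x490fdb (implicit bit still absent); parts_canonicalize
 * later removes the bias and normalizes the fraction.
 */
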
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

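/*
 * Example: given FloatParts64 *p, parts_canonicalize(p, s, fmt) below
 * dispatches (via QEMU_GENERIC, on the pointer type of its first
 * argument) to parts64_canonicalize; given a FloatParts128 * it
 * selects parts128_canonicalize instead.
 */
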
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

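/*
 * Long division for the remainder operations, in outline: the
 * exponent difference is consumed 62 (or, for 128-bit, 61) quotient
 * bits at a time using estimateDiv128To64; each estimate is
 * deliberately lowered (by 2 or 4) so the running remainder can never
 * go negative, and the final partial step corrects the estimate
 * upward with a small while loop.  When mod_quot is NULL, the last
 * block converts the truncating remainder into the
 * round-to-nearest-even remainder by conditionally subtracting one
 * more multiple of B and flipping the sign.
 */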
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

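/*
 * Worked example for the sticky ("jamming") shift: frac64_shrjam with
 * c = 4 on frac = 0x8000000000000001 computes 0x0800000000000000 and
 * ORs in 1 because non-zero bits were shifted out, giving
 * 0x0800000000000001; later rounding can therefore still see that
 * the value was inexact.
 */
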
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
1565 static const uint16_t rsqrt_tab[128] = {
1566     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1567     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1568     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1569     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1570     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1571     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1572     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1573     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1574     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1575     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1576     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1577     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1578     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1579     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1580     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1581     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1582 };
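
/*
 * Illustrative sketch (not code used in this file): musl's sqrt()
 * indexes a table of this shape with the low bit of the biased
 * exponent and the top 6 mantissa bits of the raw binary64 input,
 * then sharpens the ~16-bit reciprocal-sqrt estimate with
 * Newton-Raphson steps, roughly:
 *
 *     int i = (ix >> 46) % 128;                   // ix = raw f64 bits
 *     uint32_t r = (uint32_t)rsqrt_tab[i] << 16;  // r ~= 1/sqrt(m)
 *     // each step r = r * (3 - m*r*r) / 2 approximately doubles
 *     // the number of correct bits
 */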
1583 
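/*
 * Instantiate the width-generic "parts" implementations below:
 * partsN(NAME) expands to parts64_NAME, parts128_NAME or parts256_NAME
 * according to N, while FloatPartsW names the next-wider parts type,
 * used for double-width intermediates.
 */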
1584 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1585 #define FloatPartsN    glue(FloatParts,N)
1586 #define FloatPartsW    glue(FloatParts,W)
1587 
1588 #define N 64
1589 #define W 128
1590 
1591 #include "softfloat-parts-addsub.c.inc"
1592 #include "softfloat-parts.c.inc"
1593 
1594 #undef  N
1595 #undef  W
1596 #define N 128
1597 #define W 256
1598 
1599 #include "softfloat-parts-addsub.c.inc"
1600 #include "softfloat-parts.c.inc"
1601 
1602 #undef  N
1603 #undef  W
1604 #define N            256
1605 
1606 #include "softfloat-parts-addsub.c.inc"
1607 
1608 #undef  N
1609 #undef  W
1610 #undef  partsN
1611 #undef  FloatPartsN
1612 #undef  FloatPartsW
1613 
1614 /*
1615  * Pack/unpack routines with a specific FloatFmt.
1616  */
1617 
1618 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1619                                       float_status *s, const FloatFmt *params)
1620 {
1621     float16_unpack_raw(p, f);
1622     parts_canonicalize(p, s, params);
1623 }
1624 
1625 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1626                                      float_status *s)
1627 {
1628     float16a_unpack_canonical(p, f, s, &float16_params);
1629 }
1630 
1631 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1632                                       float_status *s)
1633 {
1634     bfloat16_unpack_raw(p, f);
1635     parts_canonicalize(p, s, &bfloat16_params);
1636 }
1637 
1638 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1639                                              float_status *s,
1640                                              const FloatFmt *params)
1641 {
1642     parts_uncanon(p, s, params);
1643     return float16_pack_raw(p);
1644 }
1645 
1646 static float16 float16_round_pack_canonical(FloatParts64 *p,
1647                                             float_status *s)
1648 {
1649     return float16a_round_pack_canonical(p, s, &float16_params);
1650 }
1651 
1652 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1653                                               float_status *s)
1654 {
1655     parts_uncanon(p, s, &bfloat16_params);
1656     return bfloat16_pack_raw(p);
1657 }
1658 
1659 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1660                                      float_status *s)
1661 {
1662     float32_unpack_raw(p, f);
1663     parts_canonicalize(p, s, &float32_params);
1664 }
1665 
1666 static float32 float32_round_pack_canonical(FloatParts64 *p,
1667                                             float_status *s)
1668 {
1669     parts_uncanon(p, s, &float32_params);
1670     return float32_pack_raw(p);
1671 }
1672 
1673 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1674                                      float_status *s)
1675 {
1676     float64_unpack_raw(p, f);
1677     parts_canonicalize(p, s, &float64_params);
1678 }
1679 
1680 static float64 float64_round_pack_canonical(FloatParts64 *p,
1681                                             float_status *s)
1682 {
1683     parts_uncanon(p, s, &float64_params);
1684     return float64_pack_raw(p);
1685 }
1686 
1687 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1688                                       float_status *s)
1689 {
1690     float128_unpack_raw(p, f);
1691     parts_canonicalize(p, s, &float128_params);
1692 }
1693 
1694 static float128 float128_round_pack_canonical(FloatParts128 *p,
1695                                               float_status *s)
1696 {
1697     parts_uncanon(p, s, &float128_params);
1698     return float128_pack_raw(p);
1699 }
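
/*
 * The arithmetic entry points later in this file all follow the same
 * shape (a minimal sketch, assuming a caller-owned float_status):
 *
 *     FloatParts64 pa, pb, *pr;
 *     float64_unpack_canonical(&pa, a, status);      // decode, classify
 *     float64_unpack_canonical(&pb, b, status);
 *     pr = parts_addsub(&pa, &pb, status, false);    // width-generic op
 *     return float64_round_pack_canonical(pr, status); // round, encode
 */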
1700 
1701 /* Returns false if the encoding is invalid. */
1702 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1703                                       float_status *s)
1704 {
1705     /* Ensure rounding precision is set before beginning. */
1706     switch (s->floatx80_rounding_precision) {
1707     case floatx80_precision_x:
1708     case floatx80_precision_d:
1709     case floatx80_precision_s:
1710         break;
1711     default:
1712         g_assert_not_reached();
1713     }
1714 
1715     if (unlikely(floatx80_invalid_encoding(f))) {
1716         float_raise(float_flag_invalid, s);
1717         return false;
1718     }
1719 
1720     floatx80_unpack_raw(p, f);
1721 
1722     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1723         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1724     } else {
1725         /* After the invalid-encoding check, the explicit integer bit is ignored. */
1726         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1727         p->cls = (p->frac_hi == 0 ? float_class_inf
1728                   : parts_is_snan_frac(p->frac_hi, s)
1729                   ? float_class_snan : float_class_qnan);
1730     }
1731     return true;
1732 }
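
/*
 * Note: x86 "unnormals" (a nonzero exponent field with the explicit
 * integer bit clear) are among the invalid encodings rejected above;
 * callers respond by returning the default NaN rather than trying to
 * interpret such operands.
 */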
1733 
1734 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1735                                               float_status *s)
1736 {
1737     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1738     uint64_t frac;
1739     int exp;
1740 
1741     switch (p->cls) {
1742     case float_class_normal:
1743         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1744             parts_uncanon_normal(p, s, fmt);
1745             frac = p->frac_hi;
1746             exp = p->exp;
1747         } else {
1748             FloatParts64 p64;
1749 
1750             p64.sign = p->sign;
1751             p64.exp = p->exp;
1752             frac_truncjam(&p64, p);
1753             parts_uncanon_normal(&p64, s, fmt);
1754             frac = p64.frac;
1755             exp = p64.exp;
1756         }
1757         if (exp != fmt->exp_max) {
1758             break;
1759         }
1760         /* rounded to inf -- fall through to set frac correctly */
1761 
1762     case float_class_inf:
1763         /* x86 and m68k differ in the setting of the integer bit. */
1764         frac = floatx80_infinity_low;
1765         exp = fmt->exp_max;
1766         break;
1767 
1768     case float_class_zero:
1769         frac = 0;
1770         exp = 0;
1771         break;
1772 
1773     case float_class_snan:
1774     case float_class_qnan:
1775         /* NaNs have the integer bit set. */
1776         frac = p->frac_hi | (1ull << 63);
1777         exp = fmt->exp_max;
1778         break;
1779 
1780     default:
1781         g_assert_not_reached();
1782     }
1783 
1784     return packFloatx80(p->sign, exp, frac);
1785 }
1786 
1787 /*
1788  * Addition and subtraction
1789  */
1790 
1791 static float16 QEMU_FLATTEN
1792 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1793 {
1794     FloatParts64 pa, pb, *pr;
1795 
1796     float16_unpack_canonical(&pa, a, status);
1797     float16_unpack_canonical(&pb, b, status);
1798     pr = parts_addsub(&pa, &pb, status, subtract);
1799 
1800     return float16_round_pack_canonical(pr, status);
1801 }
1802 
1803 float16 float16_add(float16 a, float16 b, float_status *status)
1804 {
1805     return float16_addsub(a, b, status, false);
1806 }
1807 
1808 float16 float16_sub(float16 a, float16 b, float_status *status)
1809 {
1810     return float16_addsub(a, b, status, true);
1811 }
1812 
1813 static float32 QEMU_SOFTFLOAT_ATTR
1814 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1815 {
1816     FloatParts64 pa, pb, *pr;
1817 
1818     float32_unpack_canonical(&pa, a, status);
1819     float32_unpack_canonical(&pb, b, status);
1820     pr = parts_addsub(&pa, &pb, status, subtract);
1821 
1822     return float32_round_pack_canonical(pr, status);
1823 }
1824 
1825 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1826 {
1827     return soft_f32_addsub(a, b, status, false);
1828 }
1829 
1830 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1831 {
1832     return soft_f32_addsub(a, b, status, true);
1833 }
1834 
1835 static float64 QEMU_SOFTFLOAT_ATTR
1836 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1837 {
1838     FloatParts64 pa, pb, *pr;
1839 
1840     float64_unpack_canonical(&pa, a, status);
1841     float64_unpack_canonical(&pb, b, status);
1842     pr = parts_addsub(&pa, &pb, status, subtract);
1843 
1844     return float64_round_pack_canonical(pr, status);
1845 }
1846 
1847 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1848 {
1849     return soft_f64_addsub(a, b, status, false);
1850 }
1851 
1852 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1853 {
1854     return soft_f64_addsub(a, b, status, true);
1855 }
1856 
1857 static float hard_f32_add(float a, float b)
1858 {
1859     return a + b;
1860 }
1861 
1862 static float hard_f32_sub(float a, float b)
1863 {
1864     return a - b;
1865 }
1866 
1867 static double hard_f64_add(double a, double b)
1868 {
1869     return a + b;
1870 }
1871 
1872 static double hard_f64_sub(double a, double b)
1873 {
1874     return a - b;
1875 }
1876 
1877 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1878 {
1879     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1880         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1881     }
1882     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1883 }
1884 
1885 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1886 {
1887     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1888         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1889     } else {
1890         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1891     }
1892 }
1893 
1894 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1895                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1896 {
1897     return float32_gen2(a, b, s, hard, soft,
1898                         f32_is_zon2, f32_addsubmul_post);
1899 }
1900 
1901 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1902                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1903 {
1904     return float64_gen2(a, b, s, hard, soft,
1905                         f64_is_zon2, f64_addsubmul_post);
1906 }
1907 
1908 float32 QEMU_FLATTEN
1909 float32_add(float32 a, float32 b, float_status *s)
1910 {
1911     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1912 }
1913 
1914 float32 QEMU_FLATTEN
1915 float32_sub(float32 a, float32 b, float_status *s)
1916 {
1917     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1918 }
1919 
1920 float64 QEMU_FLATTEN
1921 float64_add(float64 a, float64 b, float_status *s)
1922 {
1923     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1924 }
1925 
1926 float64 QEMU_FLATTEN
1927 float64_sub(float64 a, float64 b, float_status *s)
1928 {
1929     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1930 }
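
/*
 * A note on the hardfloat dispatch above: in outline,
 * float32_gen2()/float64_gen2() use the host FPU only when both inputs
 * are zero or normal (f32/f64_is_zon2).  A result at or below the
 * smallest normal then falls back to the soft path unless the
 * *_addsubmul_post predicate shows that both inputs were zero, in
 * which case the zero result is exact and cannot have underflowed.
 */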
1931 
1932 static bfloat16 QEMU_FLATTEN
1933 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1934 {
1935     FloatParts64 pa, pb, *pr;
1936 
1937     bfloat16_unpack_canonical(&pa, a, status);
1938     bfloat16_unpack_canonical(&pb, b, status);
1939     pr = parts_addsub(&pa, &pb, status, subtract);
1940 
1941     return bfloat16_round_pack_canonical(pr, status);
1942 }
1943 
1944 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1945 {
1946     return bfloat16_addsub(a, b, status, false);
1947 }
1948 
1949 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1950 {
1951     return bfloat16_addsub(a, b, status, true);
1952 }
1953 
1954 static float128 QEMU_FLATTEN
1955 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1956 {
1957     FloatParts128 pa, pb, *pr;
1958 
1959     float128_unpack_canonical(&pa, a, status);
1960     float128_unpack_canonical(&pb, b, status);
1961     pr = parts_addsub(&pa, &pb, status, subtract);
1962 
1963     return float128_round_pack_canonical(pr, status);
1964 }
1965 
1966 float128 float128_add(float128 a, float128 b, float_status *status)
1967 {
1968     return float128_addsub(a, b, status, false);
1969 }
1970 
1971 float128 float128_sub(float128 a, float128 b, float_status *status)
1972 {
1973     return float128_addsub(a, b, status, true);
1974 }
1975 
1976 static floatx80 QEMU_FLATTEN
1977 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1978 {
1979     FloatParts128 pa, pb, *pr;
1980 
1981     if (!floatx80_unpack_canonical(&pa, a, status) ||
1982         !floatx80_unpack_canonical(&pb, b, status)) {
1983         return floatx80_default_nan(status);
1984     }
1985 
1986     pr = parts_addsub(&pa, &pb, status, subtract);
1987     return floatx80_round_pack_canonical(pr, status);
1988 }
1989 
1990 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1991 {
1992     return floatx80_addsub(a, b, status, false);
1993 }
1994 
1995 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1996 {
1997     return floatx80_addsub(a, b, status, true);
1998 }
1999 
2000 /*
2001  * Multiplication
2002  */
2003 
2004 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2005 {
2006     FloatParts64 pa, pb, *pr;
2007 
2008     float16_unpack_canonical(&pa, a, status);
2009     float16_unpack_canonical(&pb, b, status);
2010     pr = parts_mul(&pa, &pb, status);
2011 
2012     return float16_round_pack_canonical(pr, status);
2013 }
2014 
2015 static float32 QEMU_SOFTFLOAT_ATTR
2016 soft_f32_mul(float32 a, float32 b, float_status *status)
2017 {
2018     FloatParts64 pa, pb, *pr;
2019 
2020     float32_unpack_canonical(&pa, a, status);
2021     float32_unpack_canonical(&pb, b, status);
2022     pr = parts_mul(&pa, &pb, status);
2023 
2024     return float32_round_pack_canonical(pr, status);
2025 }
2026 
2027 static float64 QEMU_SOFTFLOAT_ATTR
2028 soft_f64_mul(float64 a, float64 b, float_status *status)
2029 {
2030     FloatParts64 pa, pb, *pr;
2031 
2032     float64_unpack_canonical(&pa, a, status);
2033     float64_unpack_canonical(&pb, b, status);
2034     pr = parts_mul(&pa, &pb, status);
2035 
2036     return float64_round_pack_canonical(pr, status);
2037 }
2038 
2039 static float hard_f32_mul(float a, float b)
2040 {
2041     return a * b;
2042 }
2043 
2044 static double hard_f64_mul(double a, double b)
2045 {
2046     return a * b;
2047 }
2048 
2049 float32 QEMU_FLATTEN
2050 float32_mul(float32 a, float32 b, float_status *s)
2051 {
2052     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2053                         f32_is_zon2, f32_addsubmul_post);
2054 }
2055 
2056 float64 QEMU_FLATTEN
2057 float64_mul(float64 a, float64 b, float_status *s)
2058 {
2059     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2060                         f64_is_zon2, f64_addsubmul_post);
2061 }
2062 
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2065 {
2066     FloatParts64 pa, pb, *pr;
2067 
2068     bfloat16_unpack_canonical(&pa, a, status);
2069     bfloat16_unpack_canonical(&pb, b, status);
2070     pr = parts_mul(&pa, &pb, status);
2071 
2072     return bfloat16_round_pack_canonical(pr, status);
2073 }
2074 
2075 float128 QEMU_FLATTEN
2076 float128_mul(float128 a, float128 b, float_status *status)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     float128_unpack_canonical(&pa, a, status);
2081     float128_unpack_canonical(&pb, b, status);
2082     pr = parts_mul(&pa, &pb, status);
2083 
2084     return float128_round_pack_canonical(pr, status);
2085 }
2086 
2087 floatx80 QEMU_FLATTEN
2088 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2089 {
2090     FloatParts128 pa, pb, *pr;
2091 
2092     if (!floatx80_unpack_canonical(&pa, a, status) ||
2093         !floatx80_unpack_canonical(&pb, b, status)) {
2094         return floatx80_default_nan(status);
2095     }
2096 
2097     pr = parts_mul(&pa, &pb, status);
2098     return floatx80_round_pack_canonical(pr, status);
2099 }
2100 
2101 /*
2102  * Fused multiply-add
2103  */
2104 
2105 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2106                                     int flags, float_status *status)
2107 {
2108     FloatParts64 pa, pb, pc, *pr;
2109 
2110     float16_unpack_canonical(&pa, a, status);
2111     float16_unpack_canonical(&pb, b, status);
2112     float16_unpack_canonical(&pc, c, status);
2113     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2114 
2115     return float16_round_pack_canonical(pr, status);
2116 }
2117 
2118 static float32 QEMU_SOFTFLOAT_ATTR
2119 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2120                 float_status *status)
2121 {
2122     FloatParts64 pa, pb, pc, *pr;
2123 
2124     float32_unpack_canonical(&pa, a, status);
2125     float32_unpack_canonical(&pb, b, status);
2126     float32_unpack_canonical(&pc, c, status);
2127     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2128 
2129     return float32_round_pack_canonical(pr, status);
2130 }
2131 
2132 static float64 QEMU_SOFTFLOAT_ATTR
2133 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2134                 float_status *status)
2135 {
2136     FloatParts64 pa, pb, pc, *pr;
2137 
2138     float64_unpack_canonical(&pa, a, status);
2139     float64_unpack_canonical(&pb, b, status);
2140     float64_unpack_canonical(&pc, c, status);
2141     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2142 
2143     return float64_round_pack_canonical(pr, status);
2144 }
2145 
2146 static bool force_soft_fma;
2147 
2148 float32 QEMU_FLATTEN
2149 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2150 {
2151     union_float32 ua, ub, uc, ur;
2152 
2153     ua.s = xa;
2154     ub.s = xb;
2155     uc.s = xc;
2156 
2157     if (unlikely(!can_use_fpu(s))) {
2158         goto soft;
2159     }
2160     if (unlikely(flags & float_muladd_halve_result)) {
2161         goto soft;
2162     }
2163 
2164     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2165     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2166         goto soft;
2167     }
2168 
2169     if (unlikely(force_soft_fma)) {
2170         goto soft;
2171     }
2172 
2173     /*
2174      * When a == 0 or b == 0, there is no need to check for underflow or
2175      * overflow: we know the addend is (normal || 0) and the product is 0.
2176      */
2177     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2178         union_float32 up;
2179         bool prod_sign;
2180 
2181         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2182         prod_sign ^= !!(flags & float_muladd_negate_product);
2183         up.s = float32_set_sign(float32_zero, prod_sign);
2184 
2185         if (flags & float_muladd_negate_c) {
2186             uc.h = -uc.h;
2187         }
2188         ur.h = up.h + uc.h;
2189     } else {
2190         union_float32 ua_orig = ua;
2191         union_float32 uc_orig = uc;
2192 
2193         if (flags & float_muladd_negate_product) {
2194             ua.h = -ua.h;
2195         }
2196         if (flags & float_muladd_negate_c) {
2197             uc.h = -uc.h;
2198         }
2199 
2200         ur.h = fmaf(ua.h, ub.h, uc.h);
2201 
2202         if (unlikely(f32_is_inf(ur))) {
2203             float_raise(float_flag_overflow, s);
2204         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2205             ua = ua_orig;
2206             uc = uc_orig;
2207             goto soft;
2208         }
2209     }
2210     if (flags & float_muladd_negate_result) {
2211         return float32_chs(ur.s);
2212     }
2213     return ur.s;
2214 
2215  soft:
2216     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2217 }
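
/*
 * Usage sketch (illustrative): the flags combine with the fused
 * operation before any rounding, so a fused multiply-subtract
 * r = a*b - c can be expressed as:
 *
 *     float32 r = float32_muladd(a, b, c, float_muladd_negate_c, &st);
 *
 * which differs from float32_sub(float32_mul(a, b, &st), c, &st) in
 * that the product is not rounded before the subtraction.
 */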
2218 
2219 float64 QEMU_FLATTEN
2220 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2221 {
2222     union_float64 ua, ub, uc, ur;
2223 
2224     ua.s = xa;
2225     ub.s = xb;
2226     uc.s = xc;
2227 
2228     if (unlikely(!can_use_fpu(s))) {
2229         goto soft;
2230     }
2231     if (unlikely(flags & float_muladd_halve_result)) {
2232         goto soft;
2233     }
2234 
2235     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2236     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2237         goto soft;
2238     }
2239 
2240     if (unlikely(force_soft_fma)) {
2241         goto soft;
2242     }
2243 
2244     /*
2245      * When a == 0 or b == 0, there is no need to check for underflow or
2246      * overflow: we know the addend is (normal || 0) and the product is 0.
2247      */
2248     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2249         union_float64 up;
2250         bool prod_sign;
2251 
2252         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2253         prod_sign ^= !!(flags & float_muladd_negate_product);
2254         up.s = float64_set_sign(float64_zero, prod_sign);
2255 
2256         if (flags & float_muladd_negate_c) {
2257             uc.h = -uc.h;
2258         }
2259         ur.h = up.h + uc.h;
2260     } else {
2261         union_float64 ua_orig = ua;
2262         union_float64 uc_orig = uc;
2263 
2264         if (flags & float_muladd_negate_product) {
2265             ua.h = -ua.h;
2266         }
2267         if (flags & float_muladd_negate_c) {
2268             uc.h = -uc.h;
2269         }
2270 
2271         ur.h = fma(ua.h, ub.h, uc.h);
2272 
2273         if (unlikely(f64_is_inf(ur))) {
2274             float_raise(float_flag_overflow, s);
2275         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
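            /*
             * NB: comparing against FLT_MIN (rather than DBL_MIN)
             * keeps this bound conservative: any result this small is
             * redone on the soft path, where underflow and inexact
             * are flagged precisely.
             */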
2276             ua = ua_orig;
2277             uc = uc_orig;
2278             goto soft;
2279         }
2280     }
2281     if (flags & float_muladd_negate_result) {
2282         return float64_chs(ur.s);
2283     }
2284     return ur.s;
2285 
2286  soft:
2287     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2288 }
2289 
2290 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2291                                       int flags, float_status *status)
2292 {
2293     FloatParts64 pa, pb, pc, *pr;
2294 
2295     bfloat16_unpack_canonical(&pa, a, status);
2296     bfloat16_unpack_canonical(&pb, b, status);
2297     bfloat16_unpack_canonical(&pc, c, status);
2298     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2299 
2300     return bfloat16_round_pack_canonical(pr, status);
2301 }
2302 
2303 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2304                                       int flags, float_status *status)
2305 {
2306     FloatParts128 pa, pb, pc, *pr;
2307 
2308     float128_unpack_canonical(&pa, a, status);
2309     float128_unpack_canonical(&pb, b, status);
2310     float128_unpack_canonical(&pc, c, status);
2311     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2312 
2313     return float128_round_pack_canonical(pr, status);
2314 }
2315 
2316 /*
2317  * Division
2318  */
2319 
2320 float16 float16_div(float16 a, float16 b, float_status *status)
2321 {
2322     FloatParts64 pa, pb, *pr;
2323 
2324     float16_unpack_canonical(&pa, a, status);
2325     float16_unpack_canonical(&pb, b, status);
2326     pr = parts_div(&pa, &pb, status);
2327 
2328     return float16_round_pack_canonical(pr, status);
2329 }
2330 
2331 static float32 QEMU_SOFTFLOAT_ATTR
2332 soft_f32_div(float32 a, float32 b, float_status *status)
2333 {
2334     FloatParts64 pa, pb, *pr;
2335 
2336     float32_unpack_canonical(&pa, a, status);
2337     float32_unpack_canonical(&pb, b, status);
2338     pr = parts_div(&pa, &pb, status);
2339 
2340     return float32_round_pack_canonical(pr, status);
2341 }
2342 
2343 static float64 QEMU_SOFTFLOAT_ATTR
2344 soft_f64_div(float64 a, float64 b, float_status *status)
2345 {
2346     FloatParts64 pa, pb, *pr;
2347 
2348     float64_unpack_canonical(&pa, a, status);
2349     float64_unpack_canonical(&pb, b, status);
2350     pr = parts_div(&pa, &pb, status);
2351 
2352     return float64_round_pack_canonical(pr, status);
2353 }
2354 
2355 static float hard_f32_div(float a, float b)
2356 {
2357     return a / b;
2358 }
2359 
2360 static double hard_f64_div(double a, double b)
2361 {
2362     return a / b;
2363 }
2364 
2365 static bool f32_div_pre(union_float32 a, union_float32 b)
2366 {
2367     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2368         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2369                fpclassify(b.h) == FP_NORMAL;
2370     }
2371     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2372 }
2373 
2374 static bool f64_div_pre(union_float64 a, union_float64 b)
2375 {
2376     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2377         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2378                fpclassify(b.h) == FP_NORMAL;
2379     }
2380     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2381 }
2382 
2383 static bool f32_div_post(union_float32 a, union_float32 b)
2384 {
2385     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2386         return fpclassify(a.h) != FP_ZERO;
2387     }
2388     return !float32_is_zero(a.s);
2389 }
2390 
2391 static bool f64_div_post(union_float64 a, union_float64 b)
2392 {
2393     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2394         return fpclassify(a.h) != FP_ZERO;
2395     }
2396     return !float64_is_zero(a.s);
2397 }
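
/*
 * For division the pre predicates additionally require a normal
 * divisor, so the host operation can neither divide by zero nor see a
 * subnormal b.  When the host result is tiny, the post predicates keep
 * it only if the dividend was zero (the quotient 0 is then exact); any
 * other tiny quotient may have underflowed and is redone in software.
 */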
2398 
2399 float32 QEMU_FLATTEN
2400 float32_div(float32 a, float32 b, float_status *s)
2401 {
2402     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2403                         f32_div_pre, f32_div_post);
2404 }
2405 
2406 float64 QEMU_FLATTEN
2407 float64_div(float64 a, float64 b, float_status *s)
2408 {
2409     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2410                         f64_div_pre, f64_div_post);
2411 }
2412 
2413 bfloat16 QEMU_FLATTEN
2414 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2415 {
2416     FloatParts64 pa, pb, *pr;
2417 
2418     bfloat16_unpack_canonical(&pa, a, status);
2419     bfloat16_unpack_canonical(&pb, b, status);
2420     pr = parts_div(&pa, &pb, status);
2421 
2422     return bfloat16_round_pack_canonical(pr, status);
2423 }
2424 
2425 float128 QEMU_FLATTEN
2426 float128_div(float128 a, float128 b, float_status *status)
2427 {
2428     FloatParts128 pa, pb, *pr;
2429 
2430     float128_unpack_canonical(&pa, a, status);
2431     float128_unpack_canonical(&pb, b, status);
2432     pr = parts_div(&pa, &pb, status);
2433 
2434     return float128_round_pack_canonical(pr, status);
2435 }
2436 
2437 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2438 {
2439     FloatParts128 pa, pb, *pr;
2440 
2441     if (!floatx80_unpack_canonical(&pa, a, status) ||
2442         !floatx80_unpack_canonical(&pb, b, status)) {
2443         return floatx80_default_nan(status);
2444     }
2445 
2446     pr = parts_div(&pa, &pb, status);
2447     return floatx80_round_pack_canonical(pr, status);
2448 }
2449 
2450 /*
2451  * Remainder
2452  */
2453 
2454 float32 float32_rem(float32 a, float32 b, float_status *status)
2455 {
2456     FloatParts64 pa, pb, *pr;
2457 
2458     float32_unpack_canonical(&pa, a, status);
2459     float32_unpack_canonical(&pb, b, status);
2460     pr = parts_modrem(&pa, &pb, NULL, status);
2461 
2462     return float32_round_pack_canonical(pr, status);
2463 }
2464 
2465 float64 float64_rem(float64 a, float64 b, float_status *status)
2466 {
2467     FloatParts64 pa, pb, *pr;
2468 
2469     float64_unpack_canonical(&pa, a, status);
2470     float64_unpack_canonical(&pb, b, status);
2471     pr = parts_modrem(&pa, &pb, NULL, status);
2472 
2473     return float64_round_pack_canonical(pr, status);
2474 }
2475 
2476 float128 float128_rem(float128 a, float128 b, float_status *status)
2477 {
2478     FloatParts128 pa, pb, *pr;
2479 
2480     float128_unpack_canonical(&pa, a, status);
2481     float128_unpack_canonical(&pb, b, status);
2482     pr = parts_modrem(&pa, &pb, NULL, status);
2483 
2484     return float128_round_pack_canonical(pr, status);
2485 }
2486 
2487 /*
2488  * Returns the remainder of the extended double-precision floating-point value
2489  * `a' with respect to the corresponding value `b'.
2490  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2491  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2492  * remainder is instead computed by truncating the quotient toward zero,
2493  * and *quotient is set to the low 64 bits of the absolute value of the
2494  * integer quotient.
2495  */
2496 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2497                          uint64_t *quotient, float_status *status)
2498 {
2499     FloatParts128 pa, pb, *pr;
2500 
2501     *quotient = 0;
2502     if (!floatx80_unpack_canonical(&pa, a, status) ||
2503         !floatx80_unpack_canonical(&pb, b, status)) {
2504         return floatx80_default_nan(status);
2505     }
2506     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2507 
2508     return floatx80_round_pack_canonical(pr, status);
2509 }
2510 
2511 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2512 {
2513     uint64_t quotient;
2514     return floatx80_modrem(a, b, false, &quotient, status);
2515 }
2516 
2517 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2518 {
2519     uint64_t quotient;
2520     return floatx80_modrem(a, b, true, &quotient, status);
2521 }
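
/*
 * Worked example (illustrative): for a = 5.0 and b = 3.0, the IEEE
 * remainder rounds the quotient 5/3 to the nearest integer (2), while
 * the truncating form uses 1:
 *
 *     uint64_t q;
 *     floatx80_modrem(a, b, false, &q, &st);  // -1.0 (q left at 0)
 *     floatx80_modrem(a, b, true,  &q, &st);  //  2.0, q == 1
 */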
2522 
2523 /*
2524  * Float to Float conversions
2525  *
2526  * Returns the result of converting one float format to another. The
2527  * conversion is performed according to the IEC/IEEE Standard for
2528  * Binary Floating-Point Arithmetic.
2529  *
2530  * Usually this only needs to take care of raising invalid exceptions
2531  * and handling the conversion of NaNs.
2532  */
2533 
2534 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2535 {
2536     switch (a->cls) {
2537     case float_class_qnan:
2538     case float_class_snan:
2539         /*
2540          * There is no NaN in the destination format.  Raise Invalid
2541          * and return a zero with the sign of the input NaN.
2542          */
2543         float_raise(float_flag_invalid, s);
2544         a->cls = float_class_zero;
2545         break;
2546 
2547     case float_class_inf:
2548         /*
2549          * There is no Inf in the destination format.  Raise Invalid
2550          * and return the maximum normal with the correct sign.
2551          */
2552         float_raise(float_flag_invalid, s);
2553         a->cls = float_class_normal;
2554         a->exp = float16_params_ahp.exp_max;
2555         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2556                                   float16_params_ahp.frac_size + 1);
2557         break;
2558 
2559     case float_class_normal:
2560     case float_class_zero:
2561         break;
2562 
2563     default:
2564         g_assert_not_reached();
2565     }
2566 }
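
/*
 * For example (assuming the ARM alternative half-precision behavior
 * that float16_params_ahp describes): converting float32 1.0e6 with
 * ieee=false saturates to the AHP maximum normal, 0x7fff == 131008.0,
 * and raises the invalid flag, since AHP reuses the all-ones exponent
 * for normal numbers instead of Inf/NaN.
 */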
2567 
2568 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2569 {
2570     if (is_nan(a->cls)) {
2571         parts_return_nan(a, s);
2572     }
2573 }
2574 
2575 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2576 {
2577     if (is_nan(a->cls)) {
2578         parts_return_nan(a, s);
2579     }
2580 }
2581 
2582 #define parts_float_to_float(P, S) \
2583     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2584 
2585 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2586                                         float_status *s)
2587 {
2588     a->cls = b->cls;
2589     a->sign = b->sign;
2590     a->exp = b->exp;
2591 
2592     if (a->cls == float_class_normal) {
2593         frac_truncjam(a, b);
2594     } else if (is_nan(a->cls)) {
2595         /* Discard the low bits of the NaN. */
2596         a->frac = b->frac_hi;
2597         parts_return_nan(a, s);
2598     }
2599 }
2600 
2601 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2602                                        float_status *s)
2603 {
2604     a->cls = b->cls;
2605     a->sign = b->sign;
2606     a->exp = b->exp;
2607     frac_widen(a, b);
2608 
2609     if (is_nan(a->cls)) {
2610         parts_return_nan(a, s);
2611     }
2612 }
2613 
2614 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2615 {
2616     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2617     FloatParts64 p;
2618 
2619     float16a_unpack_canonical(&p, a, s, fmt16);
2620     parts_float_to_float(&p, s);
2621     return float32_round_pack_canonical(&p, s);
2622 }
2623 
2624 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2625 {
2626     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2627     FloatParts64 p;
2628 
2629     float16a_unpack_canonical(&p, a, s, fmt16);
2630     parts_float_to_float(&p, s);
2631     return float64_round_pack_canonical(&p, s);
2632 }
2633 
2634 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2635 {
2636     FloatParts64 p;
2637     const FloatFmt *fmt;
2638 
2639     float32_unpack_canonical(&p, a, s);
2640     if (ieee) {
2641         parts_float_to_float(&p, s);
2642         fmt = &float16_params;
2643     } else {
2644         parts_float_to_ahp(&p, s);
2645         fmt = &float16_params_ahp;
2646     }
2647     return float16a_round_pack_canonical(&p, s, fmt);
2648 }
2649 
2650 static float64 QEMU_SOFTFLOAT_ATTR
2651 soft_float32_to_float64(float32 a, float_status *s)
2652 {
2653     FloatParts64 p;
2654 
2655     float32_unpack_canonical(&p, a, s);
2656     parts_float_to_float(&p, s);
2657     return float64_round_pack_canonical(&p, s);
2658 }
2659 
2660 float64 float32_to_float64(float32 a, float_status *s)
2661 {
2662     if (likely(float32_is_normal(a))) {
2663         /* Widening conversion can never produce inexact results.  */
2664         union_float32 uf;
2665         union_float64 ud;
2666         uf.s = a;
2667         ud.h = uf.h;
2668         return ud.s;
2669     } else if (float32_is_zero(a)) {
2670         return float64_set_sign(float64_zero, float32_is_neg(a));
2671     } else {
2672         return soft_float32_to_float64(a, s);
2673     }
2674 }
2675 
2676 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2677 {
2678     FloatParts64 p;
2679     const FloatFmt *fmt;
2680 
2681     float64_unpack_canonical(&p, a, s);
2682     if (ieee) {
2683         parts_float_to_float(&p, s);
2684         fmt = &float16_params;
2685     } else {
2686         parts_float_to_ahp(&p, s);
2687         fmt = &float16_params_ahp;
2688     }
2689     return float16a_round_pack_canonical(&p, s, fmt);
2690 }
2691 
2692 float32 float64_to_float32(float64 a, float_status *s)
2693 {
2694     FloatParts64 p;
2695 
2696     float64_unpack_canonical(&p, a, s);
2697     parts_float_to_float(&p, s);
2698     return float32_round_pack_canonical(&p, s);
2699 }
2700 
2701 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2702 {
2703     FloatParts64 p;
2704 
2705     bfloat16_unpack_canonical(&p, a, s);
2706     parts_float_to_float(&p, s);
2707     return float32_round_pack_canonical(&p, s);
2708 }
2709 
2710 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2711 {
2712     FloatParts64 p;
2713 
2714     bfloat16_unpack_canonical(&p, a, s);
2715     parts_float_to_float(&p, s);
2716     return float64_round_pack_canonical(&p, s);
2717 }
2718 
2719 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2720 {
2721     FloatParts64 p;
2722 
2723     float32_unpack_canonical(&p, a, s);
2724     parts_float_to_float(&p, s);
2725     return bfloat16_round_pack_canonical(&p, s);
2726 }
2727 
2728 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2729 {
2730     FloatParts64 p;
2731 
2732     float64_unpack_canonical(&p, a, s);
2733     parts_float_to_float(&p, s);
2734     return bfloat16_round_pack_canonical(&p, s);
2735 }
2736 
2737 float32 float128_to_float32(float128 a, float_status *s)
2738 {
2739     FloatParts64 p64;
2740     FloatParts128 p128;
2741 
2742     float128_unpack_canonical(&p128, a, s);
2743     parts_float_to_float_narrow(&p64, &p128, s);
2744     return float32_round_pack_canonical(&p64, s);
2745 }
2746 
2747 float64 float128_to_float64(float128 a, float_status *s)
2748 {
2749     FloatParts64 p64;
2750     FloatParts128 p128;
2751 
2752     float128_unpack_canonical(&p128, a, s);
2753     parts_float_to_float_narrow(&p64, &p128, s);
2754     return float64_round_pack_canonical(&p64, s);
2755 }
2756 
2757 float128 float32_to_float128(float32 a, float_status *s)
2758 {
2759     FloatParts64 p64;
2760     FloatParts128 p128;
2761 
2762     float32_unpack_canonical(&p64, a, s);
2763     parts_float_to_float_widen(&p128, &p64, s);
2764     return float128_round_pack_canonical(&p128, s);
2765 }
2766 
2767 float128 float64_to_float128(float64 a, float_status *s)
2768 {
2769     FloatParts64 p64;
2770     FloatParts128 p128;
2771 
2772     float64_unpack_canonical(&p64, a, s);
2773     parts_float_to_float_widen(&p128, &p64, s);
2774     return float128_round_pack_canonical(&p128, s);
2775 }
2776 
2777 float32 floatx80_to_float32(floatx80 a, float_status *s)
2778 {
2779     FloatParts64 p64;
2780     FloatParts128 p128;
2781 
2782     if (floatx80_unpack_canonical(&p128, a, s)) {
2783         parts_float_to_float_narrow(&p64, &p128, s);
2784     } else {
2785         parts_default_nan(&p64, s);
2786     }
2787     return float32_round_pack_canonical(&p64, s);
2788 }
2789 
2790 float64 floatx80_to_float64(floatx80 a, float_status *s)
2791 {
2792     FloatParts64 p64;
2793     FloatParts128 p128;
2794 
2795     if (floatx80_unpack_canonical(&p128, a, s)) {
2796         parts_float_to_float_narrow(&p64, &p128, s);
2797     } else {
2798         parts_default_nan(&p64, s);
2799     }
2800     return float64_round_pack_canonical(&p64, s);
2801 }
2802 
2803 float128 floatx80_to_float128(floatx80 a, float_status *s)
2804 {
2805     FloatParts128 p;
2806 
2807     if (floatx80_unpack_canonical(&p, a, s)) {
2808         parts_float_to_float(&p, s);
2809     } else {
2810         parts_default_nan(&p, s);
2811     }
2812     return float128_round_pack_canonical(&p, s);
2813 }
2814 
2815 floatx80 float32_to_floatx80(float32 a, float_status *s)
2816 {
2817     FloatParts64 p64;
2818     FloatParts128 p128;
2819 
2820     float32_unpack_canonical(&p64, a, s);
2821     parts_float_to_float_widen(&p128, &p64, s);
2822     return floatx80_round_pack_canonical(&p128, s);
2823 }
2824 
2825 floatx80 float64_to_floatx80(float64 a, float_status *s)
2826 {
2827     FloatParts64 p64;
2828     FloatParts128 p128;
2829 
2830     float64_unpack_canonical(&p64, a, s);
2831     parts_float_to_float_widen(&p128, &p64, s);
2832     return floatx80_round_pack_canonical(&p128, s);
2833 }
2834 
2835 floatx80 float128_to_floatx80(float128 a, float_status *s)
2836 {
2837     FloatParts128 p;
2838 
2839     float128_unpack_canonical(&p, a, s);
2840     parts_float_to_float(&p, s);
2841     return floatx80_round_pack_canonical(&p, s);
2842 }
2843 
2844 /*
2845  * Round to integral value
2846  */
2847 
2848 float16 float16_round_to_int(float16 a, float_status *s)
2849 {
2850     FloatParts64 p;
2851 
2852     float16_unpack_canonical(&p, a, s);
2853     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2854     return float16_round_pack_canonical(&p, s);
2855 }
2856 
2857 float32 float32_round_to_int(float32 a, float_status *s)
2858 {
2859     FloatParts64 p;
2860 
2861     float32_unpack_canonical(&p, a, s);
2862     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2863     return float32_round_pack_canonical(&p, s);
2864 }
2865 
2866 float64 float64_round_to_int(float64 a, float_status *s)
2867 {
2868     FloatParts64 p;
2869 
2870     float64_unpack_canonical(&p, a, s);
2871     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2872     return float64_round_pack_canonical(&p, s);
2873 }
2874 
2875 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2876 {
2877     FloatParts64 p;
2878 
2879     bfloat16_unpack_canonical(&p, a, s);
2880     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2881     return bfloat16_round_pack_canonical(&p, s);
2882 }
2883 
2884 float128 float128_round_to_int(float128 a, float_status *s)
2885 {
2886     FloatParts128 p;
2887 
2888     float128_unpack_canonical(&p, a, s);
2889     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2890     return float128_round_pack_canonical(&p, s);
2891 }
2892 
2893 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2894 {
2895     FloatParts128 p;
2896 
2897     if (!floatx80_unpack_canonical(&p, a, status)) {
2898         return floatx80_default_nan(status);
2899     }
2900 
2901     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2902                        &floatx80_params[status->floatx80_rounding_precision]);
2903     return floatx80_round_pack_canonical(&p, status);
2904 }
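
/*
 * Example (illustrative): the rounding mode is taken from the status,
 * so with float_round_nearest_even an input of 2.5 rounds to 2.0
 * (ties-to-even), while after
 *
 *     set_float_rounding_mode(float_round_up, &st);
 *
 * the same input rounds to 3.0.  The inexact flag is raised whenever
 * the result differs from the input.
 */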
2905 
2906 /*
2907  * Floating-point to signed integer conversions
2908  */
2909 
2910 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2911                               float_status *s)
2912 {
2913     FloatParts64 p;
2914 
2915     float16_unpack_canonical(&p, a, s);
2916     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2917 }
2918 
2919 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2920                                 float_status *s)
2921 {
2922     FloatParts64 p;
2923 
2924     float16_unpack_canonical(&p, a, s);
2925     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2926 }
2927 
2928 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2929                                 float_status *s)
2930 {
2931     FloatParts64 p;
2932 
2933     float16_unpack_canonical(&p, a, s);
2934     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2935 }
2936 
2937 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2938                                 float_status *s)
2939 {
2940     FloatParts64 p;
2941 
2942     float16_unpack_canonical(&p, a, s);
2943     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2944 }
2945 
2946 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2947                                 float_status *s)
2948 {
2949     FloatParts64 p;
2950 
2951     float32_unpack_canonical(&p, a, s);
2952     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2953 }
2954 
2955 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2956                                 float_status *s)
2957 {
2958     FloatParts64 p;
2959 
2960     float32_unpack_canonical(&p, a, s);
2961     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2962 }
2963 
2964 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2965                                 float_status *s)
2966 {
2967     FloatParts64 p;
2968 
2969     float32_unpack_canonical(&p, a, s);
2970     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2971 }
2972 
2973 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2974                                 float_status *s)
2975 {
2976     FloatParts64 p;
2977 
2978     float64_unpack_canonical(&p, a, s);
2979     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2980 }
2981 
2982 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2983                                 float_status *s)
2984 {
2985     FloatParts64 p;
2986 
2987     float64_unpack_canonical(&p, a, s);
2988     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2989 }
2990 
2991 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2992                                 float_status *s)
2993 {
2994     FloatParts64 p;
2995 
2996     float64_unpack_canonical(&p, a, s);
2997     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2998 }
2999 
3000 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3001                                  float_status *s)
3002 {
3003     FloatParts64 p;
3004 
3005     bfloat16_unpack_canonical(&p, a, s);
3006     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3007 }
3008 
3009 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3010                                  float_status *s)
3011 {
3012     FloatParts64 p;
3013 
3014     bfloat16_unpack_canonical(&p, a, s);
3015     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3016 }
3017 
3018 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3019                                  float_status *s)
3020 {
3021     FloatParts64 p;
3022 
3023     bfloat16_unpack_canonical(&p, a, s);
3024     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3025 }
3026 
3027 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3028                                         int scale, float_status *s)
3029 {
3030     FloatParts128 p;
3031 
3032     float128_unpack_canonical(&p, a, s);
3033     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3034 }
3035 
3036 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3037                                         int scale, float_status *s)
3038 {
3039     FloatParts128 p;
3040 
3041     float128_unpack_canonical(&p, a, s);
3042     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3043 }
3044 
3045 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3046                                         int scale, float_status *s)
3047 {
3048     FloatParts128 p;
3049 
3050     if (!floatx80_unpack_canonical(&p, a, s)) {
3051         parts_default_nan(&p, s);
3052     }
3053     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3054 }
3055 
3056 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3057                                         int scale, float_status *s)
3058 {
3059     FloatParts128 p;
3060 
3061     if (!floatx80_unpack_canonical(&p, a, s)) {
3062         parts_default_nan(&p, s);
3063     }
3064     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3065 }
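
/*
 * The _scalbn variants convert a * 2^scale, i.e. the scale is added to
 * the exponent before rounding; fixed-point conversions build on this.
 * For example (illustrative), producing a Q16.16 value:
 *
 *     int32_t fix = float64_to_int32_scalbn(x, float_round_to_zero,
 *                                           16, &st);
 *
 * Out-of-range inputs and NaNs saturate to the given min/max and raise
 * the invalid flag.
 */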
3066 
3067 int8_t float16_to_int8(float16 a, float_status *s)
3068 {
3069     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3070 }
3071 
3072 int16_t float16_to_int16(float16 a, float_status *s)
3073 {
3074     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3075 }
3076 
3077 int32_t float16_to_int32(float16 a, float_status *s)
3078 {
3079     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3080 }
3081 
3082 int64_t float16_to_int64(float16 a, float_status *s)
3083 {
3084     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3085 }
3086 
3087 int16_t float32_to_int16(float32 a, float_status *s)
3088 {
3089     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3090 }
3091 
3092 int32_t float32_to_int32(float32 a, float_status *s)
3093 {
3094     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3095 }
3096 
3097 int64_t float32_to_int64(float32 a, float_status *s)
3098 {
3099     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3100 }
3101 
3102 int16_t float64_to_int16(float64 a, float_status *s)
3103 {
3104     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3105 }
3106 
3107 int32_t float64_to_int32(float64 a, float_status *s)
3108 {
3109     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3110 }
3111 
3112 int64_t float64_to_int64(float64 a, float_status *s)
3113 {
3114     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3115 }
3116 
3117 int32_t float128_to_int32(float128 a, float_status *s)
3118 {
3119     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3120 }
3121 
3122 int64_t float128_to_int64(float128 a, float_status *s)
3123 {
3124     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3125 }
3126 
3127 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3128 {
3129     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3130 }
3131 
3132 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3133 {
3134     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3135 }
3136 
3137 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3138 {
3139     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3140 }
3141 
3142 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3143 {
3144     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3145 }
3146 
3147 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3148 {
3149     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3150 }
3151 
3152 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3153 {
3154     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3155 }
3156 
3157 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3158 {
3159     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3160 }
3161 
3162 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3163 {
3164     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3165 }
3166 
3167 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3168 {
3169     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3170 }
3171 
3172 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3173 {
3174     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3175 }
3176 
3177 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3178 {
3179     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3180 }
3181 
3182 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3183 {
3184     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3185 }
3186 
3187 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3188 {
3189     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3190 }
3191 
3192 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3193 {
3194     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3195 }
3196 
3197 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3198 {
3199     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3200 }
3201 
3202 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3203 {
3204     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3205 }
3206 
3207 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3208 {
3209     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3210 }
3211 
3212 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3213 {
3214     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3215 }
3216 
3217 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3218 {
3219     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3220 }
3221 
3222 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3223 {
3224     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3225 }
3226 
3227 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3228 {
3229     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3230 }
3231 
3232 /*
3233  * Floating-point to unsigned integer conversions
3234  */
3235 
3236 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3237                                 float_status *s)
3238 {
3239     FloatParts64 p;
3240 
3241     float16_unpack_canonical(&p, a, s);
3242     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3243 }
3244 
3245 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3246                                   float_status *s)
3247 {
3248     FloatParts64 p;
3249 
3250     float16_unpack_canonical(&p, a, s);
3251     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3252 }
3253 
3254 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3255                                   float_status *s)
3256 {
3257     FloatParts64 p;
3258 
3259     float16_unpack_canonical(&p, a, s);
3260     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3261 }
3262 
3263 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3264                                   float_status *s)
3265 {
3266     FloatParts64 p;
3267 
3268     float16_unpack_canonical(&p, a, s);
3269     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3270 }
3271 
3272 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3273                                   float_status *s)
3274 {
3275     FloatParts64 p;
3276 
3277     float32_unpack_canonical(&p, a, s);
3278     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3279 }
3280 
3281 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3282                                   float_status *s)
3283 {
3284     FloatParts64 p;
3285 
3286     float32_unpack_canonical(&p, a, s);
3287     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3288 }
3289 
3290 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3291                                   float_status *s)
3292 {
3293     FloatParts64 p;
3294 
3295     float32_unpack_canonical(&p, a, s);
3296     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3297 }
3298 
3299 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3300                                   float_status *s)
3301 {
3302     FloatParts64 p;
3303 
3304     float64_unpack_canonical(&p, a, s);
3305     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3306 }
3307 
3308 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3309                                   float_status *s)
3310 {
3311     FloatParts64 p;
3312 
3313     float64_unpack_canonical(&p, a, s);
3314     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3315 }
3316 
3317 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3318                                   float_status *s)
3319 {
3320     FloatParts64 p;
3321 
3322     float64_unpack_canonical(&p, a, s);
3323     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3324 }
3325 
3326 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3327                                    int scale, float_status *s)
3328 {
3329     FloatParts64 p;
3330 
3331     bfloat16_unpack_canonical(&p, a, s);
3332     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3333 }
3334 
3335 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3336                                    int scale, float_status *s)
3337 {
3338     FloatParts64 p;
3339 
3340     bfloat16_unpack_canonical(&p, a, s);
3341     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3342 }
3343 
3344 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3345                                    int scale, float_status *s)
3346 {
3347     FloatParts64 p;
3348 
3349     bfloat16_unpack_canonical(&p, a, s);
3350     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3351 }
3352 
3353 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3354                                           int scale, float_status *s)
3355 {
3356     FloatParts128 p;
3357 
3358     float128_unpack_canonical(&p, a, s);
3359     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3360 }
3361 
3362 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3363                                           int scale, float_status *s)
3364 {
3365     FloatParts128 p;
3366 
3367     float128_unpack_canonical(&p, a, s);
3368     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3369 }
3370 
3371 uint8_t float16_to_uint8(float16 a, float_status *s)
3372 {
3373     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3374 }
3375 
3376 uint16_t float16_to_uint16(float16 a, float_status *s)
3377 {
3378     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3379 }
3380 
3381 uint32_t float16_to_uint32(float16 a, float_status *s)
3382 {
3383     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3384 }
3385 
3386 uint64_t float16_to_uint64(float16 a, float_status *s)
3387 {
3388     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3389 }
3390 
3391 uint16_t float32_to_uint16(float32 a, float_status *s)
3392 {
3393     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3394 }
3395 
3396 uint32_t float32_to_uint32(float32 a, float_status *s)
3397 {
3398     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3399 }
3400 
3401 uint64_t float32_to_uint64(float32 a, float_status *s)
3402 {
3403     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3404 }
3405 
3406 uint16_t float64_to_uint16(float64 a, float_status *s)
3407 {
3408     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3409 }
3410 
3411 uint32_t float64_to_uint32(float64 a, float_status *s)
3412 {
3413     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3414 }
3415 
3416 uint64_t float64_to_uint64(float64 a, float_status *s)
3417 {
3418     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3419 }
3420 
3421 uint32_t float128_to_uint32(float128 a, float_status *s)
3422 {
3423     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3424 }
3425 
3426 uint64_t float128_to_uint64(float128 a, float_status *s)
3427 {
3428     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3429 }
3430 
3431 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3432 {
3433     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3434 }
3435 
3436 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3437 {
3438     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3439 }
3440 
3441 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3442 {
3443     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3444 }
3445 
3446 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3447 {
3448     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3449 }
3450 
3451 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3452 {
3453     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3454 }
3455 
3456 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3457 {
3458     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3459 }
3460 
3461 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3462 {
3463     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3464 }
3465 
3466 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3467 {
3468     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3469 }
3470 
3471 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3472 {
3473     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3474 }
3475 
3476 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3477 {
3478     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3479 }
3480 
3481 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3482 {
3483     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3484 }
3485 
3486 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3487 {
3488     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3489 }
3490 
3491 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3492 {
3493     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3494 }
3495 
3496 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3497 {
3498     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3499 }
3500 
3501 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3502 {
3503     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3504 }
3505 
3506 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3507 {
3508     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3509 }
3510 
3511 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3512 {
3513     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3514 }
3515 
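/*
 * Note on the conversions above: the *_scalbn helpers convert
 * a * 2**scale rather than a itself, saturating out-of-range results
 * and raising the invalid exception, which is what lets targets
 * implement fixed-point conversions directly.  An illustrative
 * (hypothetical) helper converting to unsigned Q16.16 fixed point:
 *
 *     uint32_t to_fixed_q16(float32 f, float_status *s)
 *     {
 *         return float32_to_uint32_scalbn(f, s->float_rounding_mode,
 *                                         16, s);
 *     }
 *
 * The plain and _round_to_zero wrappers merely pin scale to 0 and
 * choose the rounding mode.
 */
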
3516 /*
3517  * Signed integer to floating-point conversions
3518  */
3519 
3520 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3521 {
3522     FloatParts64 p;
3523 
3524     parts_sint_to_float(&p, a, scale, status);
3525     return float16_round_pack_canonical(&p, status);
3526 }
3527 
3528 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3529 {
3530     return int64_to_float16_scalbn(a, scale, status);
3531 }
3532 
3533 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3534 {
3535     return int64_to_float16_scalbn(a, scale, status);
3536 }
3537 
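/*
 * The int32/int16 variants can simply widen to int64: sign extension
 * is value-preserving and the single rounding step happens in the
 * shared 64-bit helper, so no double rounding is possible.
 */
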
3538 float16 int64_to_float16(int64_t a, float_status *status)
3539 {
3540     return int64_to_float16_scalbn(a, 0, status);
3541 }
3542 
3543 float16 int32_to_float16(int32_t a, float_status *status)
3544 {
3545     return int64_to_float16_scalbn(a, 0, status);
3546 }
3547 
3548 float16 int16_to_float16(int16_t a, float_status *status)
3549 {
3550     return int64_to_float16_scalbn(a, 0, status);
3551 }
3552 
3553 float16 int8_to_float16(int8_t a, float_status *status)
3554 {
3555     return int64_to_float16_scalbn(a, 0, status);
3556 }
3557 
3558 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3559 {
3560     FloatParts64 p;
3561 
3562     /* Without scaling, there are no overflow concerns. */
3563     if (likely(scale == 0) && can_use_fpu(status)) {
3564         union_float32 ur;
3565         ur.h = a;
3566         return ur.s;
3567     }
3568 
3569     parts64_sint_to_float(&p, a, scale, status);
3570     return float32_round_pack_canonical(&p, status);
3571 }
3572 
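/*
 * In the fast path above, the union assignment ur.h = a lets the host
 * FPU perform the int64 -> float conversion.  This is reachable only
 * when can_use_fpu(status) reports that the guest's dynamic state
 * matches host default behaviour (broadly: round-to-nearest-even with
 * the inexact flag already accumulated), so the host's correctly
 * rounded conversion is indistinguishable from the softfloat result.
 */
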
3573 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3574 {
3575     return int64_to_float32_scalbn(a, scale, status);
3576 }
3577 
3578 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3579 {
3580     return int64_to_float32_scalbn(a, scale, status);
3581 }
3582 
3583 float32 int64_to_float32(int64_t a, float_status *status)
3584 {
3585     return int64_to_float32_scalbn(a, 0, status);
3586 }
3587 
3588 float32 int32_to_float32(int32_t a, float_status *status)
3589 {
3590     return int64_to_float32_scalbn(a, 0, status);
3591 }
3592 
3593 float32 int16_to_float32(int16_t a, float_status *status)
3594 {
3595     return int64_to_float32_scalbn(a, 0, status);
3596 }
3597 
3598 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3599 {
3600     FloatParts64 p;
3601 
3602     /* Without scaling, there are no overflow concerns. */
3603     if (likely(scale == 0) && can_use_fpu(status)) {
3604         union_float64 ur;
3605         ur.h = a;
3606         return ur.s;
3607     }
3608 
3609     parts_sint_to_float(&p, a, scale, status);
3610     return float64_round_pack_canonical(&p, status);
3611 }
3612 
3613 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3614 {
3615     return int64_to_float64_scalbn(a, scale, status);
3616 }
3617 
3618 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3619 {
3620     return int64_to_float64_scalbn(a, scale, status);
3621 }
3622 
3623 float64 int64_to_float64(int64_t a, float_status *status)
3624 {
3625     return int64_to_float64_scalbn(a, 0, status);
3626 }
3627 
3628 float64 int32_to_float64(int32_t a, float_status *status)
3629 {
3630     return int64_to_float64_scalbn(a, 0, status);
3631 }
3632 
3633 float64 int16_to_float64(int16_t a, float_status *status)
3634 {
3635     return int64_to_float64_scalbn(a, 0, status);
3636 }
3637 
3638 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3639 {
3640     FloatParts64 p;
3641 
3642     parts_sint_to_float(&p, a, scale, status);
3643     return bfloat16_round_pack_canonical(&p, status);
3644 }
3645 
3646 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3647 {
3648     return int64_to_bfloat16_scalbn(a, scale, status);
3649 }
3650 
3651 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3652 {
3653     return int64_to_bfloat16_scalbn(a, scale, status);
3654 }
3655 
3656 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3657 {
3658     return int64_to_bfloat16_scalbn(a, 0, status);
3659 }
3660 
3661 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3662 {
3663     return int64_to_bfloat16_scalbn(a, 0, status);
3664 }
3665 
3666 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3667 {
3668     return int64_to_bfloat16_scalbn(a, 0, status);
3669 }
3670 
3671 float128 int64_to_float128(int64_t a, float_status *status)
3672 {
3673     FloatParts128 p;
3674 
3675     parts_sint_to_float(&p, a, 0, status);
3676     return float128_round_pack_canonical(&p, status);
3677 }
3678 
3679 float128 int32_to_float128(int32_t a, float_status *status)
3680 {
3681     return int64_to_float128(a, status);
3682 }
3683 
3684 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3685 {
3686     FloatParts128 p;
3687 
3688     parts_sint_to_float(&p, a, 0, status);
3689     return floatx80_round_pack_canonical(&p, status);
3690 }
3691 
3692 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3693 {
3694     return int64_to_floatx80(a, status);
3695 }
3696 
3697 /*
3698  * Unsigned integer to floating-point conversions
3699  */
3700 
3701 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3702 {
3703     FloatParts64 p;
3704 
3705     parts_uint_to_float(&p, a, scale, status);
3706     return float16_round_pack_canonical(&p, status);
3707 }
3708 
3709 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3710 {
3711     return uint64_to_float16_scalbn(a, scale, status);
3712 }
3713 
3714 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3715 {
3716     return uint64_to_float16_scalbn(a, scale, status);
3717 }
3718 
3719 float16 uint64_to_float16(uint64_t a, float_status *status)
3720 {
3721     return uint64_to_float16_scalbn(a, 0, status);
3722 }
3723 
3724 float16 uint32_to_float16(uint32_t a, float_status *status)
3725 {
3726     return uint64_to_float16_scalbn(a, 0, status);
3727 }
3728 
3729 float16 uint16_to_float16(uint16_t a, float_status *status)
3730 {
3731     return uint64_to_float16_scalbn(a, 0, status);
3732 }
3733 
3734 float16 uint8_to_float16(uint8_t a, float_status *status)
3735 {
3736     return uint64_to_float16_scalbn(a, 0, status);
3737 }
3738 
3739 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3740 {
3741     FloatParts64 p;
3742 
3743     /* Without scaling, there are no overflow concerns. */
3744     if (likely(scale == 0) && can_use_fpu(status)) {
3745         union_float32 ur;
3746         ur.h = a;
3747         return ur.s;
3748     }
3749 
3750     parts_uint_to_float(&p, a, scale, status);
3751     return float32_round_pack_canonical(&p, status);
3752 }
3753 
3754 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3755 {
3756     return uint64_to_float32_scalbn(a, scale, status);
3757 }
3758 
3759 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3760 {
3761     return uint64_to_float32_scalbn(a, scale, status);
3762 }
3763 
3764 float32 uint64_to_float32(uint64_t a, float_status *status)
3765 {
3766     return uint64_to_float32_scalbn(a, 0, status);
3767 }
3768 
3769 float32 uint32_to_float32(uint32_t a, float_status *status)
3770 {
3771     return uint64_to_float32_scalbn(a, 0, status);
3772 }
3773 
3774 float32 uint16_to_float32(uint16_t a, float_status *status)
3775 {
3776     return uint64_to_float32_scalbn(a, 0, status);
3777 }
3778 
3779 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3780 {
3781     FloatParts64 p;
3782 
3783     /* Without scaling, there are no overflow concerns. */
3784     if (likely(scale == 0) && can_use_fpu(status)) {
3785         union_float64 ur;
3786         ur.h = a;
3787         return ur.s;
3788     }
3789 
3790     parts_uint_to_float(&p, a, scale, status);
3791     return float64_round_pack_canonical(&p, status);
3792 }
3793 
3794 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3795 {
3796     return uint64_to_float64_scalbn(a, scale, status);
3797 }
3798 
3799 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3800 {
3801     return uint64_to_float64_scalbn(a, scale, status);
3802 }
3803 
3804 float64 uint64_to_float64(uint64_t a, float_status *status)
3805 {
3806     return uint64_to_float64_scalbn(a, 0, status);
3807 }
3808 
3809 float64 uint32_to_float64(uint32_t a, float_status *status)
3810 {
3811     return uint64_to_float64_scalbn(a, 0, status);
3812 }
3813 
3814 float64 uint16_to_float64(uint16_t a, float_status *status)
3815 {
3816     return uint64_to_float64_scalbn(a, 0, status);
3817 }
3818 
3819 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3820 {
3821     FloatParts64 p;
3822 
3823     parts_uint_to_float(&p, a, scale, status);
3824     return bfloat16_round_pack_canonical(&p, status);
3825 }
3826 
3827 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3828 {
3829     return uint64_to_bfloat16_scalbn(a, scale, status);
3830 }
3831 
3832 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3833 {
3834     return uint64_to_bfloat16_scalbn(a, scale, status);
3835 }
3836 
3837 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3838 {
3839     return uint64_to_bfloat16_scalbn(a, 0, status);
3840 }
3841 
3842 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3843 {
3844     return uint64_to_bfloat16_scalbn(a, 0, status);
3845 }
3846 
3847 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3848 {
3849     return uint64_to_bfloat16_scalbn(a, 0, status);
3850 }
3851 
3852 float128 uint64_to_float128(uint64_t a, float_status *status)
3853 {
3854     FloatParts128 p;
3855 
3856     parts_uint_to_float(&p, a, 0, status);
3857     return float128_round_pack_canonical(&p, status);
3858 }
3859 
3860 /*
3861  * Minimum and maximum
3862  */
3863 
3864 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3865 {
3866     FloatParts64 pa, pb, *pr;
3867 
3868     float16_unpack_canonical(&pa, a, s);
3869     float16_unpack_canonical(&pb, b, s);
3870     pr = parts_minmax(&pa, &pb, s, flags);
3871 
3872     return float16_round_pack_canonical(pr, s);
3873 }
3874 
3875 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3876                                 float_status *s, int flags)
3877 {
3878     FloatParts64 pa, pb, *pr;
3879 
3880     bfloat16_unpack_canonical(&pa, a, s);
3881     bfloat16_unpack_canonical(&pb, b, s);
3882     pr = parts_minmax(&pa, &pb, s, flags);
3883 
3884     return bfloat16_round_pack_canonical(pr, s);
3885 }
3886 
3887 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3888 {
3889     FloatParts64 pa, pb, *pr;
3890 
3891     float32_unpack_canonical(&pa, a, s);
3892     float32_unpack_canonical(&pb, b, s);
3893     pr = parts_minmax(&pa, &pb, s, flags);
3894 
3895     return float32_round_pack_canonical(pr, s);
3896 }
3897 
3898 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3899 {
3900     FloatParts64 pa, pb, *pr;
3901 
3902     float64_unpack_canonical(&pa, a, s);
3903     float64_unpack_canonical(&pb, b, s);
3904     pr = parts_minmax(&pa, &pb, s, flags);
3905 
3906     return float64_round_pack_canonical(pr, s);
3907 }
3908 
3909 static float128 float128_minmax(float128 a, float128 b,
3910                                 float_status *s, int flags)
3911 {
3912     FloatParts128 pa, pb, *pr;
3913 
3914     float128_unpack_canonical(&pa, a, s);
3915     float128_unpack_canonical(&pb, b, s);
3916     pr = parts_minmax(&pa, &pb, s, flags);
3917 
3918     return float128_round_pack_canonical(pr, s);
3919 }
3920 
3921 #define MINMAX_1(type, name, flags) \
3922     type type##_##name(type a, type b, float_status *s) \
3923     { return type##_minmax(a, b, s, flags); }
3924 
3925 #define MINMAX_2(type) \
3926     MINMAX_1(type, max, 0)                                      \
3927     MINMAX_1(type, maxnum, minmax_isnum)                        \
3928     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
3929     MINMAX_1(type, min, minmax_ismin)                           \
3930     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
3931     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3932 
3933 MINMAX_2(float16)
3934 MINMAX_2(bfloat16)
3935 MINMAX_2(float32)
3936 MINMAX_2(float64)
3937 MINMAX_2(float128)
3938 
3939 #undef MINMAX_1
3940 #undef MINMAX_2
3941 
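/*
 * For reference, MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum)
 * expands to:
 *
 *     float32 float32_minnum(float32 a, float32 b, float_status *s)
 *     { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
 *
 * so each MINMAX_2(type) instantiation emits the six public min/max
 * entry points for that type.
 */
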
3942 /*
3943  * Floating-point compare
3944  */
3945 
3946 static FloatRelation QEMU_FLATTEN
3947 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3948 {
3949     FloatParts64 pa, pb;
3950 
3951     float16_unpack_canonical(&pa, a, s);
3952     float16_unpack_canonical(&pb, b, s);
3953     return parts_compare(&pa, &pb, s, is_quiet);
3954 }
3955 
3956 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3957 {
3958     return float16_do_compare(a, b, s, false);
3959 }
3960 
3961 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3962 {
3963     return float16_do_compare(a, b, s, true);
3964 }
3965 
3966 static FloatRelation QEMU_SOFTFLOAT_ATTR
3967 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3968 {
3969     FloatParts64 pa, pb;
3970 
3971     float32_unpack_canonical(&pa, a, s);
3972     float32_unpack_canonical(&pb, b, s);
3973     return parts_compare(&pa, &pb, s, is_quiet);
3974 }
3975 
3976 static FloatRelation QEMU_FLATTEN
3977 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3978 {
3979     union_float32 ua, ub;
3980 
3981     ua.s = xa;
3982     ub.s = xb;
3983 
3984     if (QEMU_NO_HARDFLOAT) {
3985         goto soft;
3986     }
3987 
3988     float32_input_flush2(&ua.s, &ub.s, s);
3989     if (isgreaterequal(ua.h, ub.h)) {
3990         if (isgreater(ua.h, ub.h)) {
3991             return float_relation_greater;
3992         }
3993         return float_relation_equal;
3994     }
3995     if (likely(isless(ua.h, ub.h))) {
3996         return float_relation_less;
3997     }
3998     /*
3999      * The only condition remaining is unordered.
4000      * Fall through to set flags.
4001      */
4002  soft:
4003     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4004 }
4005 
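/*
 * The hardfloat path above leans on the ISO C comparison macros
 * (isgreater, isless, ...) being "quiet": unlike the < and > operators,
 * they raise no invalid-operation exception for quiet NaN operands.
 * Any pair that orders is therefore answered directly; the unordered
 * case, where NaN classification and exception flags matter, always
 * falls through to the softfloat path.  float64_hs_compare below
 * follows the same pattern.
 */
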
4006 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4007 {
4008     return float32_hs_compare(a, b, s, false);
4009 }
4010 
4011 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4012 {
4013     return float32_hs_compare(a, b, s, true);
4014 }
4015 
4016 static FloatRelation QEMU_SOFTFLOAT_ATTR
4017 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4018 {
4019     FloatParts64 pa, pb;
4020 
4021     float64_unpack_canonical(&pa, a, s);
4022     float64_unpack_canonical(&pb, b, s);
4023     return parts_compare(&pa, &pb, s, is_quiet);
4024 }
4025 
4026 static FloatRelation QEMU_FLATTEN
4027 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4028 {
4029     union_float64 ua, ub;
4030 
4031     ua.s = xa;
4032     ub.s = xb;
4033 
4034     if (QEMU_NO_HARDFLOAT) {
4035         goto soft;
4036     }
4037 
4038     float64_input_flush2(&ua.s, &ub.s, s);
4039     if (isgreaterequal(ua.h, ub.h)) {
4040         if (isgreater(ua.h, ub.h)) {
4041             return float_relation_greater;
4042         }
4043         return float_relation_equal;
4044     }
4045     if (likely(isless(ua.h, ub.h))) {
4046         return float_relation_less;
4047     }
4048     /*
4049      * The only condition remaining is unordered.
4050      * Fall through to set flags.
4051      */
4052  soft:
4053     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4054 }
4055 
4056 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4057 {
4058     return float64_hs_compare(a, b, s, false);
4059 }
4060 
4061 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4062 {
4063     return float64_hs_compare(a, b, s, true);
4064 }
4065 
4066 static FloatRelation QEMU_FLATTEN
4067 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4068 {
4069     FloatParts64 pa, pb;
4070 
4071     bfloat16_unpack_canonical(&pa, a, s);
4072     bfloat16_unpack_canonical(&pb, b, s);
4073     return parts_compare(&pa, &pb, s, is_quiet);
4074 }
4075 
4076 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4077 {
4078     return bfloat16_do_compare(a, b, s, false);
4079 }
4080 
4081 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4082 {
4083     return bfloat16_do_compare(a, b, s, true);
4084 }
4085 
4086 static FloatRelation QEMU_FLATTEN
4087 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4088 {
4089     FloatParts128 pa, pb;
4090 
4091     float128_unpack_canonical(&pa, a, s);
4092     float128_unpack_canonical(&pb, b, s);
4093     return parts_compare(&pa, &pb, s, is_quiet);
4094 }
4095 
4096 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4097 {
4098     return float128_do_compare(a, b, s, false);
4099 }
4100 
4101 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4102 {
4103     return float128_do_compare(a, b, s, true);
4104 }
4105 
4106 static FloatRelation QEMU_FLATTEN
4107 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4108 {
4109     FloatParts128 pa, pb;
4110 
4111     if (!floatx80_unpack_canonical(&pa, a, s) ||
4112         !floatx80_unpack_canonical(&pb, b, s)) {
4113         return float_relation_unordered;
4114     }
4115     return parts_compare(&pa, &pb, s, is_quiet);
4116 }
4117 
4118 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4119 {
4120     return floatx80_do_compare(a, b, s, false);
4121 }
4122 
4123 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4124 {
4125     return floatx80_do_compare(a, b, s, true);
4126 }
4127 
4128 /*
4129  * Scale by 2**N
4130  */
4131 
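/*
 * These implement the IEEE-754 scaleB operation: the result is
 * a * 2**n with a single rounding, so e.g. scaling 1.5 by n = 4
 * yields 24.0 exactly, while a scale that overflows the format raises
 * overflow and inexact just as the equivalent multiplication would.
 */
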
4132 float16 float16_scalbn(float16 a, int n, float_status *status)
4133 {
4134     FloatParts64 p;
4135 
4136     float16_unpack_canonical(&p, a, status);
4137     parts_scalbn(&p, n, status);
4138     return float16_round_pack_canonical(&p, status);
4139 }
4140 
4141 float32 float32_scalbn(float32 a, int n, float_status *status)
4142 {
4143     FloatParts64 p;
4144 
4145     float32_unpack_canonical(&p, a, status);
4146     parts_scalbn(&p, n, status);
4147     return float32_round_pack_canonical(&p, status);
4148 }
4149 
4150 float64 float64_scalbn(float64 a, int n, float_status *status)
4151 {
4152     FloatParts64 p;
4153 
4154     float64_unpack_canonical(&p, a, status);
4155     parts_scalbn(&p, n, status);
4156     return float64_round_pack_canonical(&p, status);
4157 }
4158 
4159 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4160 {
4161     FloatParts64 p;
4162 
4163     bfloat16_unpack_canonical(&p, a, status);
4164     parts_scalbn(&p, n, status);
4165     return bfloat16_round_pack_canonical(&p, status);
4166 }
4167 
4168 float128 float128_scalbn(float128 a, int n, float_status *status)
4169 {
4170     FloatParts128 p;
4171 
4172     float128_unpack_canonical(&p, a, status);
4173     parts_scalbn(&p, n, status);
4174     return float128_round_pack_canonical(&p, status);
4175 }
4176 
4177 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4178 {
4179     FloatParts128 p;
4180 
4181     if (!floatx80_unpack_canonical(&p, a, status)) {
4182         return floatx80_default_nan(status);
4183     }
4184     parts_scalbn(&p, n, status);
4185     return floatx80_round_pack_canonical(&p, status);
4186 }
4187 
4188 /*
4189  * Square Root
4190  */
4191 
4192 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4193 {
4194     FloatParts64 p;
4195 
4196     float16_unpack_canonical(&p, a, status);
4197     parts_sqrt(&p, status, &float16_params);
4198     return float16_round_pack_canonical(&p, status);
4199 }
4200 
4201 static float32 QEMU_SOFTFLOAT_ATTR
4202 soft_f32_sqrt(float32 a, float_status *status)
4203 {
4204     FloatParts64 p;
4205 
4206     float32_unpack_canonical(&p, a, status);
4207     parts_sqrt(&p, status, &float32_params);
4208     return float32_round_pack_canonical(&p, status);
4209 }
4210 
4211 static float64 QEMU_SOFTFLOAT_ATTR
4212 soft_f64_sqrt(float64 a, float_status *status)
4213 {
4214     FloatParts64 p;
4215 
4216     float64_unpack_canonical(&p, a, status);
4217     parts_sqrt(&p, status, &float64_params);
4218     return float64_round_pack_canonical(&p, status);
4219 }
4220 
4221 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4222 {
4223     union_float32 ua, ur;
4224 
4225     ua.s = xa;
4226     if (unlikely(!can_use_fpu(s))) {
4227         goto soft;
4228     }
4229 
4230     float32_input_flush1(&ua.s, s);
4231     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4232         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4233                        fpclassify(ua.h) == FP_ZERO) ||
4234                      signbit(ua.h))) {
4235             goto soft;
4236         }
4237     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4238                         float32_is_neg(ua.s))) {
4239         goto soft;
4240     }
4241     ur.h = sqrtf(ua.h);
4242     return ur.s;
4243 
4244  soft:
4245     return soft_f32_sqrt(ua.s, s);
4246 }
4247 
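/*
 * Only positive zero and positive normal numbers reach the host
 * sqrtf() above: a negative input must produce the default NaN and
 * raise invalid, while NaN, infinity and denormal inputs need
 * softfloat's classification and flag handling, so all of those take
 * the soft path.
 */
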
4248 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4249 {
4250     union_float64 ua, ur;
4251 
4252     ua.s = xa;
4253     if (unlikely(!can_use_fpu(s))) {
4254         goto soft;
4255     }
4256 
4257     float64_input_flush1(&ua.s, s);
4258     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4259         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4260                        fpclassify(ua.h) == FP_ZERO) ||
4261                      signbit(ua.h))) {
4262             goto soft;
4263         }
4264     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4265                         float64_is_neg(ua.s))) {
4266         goto soft;
4267     }
4268     ur.h = sqrt(ua.h);
4269     return ur.s;
4270 
4271  soft:
4272     return soft_f64_sqrt(ua.s, s);
4273 }
4274 
4275 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4276 {
4277     FloatParts64 p;
4278 
4279     bfloat16_unpack_canonical(&p, a, status);
4280     parts_sqrt(&p, status, &bfloat16_params);
4281     return bfloat16_round_pack_canonical(&p, status);
4282 }
4283 
4284 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4285 {
4286     FloatParts128 p;
4287 
4288     float128_unpack_canonical(&p, a, status);
4289     parts_sqrt(&p, status, &float128_params);
4290     return float128_round_pack_canonical(&p, status);
4291 }
4292 
4293 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4294 {
4295     FloatParts128 p;
4296 
4297     if (!floatx80_unpack_canonical(&p, a, s)) {
4298         return floatx80_default_nan(s);
4299     }
4300     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4301     return floatx80_round_pack_canonical(&p, s);
4302 }
4303 
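/*
 * Note the parameter lookup above: floatx80 results are rounded to a
 * 24-, 53- or 64-bit significand according to
 * s->floatx80_rounding_precision, mirroring the x87 precision-control
 * field, so sqrt honours a reduced-precision setting as well.
 */
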
4304 /*
4305  * log2
4306  */
4307 float32 float32_log2(float32 a, float_status *status)
4308 {
4309     FloatParts64 p;
4310 
4311     float32_unpack_canonical(&p, a, status);
4312     parts_log2(&p, status, &float32_params);
4313     return float32_round_pack_canonical(&p, status);
4314 }
4315 
4316 float64 float64_log2(float64 a, float_status *status)
4317 {
4318     FloatParts64 p;
4319 
4320     float64_unpack_canonical(&p, a, status);
4321     parts_log2(&p, status, &float64_params);
4322     return float64_round_pack_canonical(&p, status);
4323 }
4324 
4325 /*----------------------------------------------------------------------------
4326 | The pattern for a default generated NaN.
4327 *----------------------------------------------------------------------------*/
4328 
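/*
 * parts_default_nan() builds the NaN pattern with the fraction
 * left-aligned in the canonical (decomposed) layout; shifting right by
 * frac_shift drops it into the storage position of each narrower
 * format before the raw bits are packed.
 */
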
4329 float16 float16_default_nan(float_status *status)
4330 {
4331     FloatParts64 p;
4332 
4333     parts_default_nan(&p, status);
4334     p.frac >>= float16_params.frac_shift;
4335     return float16_pack_raw(&p);
4336 }
4337 
4338 float32 float32_default_nan(float_status *status)
4339 {
4340     FloatParts64 p;
4341 
4342     parts_default_nan(&p, status);
4343     p.frac >>= float32_params.frac_shift;
4344     return float32_pack_raw(&p);
4345 }
4346 
4347 float64 float64_default_nan(float_status *status)
4348 {
4349     FloatParts64 p;
4350 
4351     parts_default_nan(&p, status);
4352     p.frac >>= float64_params.frac_shift;
4353     return float64_pack_raw(&p);
4354 }
4355 
4356 float128 float128_default_nan(float_status *status)
4357 {
4358     FloatParts128 p;
4359 
4360     parts_default_nan(&p, status);
4361     frac_shr(&p, float128_params.frac_shift);
4362     return float128_pack_raw(&p);
4363 }
4364 
4365 bfloat16 bfloat16_default_nan(float_status *status)
4366 {
4367     FloatParts64 p;
4368 
4369     parts_default_nan(&p, status);
4370     p.frac >>= bfloat16_params.frac_shift;
4371     return bfloat16_pack_raw(&p);
4372 }
4373 
4374 /*----------------------------------------------------------------------------
4375 | Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
4376 *----------------------------------------------------------------------------*/
4377 
4378 float16 float16_silence_nan(float16 a, float_status *status)
4379 {
4380     FloatParts64 p;
4381 
4382     float16_unpack_raw(&p, a);
4383     p.frac <<= float16_params.frac_shift;
4384     parts_silence_nan(&p, status);
4385     p.frac >>= float16_params.frac_shift;
4386     return float16_pack_raw(&p);
4387 }
4388 
4389 float32 float32_silence_nan(float32 a, float_status *status)
4390 {
4391     FloatParts64 p;
4392 
4393     float32_unpack_raw(&p, a);
4394     p.frac <<= float32_params.frac_shift;
4395     parts_silence_nan(&p, status);
4396     p.frac >>= float32_params.frac_shift;
4397     return float32_pack_raw(&p);
4398 }
4399 
4400 float64 float64_silence_nan(float64 a, float_status *status)
4401 {
4402     FloatParts64 p;
4403 
4404     float64_unpack_raw(&p, a);
4405     p.frac <<= float64_params.frac_shift;
4406     parts_silence_nan(&p, status);
4407     p.frac >>= float64_params.frac_shift;
4408     return float64_pack_raw(&p);
4409 }
4410 
4411 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4412 {
4413     FloatParts64 p;
4414 
4415     bfloat16_unpack_raw(&p, a);
4416     p.frac <<= bfloat16_params.frac_shift;
4417     parts_silence_nan(&p, status);
4418     p.frac >>= bfloat16_params.frac_shift;
4419     return bfloat16_pack_raw(&p);
4420 }
4421 
4422 float128 float128_silence_nan(float128 a, float_status *status)
4423 {
4424     FloatParts128 p;
4425 
4426     float128_unpack_raw(&p, a);
4427     frac_shl(&p, float128_params.frac_shift);
4428     parts_silence_nan(&p, status);
4429     frac_shr(&p, float128_params.frac_shift);
4430     return float128_pack_raw(&p);
4431 }
4432 
4433 /*----------------------------------------------------------------------------
4434 | If `a' is denormal and we are in flush-to-zero mode then set the
4435 | input-denormal exception and return zero. Otherwise just return the value.
4436 *----------------------------------------------------------------------------*/
4437 
4438 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4439 {
4440     if (p.exp == 0 && p.frac != 0) {
4441         float_raise(float_flag_input_denormal, status);
4442         return true;
4443     }
4444 
4445     return false;
4446 }
4447 
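/*
 * Note that this tests the raw encoding: a biased exponent of 0 with a
 * nonzero fraction is exactly the bit pattern of a subnormal, so the
 * flush decision needs no canonical unpacking.
 */
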
4448 float16 float16_squash_input_denormal(float16 a, float_status *status)
4449 {
4450     if (status->flush_inputs_to_zero) {
4451         FloatParts64 p;
4452 
4453         float16_unpack_raw(&p, a);
4454         if (parts_squash_denormal(p, status)) {
4455             return float16_set_sign(float16_zero, p.sign);
4456         }
4457     }
4458     return a;
4459 }
4460 
4461 float32 float32_squash_input_denormal(float32 a, float_status *status)
4462 {
4463     if (status->flush_inputs_to_zero) {
4464         FloatParts64 p;
4465 
4466         float32_unpack_raw(&p, a);
4467         if (parts_squash_denormal(p, status)) {
4468             return float32_set_sign(float32_zero, p.sign);
4469         }
4470     }
4471     return a;
4472 }
4473 
4474 float64 float64_squash_input_denormal(float64 a, float_status *status)
4475 {
4476     if (status->flush_inputs_to_zero) {
4477         FloatParts64 p;
4478 
4479         float64_unpack_raw(&p, a);
4480         if (parts_squash_denormal(p, status)) {
4481             return float64_set_sign(float64_zero, p.sign);
4482         }
4483     }
4484     return a;
4485 }
4486 
4487 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4488 {
4489     if (status->flush_inputs_to_zero) {
4490         FloatParts64 p;
4491 
4492         bfloat16_unpack_raw(&p, a);
4493         if (parts_squash_denormal(p, status)) {
4494             return bfloat16_set_sign(bfloat16_zero, p.sign);
4495         }
4496     }
4497     return a;
4498 }
4499 
4500 /*----------------------------------------------------------------------------
4501 | Normalizes the subnormal extended double-precision floating-point value
4502 | represented by the denormalized significand `aSig'.  The normalized exponent
4503 | and significand are stored at the locations pointed to by `zExpPtr' and
4504 | `zSigPtr', respectively.
4505 *----------------------------------------------------------------------------*/
4506 
4507 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4508                                 uint64_t *zSigPtr)
4509 {
4510     int8_t shiftCount;
4511 
4512     shiftCount = clz64(aSig);
4513     *zSigPtr = aSig<<shiftCount;
4514     *zExpPtr = 1 - shiftCount;
4515 }
4516 
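/*
 * Example: aSig = 1 (only bit 0 set) gives shiftCount = 63, so
 * *zSigPtr becomes 0x8000000000000000 and *zExpPtr becomes
 * 1 - 63 = -62: the significand is now normalized (bit 63 set) and
 * the exponent compensates for the shift.
 */
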
4517 /*----------------------------------------------------------------------------
4518 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4519 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4520 | and returns the proper extended double-precision floating-point value
4521 | corresponding to the abstract input.  Ordinarily, the abstract value is
4522 | rounded and packed into the extended double-precision format, with the
4523 | inexact exception raised if the abstract input cannot be represented
4524 | exactly.  However, if the abstract value is too large, the overflow and
4525 | inexact exceptions are raised and an infinity or maximal finite value is
4526 | returned.  If the abstract value is too small, the input value is rounded to
4527 | a subnormal number, and the underflow and inexact exceptions are raised if
4528 | the abstract input cannot be represented exactly as a subnormal extended
4529 | double-precision floating-point number.
4530 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4531 | the result is rounded to the same number of bits as single or double
4532 | precision, respectively.  Otherwise, the result is rounded to the full
4533 | precision of the extended double-precision format.
4534 |     The input significand must be normalized or smaller.  If the input
4535 | significand is not normalized, `zExp' must be 0; in that case, the result
4536 | returned is a subnormal number, and it must not require rounding.  The
4537 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4538 | Floating-Point Arithmetic.
4539 *----------------------------------------------------------------------------*/
4540 
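/*
 * Worked example for floatx80_precision_d below: the 64-bit
 * significand keeps its top 53 bits (the width of a double), so
 * roundMask = 0x7FF covers the 11 bits to be discarded and
 * roundIncrement = 0x400 is half a unit in the last kept place,
 * exactly what round-to-nearest must add before masking.  The
 * floatx80_precision_s constants follow the same scheme with
 * 64 - 24 = 40 discarded bits.
 */
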
4541 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4542                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4543                               float_status *status)
4544 {
4545     FloatRoundMode roundingMode;
4546     bool roundNearestEven, increment, isTiny;
4547     int64_t roundIncrement, roundMask, roundBits;
4548 
4549     roundingMode = status->float_rounding_mode;
4550     roundNearestEven = ( roundingMode == float_round_nearest_even );
4551     switch (roundingPrecision) {
4552     case floatx80_precision_x:
4553         goto precision80;
4554     case floatx80_precision_d:
4555         roundIncrement = UINT64_C(0x0000000000000400);
4556         roundMask = UINT64_C(0x00000000000007FF);
4557         break;
4558     case floatx80_precision_s:
4559         roundIncrement = UINT64_C(0x0000008000000000);
4560         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4561         break;
4562     default:
4563         g_assert_not_reached();
4564     }
4565     zSig0 |= ( zSig1 != 0 );
4566     switch (roundingMode) {
4567     case float_round_nearest_even:
4568     case float_round_ties_away:
4569         break;
4570     case float_round_to_zero:
4571         roundIncrement = 0;
4572         break;
4573     case float_round_up:
4574         roundIncrement = zSign ? 0 : roundMask;
4575         break;
4576     case float_round_down:
4577         roundIncrement = zSign ? roundMask : 0;
4578         break;
4579     default:
4580         abort();
4581     }
4582     roundBits = zSig0 & roundMask;
4583     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4584         if (    ( 0x7FFE < zExp )
4585              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4586            ) {
4587             goto overflow;
4588         }
4589         if ( zExp <= 0 ) {
4590             if (status->flush_to_zero) {
4591                 float_raise(float_flag_output_denormal, status);
4592                 return packFloatx80(zSign, 0, 0);
4593             }
4594             isTiny = status->tininess_before_rounding
4595                   || (zExp < 0 )
4596                   || (zSig0 <= zSig0 + roundIncrement);
4597             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4598             zExp = 0;
4599             roundBits = zSig0 & roundMask;
4600             if (isTiny && roundBits) {
4601                 float_raise(float_flag_underflow, status);
4602             }
4603             if (roundBits) {
4604                 float_raise(float_flag_inexact, status);
4605             }
4606             zSig0 += roundIncrement;
4607             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4608             roundIncrement = roundMask + 1;
4609             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4610                 roundMask |= roundIncrement;
4611             }
4612             zSig0 &= ~ roundMask;
4613             return packFloatx80( zSign, zExp, zSig0 );
4614         }
4615     }
4616     if (roundBits) {
4617         float_raise(float_flag_inexact, status);
4618     }
4619     zSig0 += roundIncrement;
4620     if ( zSig0 < roundIncrement ) {
4621         ++zExp;
4622         zSig0 = UINT64_C(0x8000000000000000);
4623     }
4624     roundIncrement = roundMask + 1;
4625     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4626         roundMask |= roundIncrement;
4627     }
4628     zSig0 &= ~ roundMask;
4629     if ( zSig0 == 0 ) zExp = 0;
4630     return packFloatx80( zSign, zExp, zSig0 );
4631  precision80:
4632     switch (roundingMode) {
4633     case float_round_nearest_even:
4634     case float_round_ties_away:
4635         increment = ((int64_t)zSig1 < 0);
4636         break;
4637     case float_round_to_zero:
4638         increment = 0;
4639         break;
4640     case float_round_up:
4641         increment = !zSign && zSig1;
4642         break;
4643     case float_round_down:
4644         increment = zSign && zSig1;
4645         break;
4646     default:
4647         abort();
4648     }
4649     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4650         if (    ( 0x7FFE < zExp )
4651              || (    ( zExp == 0x7FFE )
4652                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4653                   && increment
4654                 )
4655            ) {
4656             roundMask = 0;
4657  overflow:
4658             float_raise(float_flag_overflow | float_flag_inexact, status);
4659             if (    ( roundingMode == float_round_to_zero )
4660                  || ( zSign && ( roundingMode == float_round_up ) )
4661                  || ( ! zSign && ( roundingMode == float_round_down ) )
4662                ) {
4663                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4664             }
4665             return packFloatx80(zSign,
4666                                 floatx80_infinity_high,
4667                                 floatx80_infinity_low);
4668         }
4669         if ( zExp <= 0 ) {
4670             isTiny = status->tininess_before_rounding
4671                   || (zExp < 0)
4672                   || !increment
4673                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4674             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4675             zExp = 0;
4676             if (isTiny && zSig1) {
4677                 float_raise(float_flag_underflow, status);
4678             }
4679             if (zSig1) {
4680                 float_raise(float_flag_inexact, status);
4681             }
4682             switch (roundingMode) {
4683             case float_round_nearest_even:
4684             case float_round_ties_away:
4685                 increment = ((int64_t)zSig1 < 0);
4686                 break;
4687             case float_round_to_zero:
4688                 increment = 0;
4689                 break;
4690             case float_round_up:
4691                 increment = !zSign && zSig1;
4692                 break;
4693             case float_round_down:
4694                 increment = zSign && zSig1;
4695                 break;
4696             default:
4697                 abort();
4698             }
4699             if ( increment ) {
4700                 ++zSig0;
4701                 if (!(zSig1 << 1) && roundNearestEven) {
4702                     zSig0 &= ~1;
4703                 }
4704                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4705             }
4706             return packFloatx80( zSign, zExp, zSig0 );
4707         }
4708     }
4709     if (zSig1) {
4710         float_raise(float_flag_inexact, status);
4711     }
4712     if ( increment ) {
4713         ++zSig0;
4714         if ( zSig0 == 0 ) {
4715             ++zExp;
4716             zSig0 = UINT64_C(0x8000000000000000);
4717         }
4718         else {
4719             if (!(zSig1 << 1) && roundNearestEven) {
4720                 zSig0 &= ~1;
4721             }
4722         }
4723     }
4724     else {
4725         if ( zSig0 == 0 ) zExp = 0;
4726     }
4727     return packFloatx80( zSign, zExp, zSig0 );
4728 
4729 }
4730 
4731 /*----------------------------------------------------------------------------
4732 | Takes an abstract floating-point value having sign `zSign', exponent
4733 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4734 | and returns the proper extended double-precision floating-point value
4735 | corresponding to the abstract input.  This routine is just like
4736 | `roundAndPackFloatx80' except that the input significand does not have to be
4737 | normalized.
4738 *----------------------------------------------------------------------------*/
4739 
4740 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4741                                        bool zSign, int32_t zExp,
4742                                        uint64_t zSig0, uint64_t zSig1,
4743                                        float_status *status)
4744 {
4745     int8_t shiftCount;
4746 
4747     if ( zSig0 == 0 ) {
4748         zSig0 = zSig1;
4749         zSig1 = 0;
4750         zExp -= 64;
4751     }
4752     shiftCount = clz64(zSig0);
4753     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4754     zExp -= shiftCount;
4755     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4756                                 zSig0, zSig1, status);
4757 
4758 }
4759 
4760 /*----------------------------------------------------------------------------
4761 | Returns the binary exponential of the single-precision floating-point value
4762 | `a'. The operation is performed according to the IEC/IEEE Standard for
4763 | Binary Floating-Point Arithmetic.
4764 |
4765 | Uses the following identities:
4766 |
4767 | 1. -------------------------------------------------------------------------
4768 |      x    x*ln(2)
4769 |     2  = e
4770 |
4771 | 2. -------------------------------------------------------------------------
4772 |                      2     3     4     5           n
4773 |      x        x     x     x     x     x           x
4774 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4775 |               1!    2!    3!    4!    5!          n!
4776 *----------------------------------------------------------------------------*/
4777 
4778 static const float64 float32_exp2_coefficients[15] =
4779 {
4780     const_float64( 0x3ff0000000000000ll ), /*  1 */
4781     const_float64( 0x3fe0000000000000ll ), /*  2 */
4782     const_float64( 0x3fc5555555555555ll ), /*  3 */
4783     const_float64( 0x3fa5555555555555ll ), /*  4 */
4784     const_float64( 0x3f81111111111111ll ), /*  5 */
4785     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4786     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4787     const_float64( 0x3efa01a01a01a01all ), /*  8 */
4788     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4789     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4790     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4791     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4792     const_float64( 0x3de6124613a86d09ll ), /* 13 */
4793     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4794     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4795 };
4796 
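/*
 * Each entry is 1/n! for n = 1..15 as a float64 constant, e.g. entry 3
 * is 0x3fc5555555555555 == 1/6.  With x = a * ln(2), the loop below
 * accumulates r = 1 + sum(x**n / n!), the truncated Taylor series for
 * e**x, with xnp tracking the running power x**n.
 */
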
4797 float32 float32_exp2(float32 a, float_status *status)
4798 {
4799     FloatParts64 xp, xnp, tp, rp;
4800     int i;
4801 
4802     float32_unpack_canonical(&xp, a, status);
4803     if (unlikely(xp.cls != float_class_normal)) {
4804         switch (xp.cls) {
4805         case float_class_snan:
4806         case float_class_qnan:
4807             parts_return_nan(&xp, status);
4808             return float32_round_pack_canonical(&xp, status);
4809         case float_class_inf:
4810             return xp.sign ? float32_zero : a;
4811         case float_class_zero:
4812             return float32_one;
4813         default:
4814             break;
4815         }
4816         g_assert_not_reached();
4817     }
4818 
4819     float_raise(float_flag_inexact, status);
4820 
4821     float64_unpack_canonical(&tp, float64_ln2, status);
4822     xp = *parts_mul(&xp, &tp, status);
4823     xnp = xp;
4824 
4825     float64_unpack_canonical(&rp, float64_one, status);
4826     for (i = 0 ; i < 15 ; i++) {
4827         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4828         rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
4829         xnp = *parts_mul(&xnp, &xp, status);
4830     }
4831 
4832     return float32_round_pack_canonical(&rp, status);
4833 }
4834 
4835 /*----------------------------------------------------------------------------
4836 | Rounds the extended double-precision floating-point value `a'
4837 | to the precision provided by floatx80_rounding_precision and returns the
4838 | result as an extended double-precision floating-point value.
4839 | The operation is performed according to the IEC/IEEE Standard for Binary
4840 | Floating-Point Arithmetic.
4841 *----------------------------------------------------------------------------*/
4842 
4843 floatx80 floatx80_round(floatx80 a, float_status *status)
4844 {
4845     FloatParts128 p;
4846 
4847     if (!floatx80_unpack_canonical(&p, a, status)) {
4848         return floatx80_default_nan(status);
4849     }
4850     return floatx80_round_pack_canonical(&p, status);
4851 }
4852 
4853 static void __attribute__((constructor)) softfloat_init(void)
4854 {
4855     union_float64 ua, ub, uc, ur;
4856 
4857     if (QEMU_NO_HARDFLOAT) {
4858         return;
4859     }
4860     /*
4861      * Test that the host's FMA is not obviously broken. For example,
4862      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4863      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4864      */
4865     ua.s = 0x0020000000000001ULL;
4866     ub.s = 0x3ca0000000000000ULL;
4867     uc.s = 0x0020000000000000ULL;
4868     ur.h = fma(ua.h, ub.h, uc.h);
4869     if (ur.s != 0x0020000000000001ULL) {
4870         force_soft_fma = true;
4871     }
4872 }
4873
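/*
 * Decoding the probe: ua is 0x1.0000000000001p-1021, ub is 0x1p-53
 * and uc is 0x1p-1021.  A true fused multiply-add keeps the low bits
 * of the product, leaving the sum just above the rounding midpoint so
 * it rounds up to 0x0020000000000001; a separate multiply rounds those
 * bits away first and the subsequent add lands exactly on the tie,
 * giving ...000 under ties-to-even.  Any other answer indicates a
 * broken host fma().
 */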