/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work
 * with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is smaller than the minimum normal.
 */
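
/*
 * A minimal sketch of the fast path described above (illustrative only;
 * the real plumbing is in float32_gen2/float64_gen2 below).  Overflow is
 * visible in the result itself as an infinity, and a result at or below
 * the smallest normal may have underflowed or be inexact in a way we
 * cannot cheaply prove, so we would fall back to softfloat rather than
 * inspect the host flags register.
 */
static inline __attribute__((unused)) float
hardfloat_add_sketch(float a, float b, bool *need_soft)
{
    float r = a + b;

    if (unlikely(isinf(r))) {
        /* Overflow shows up in the result; raise float_flag_overflow. */
    } else if (unlikely(fabsf(r) <= FLT_MIN)) {
        /* Possible underflow or hidden inexactness: redo in soft-fp. */
        *need_soft = true;
    }
    return r;
}
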
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
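
/*
 * For reference, the first instantiation above expands (via ## token
 * pasting) to:
 *
 *   static inline void float32_input_flush__nocheck(float32 *a,
 *                                                   float_status *s)
 *   {
 *       if (unlikely(float32_is_denormal(*a))) {
 *           *a = float32_set_sign(float32_zero, float32_is_neg(*a));
 *           float_raise(float_flag_input_denormal, s);
 *       }
 *   }
 */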

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
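
/*
 * Usage sketch (hypothetical helper names): wiring a two-operand op
 * through float32_gen2.  The real callers later in this file follow
 * this shape, supplying their own pre/post checks:
 *
 *   static float hard_f32_add(float a, float b)
 *   {
 *       return a + b;
 *   }
 *
 *   float32 float32_add(float32 a, float32 b, float_status *s)
 *   {
 *       return float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                           f32_is_zon2, f32_addsub_post);
 *   }
 *
 * where soft_f32_add and f32_addsub_post stand in for the softfloat
 * fallback and the "can the tiny result be trusted" check.
 */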

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN, so cls >= float_class_qnan tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
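
/*
 * For example (illustrative combinations following the flag meanings
 * above): IEEE 754-2008 minNum() corresponds to
 * minmax_ismin | minmax_isnum, maxNumMag() to
 * minmax_isnum | minmax_ismag, and 754-2019 minimumNumber() to
 * minmax_ismin | minmax_isnumber.
 */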

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
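
/*
 * Worked example: float32 1.0f decomposes to cls = float_class_normal,
 * sign = 0, exp = 0 (unbiased) and frac = DECOMPOSED_IMPLICIT_BIT,
 * i.e. 0x8000000000000000: the implicit integer bit sits at bit 63
 * and the 23 fraction bits (all zero here) sit directly below it.
 */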

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   round_mask: bits below the lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1

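/*
 * Worked example: FLOAT_PARAMS(8, 23) (i.e. float32) yields
 * exp_bias = ((1 << 8) - 1) >> 1 = 127, exp_max = 255,
 * frac_shift = (-23 - 1) & 63 = 40, and round_mask = (1ull << 40) - 1;
 * shifting the 23 fraction bits left by 40 places their msb at bit 62,
 * just below DECOMPOSED_BINARY_POINT.
 */
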
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
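
/*
 * Worked example: unpacking float32 1.0f (raw 0x3f800000) with
 * f_size = 23 and e_size = 8 gives sign = bit 31 = 0,
 * exp = bits 23..30 = 0x7f = 127, frac = bits 0..22 = 0.
 * Canonicalization (done separately) then rebiases the exponent and
 * inserts the implicit bit.
 */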

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}
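
/*
 * Worked example (the converse of the unpack example above): packing
 * { sign = 1, exp = 0x7f, frac = 0 } with float32_params gives
 * (1 << 31) | (0x7f << 23) | 0 = 0xbf800000, i.e. -1.0f.
 */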

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
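
/*
 * For example, given FloatParts64 *p, parts_default_nan(p, s) resolves
 * at compile time (via C11 _Generic) to parts64_default_nan(p, s);
 * given a FloatParts128 * it resolves to parts128_default_nan.
 */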

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
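
/*
 * Worked example: for 128-bit fractions, adding frac_lo values
 * 0xffffffffffffffff + 1 wraps to 0 and sets the carry, which
 * uadd64_carry then feeds into the frac_hi addition; the final carry
 * out of frac_hi is returned so the caller can renormalize.
 */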

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
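
/*
 * Worked example: a 128-bit fraction with frac_hi = 0 and frac_lo = 1
 * takes the second branch: shl = clz64(1) = 63, so frac_hi becomes
 * 1ull << 63 and the function returns 63 + 64 = 127, the total left
 * shift applied, which the caller uses to adjust the exponent.
 */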

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
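
/*
 * Worked example: "jamming" folds discarded bits into the lsb as a
 * sticky bit so inexactness survives the shift.  For a 64-bit fraction
 * of 0x12 shifted right by 3, the plain shift gives 0x2 and the
 * discarded bits are 0b010 != 0, so the result is 0x2 | 1 = 0x3.
 */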

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1568 
1569 /*
1570  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1571  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1572  * and thus MIT licenced.
1573  */
1574 static const uint16_t rsqrt_tab[128] = {
1575     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1576     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1577     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1578     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1579     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1580     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1581     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1582     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1583     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1584     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1585     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1586     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1587     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1588     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1589     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1590     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1591 };
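/*
 * The table is indexed by the low bit of the exponent concatenated
 * with the top six fraction bits; each entry is a 16-bit fixed-point
 * estimate of 1/sqrt(x), which is why the values jump at index 64,
 * where the exponent parity flips.  Only a few bits of the estimate
 * need to be correct: the sqrt code iteratively refines it before use.
 */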
1592 
1593 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1594 #define FloatPartsN    glue(FloatParts,N)
1595 #define FloatPartsW    glue(FloatParts,W)
1596 
1597 #define N 64
1598 #define W 128
1599 
1600 #include "softfloat-parts-addsub.c.inc"
1601 #include "softfloat-parts.c.inc"
1602 
1603 #undef  N
1604 #undef  W
1605 #define N 128
1606 #define W 256
1607 
1608 #include "softfloat-parts-addsub.c.inc"
1609 #include "softfloat-parts.c.inc"
1610 
1611 #undef  N
1612 #undef  W
1613 #define N            256
1614 
1615 #include "softfloat-parts-addsub.c.inc"
1616 
1617 #undef  N
1618 #undef  W
1619 #undef  partsN
1620 #undef  FloatPartsN
1621 #undef  FloatPartsW
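/*
 * Each inclusion above stamps out one instantiation of the generic
 * parts code: with N = 64 and W = 128, partsN(addsub) pastes to
 * parts64_addsub() operating on FloatParts64, and operations needing
 * a double-width intermediate (multiplication, for instance) use
 * FloatPartsW, i.e. FloatParts128.  The final N = 256 pass pulls in
 * only softfloat-parts-addsub.c.inc, since the 256-bit fractions are
 * needed solely as intermediates for the 128-bit operations.
 */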
1622 
1623 /*
1624  * Pack/unpack routines with a specific FloatFmt.
1625  */
1626 
1627 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1628                                       float_status *s, const FloatFmt *params)
1629 {
1630     float16_unpack_raw(p, f);
1631     parts_canonicalize(p, s, params);
1632 }
1633 
1634 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1635                                      float_status *s)
1636 {
1637     float16a_unpack_canonical(p, f, s, &float16_params);
1638 }
1639 
1640 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1641                                       float_status *s)
1642 {
1643     bfloat16_unpack_raw(p, f);
1644     parts_canonicalize(p, s, &bfloat16_params);
1645 }
1646 
1647 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1648                                              float_status *s,
1649                                              const FloatFmt *params)
1650 {
1651     parts_uncanon(p, s, params);
1652     return float16_pack_raw(p);
1653 }
1654 
1655 static float16 float16_round_pack_canonical(FloatParts64 *p,
1656                                             float_status *s)
1657 {
1658     return float16a_round_pack_canonical(p, s, &float16_params);
1659 }
1660 
1661 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1662                                               float_status *s)
1663 {
1664     parts_uncanon(p, s, &bfloat16_params);
1665     return bfloat16_pack_raw(p);
1666 }
1667 
1668 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1669                                      float_status *s)
1670 {
1671     float32_unpack_raw(p, f);
1672     parts_canonicalize(p, s, &float32_params);
1673 }
1674 
1675 static float32 float32_round_pack_canonical(FloatParts64 *p,
1676                                             float_status *s)
1677 {
1678     parts_uncanon(p, s, &float32_params);
1679     return float32_pack_raw(p);
1680 }
1681 
1682 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1683                                      float_status *s)
1684 {
1685     float64_unpack_raw(p, f);
1686     parts_canonicalize(p, s, &float64_params);
1687 }
1688 
1689 static float64 float64_round_pack_canonical(FloatParts64 *p,
1690                                             float_status *s)
1691 {
1692     parts_uncanon(p, s, &float64_params);
1693     return float64_pack_raw(p);
1694 }
1695 
1696 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1697                                       float_status *s)
1698 {
1699     float128_unpack_raw(p, f);
1700     parts_canonicalize(p, s, &float128_params);
1701 }
1702 
1703 static float128 float128_round_pack_canonical(FloatParts128 *p,
1704                                               float_status *s)
1705 {
1706     parts_uncanon(p, s, &float128_params);
1707     return float128_pack_raw(p);
1708 }
1709 
1710 /* Returns false if the encoding is invalid. */
1711 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1712                                       float_status *s)
1713 {
1714     /* Ensure rounding precision is set before beginning. */
1715     switch (s->floatx80_rounding_precision) {
1716     case floatx80_precision_x:
1717     case floatx80_precision_d:
1718     case floatx80_precision_s:
1719         break;
1720     default:
1721         g_assert_not_reached();
1722     }
1723 
1724     if (unlikely(floatx80_invalid_encoding(f))) {
1725         float_raise(float_flag_invalid, s);
1726         return false;
1727     }
1728 
1729     floatx80_unpack_raw(p, f);
1730 
1731     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1732         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1733     } else {
1734         /* The explicit integer bit is ignored, after invalid checks. */
1735         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1736         p->cls = (p->frac_hi == 0 ? float_class_inf
1737                   : parts_is_snan_frac(p->frac_hi, s)
1738                   ? float_class_snan : float_class_qnan);
1739     }
1740     return true;
1741 }
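/*
 * Callers typically follow the pattern
 *
 *     if (!floatx80_unpack_canonical(&p, a, status)) {
 *         return floatx80_default_nan(status);
 *     }
 *
 * so an invalid encoding becomes the default NaN, with the invalid
 * flag already raised by the unpack.
 */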
1742 
1743 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1744                                               float_status *s)
1745 {
1746     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1747     uint64_t frac;
1748     int exp;
1749 
1750     switch (p->cls) {
1751     case float_class_normal:
1752         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1753             parts_uncanon_normal(p, s, fmt);
1754             frac = p->frac_hi;
1755             exp = p->exp;
1756         } else {
1757             FloatParts64 p64;
1758 
1759             p64.sign = p->sign;
1760             p64.exp = p->exp;
1761             frac_truncjam(&p64, p);
1762             parts_uncanon_normal(&p64, s, fmt);
1763             frac = p64.frac;
1764             exp = p64.exp;
1765         }
1766         if (exp != fmt->exp_max) {
1767             break;
1768         }
1769         /* rounded to inf -- fall through to set frac correctly */
1770 
1771     case float_class_inf:
1772         /* x86 and m68k differ in the setting of the integer bit. */
1773         frac = floatx80_infinity_low;
1774         exp = fmt->exp_max;
1775         break;
1776 
1777     case float_class_zero:
1778         frac = 0;
1779         exp = 0;
1780         break;
1781 
1782     case float_class_snan:
1783     case float_class_qnan:
1784         /* NaNs have the integer bit set. */
1785         frac = p->frac_hi | (1ull << 63);
1786         exp = fmt->exp_max;
1787         break;
1788 
1789     default:
1790         g_assert_not_reached();
1791     }
1792 
1793     return packFloatx80(p->sign, exp, frac);
1794 }
1795 
1796 /*
1797  * Addition and subtraction
1798  */
1799 
1800 static float16 QEMU_FLATTEN
1801 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1802 {
1803     FloatParts64 pa, pb, *pr;
1804 
1805     float16_unpack_canonical(&pa, a, status);
1806     float16_unpack_canonical(&pb, b, status);
1807     pr = parts_addsub(&pa, &pb, status, subtract);
1808 
1809     return float16_round_pack_canonical(pr, status);
1810 }
1811 
1812 float16 float16_add(float16 a, float16 b, float_status *status)
1813 {
1814     return float16_addsub(a, b, status, false);
1815 }
1816 
1817 float16 float16_sub(float16 a, float16 b, float_status *status)
1818 {
1819     return float16_addsub(a, b, status, true);
1820 }
1821 
1822 static float32 QEMU_SOFTFLOAT_ATTR
1823 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1824 {
1825     FloatParts64 pa, pb, *pr;
1826 
1827     float32_unpack_canonical(&pa, a, status);
1828     float32_unpack_canonical(&pb, b, status);
1829     pr = parts_addsub(&pa, &pb, status, subtract);
1830 
1831     return float32_round_pack_canonical(pr, status);
1832 }
1833 
1834 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1835 {
1836     return soft_f32_addsub(a, b, status, false);
1837 }
1838 
1839 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1840 {
1841     return soft_f32_addsub(a, b, status, true);
1842 }
1843 
1844 static float64 QEMU_SOFTFLOAT_ATTR
1845 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1846 {
1847     FloatParts64 pa, pb, *pr;
1848 
1849     float64_unpack_canonical(&pa, a, status);
1850     float64_unpack_canonical(&pb, b, status);
1851     pr = parts_addsub(&pa, &pb, status, subtract);
1852 
1853     return float64_round_pack_canonical(pr, status);
1854 }
1855 
1856 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1857 {
1858     return soft_f64_addsub(a, b, status, false);
1859 }
1860 
1861 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1862 {
1863     return soft_f64_addsub(a, b, status, true);
1864 }
1865 
1866 static float hard_f32_add(float a, float b)
1867 {
1868     return a + b;
1869 }
1870 
1871 static float hard_f32_sub(float a, float b)
1872 {
1873     return a - b;
1874 }
1875 
1876 static double hard_f64_add(double a, double b)
1877 {
1878     return a + b;
1879 }
1880 
1881 static double hard_f64_sub(double a, double b)
1882 {
1883     return a - b;
1884 }
1885 
1886 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1887 {
1888     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1889         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1890     }
1891     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1892 }
1893 
1894 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1895 {
1896     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1897         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1898     } else {
1899         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1900     }
1901 }
1902 
1903 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1904                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1905 {
1906     return float32_gen2(a, b, s, hard, soft,
1907                         f32_is_zon2, f32_addsubmul_post);
1908 }
1909 
1910 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1911                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1912 {
1913     return float64_gen2(a, b, s, hard, soft,
1914                         f64_is_zon2, f64_addsubmul_post);
1915 }
1916 
1917 float32 QEMU_FLATTEN
1918 float32_add(float32 a, float32 b, float_status *s)
1919 {
1920     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1921 }
1922 
1923 float32 QEMU_FLATTEN
1924 float32_sub(float32 a, float32 b, float_status *s)
1925 {
1926     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1927 }
1928 
1929 float64 QEMU_FLATTEN
1930 float64_add(float64 a, float64 b, float_status *s)
1931 {
1932     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1933 }
1934 
1935 float64 QEMU_FLATTEN
1936 float64_sub(float64 a, float64 b, float_status *s)
1937 {
1938     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1939 }
1940 
1941 static bfloat16 QEMU_FLATTEN
1942 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1943 {
1944     FloatParts64 pa, pb, *pr;
1945 
1946     bfloat16_unpack_canonical(&pa, a, status);
1947     bfloat16_unpack_canonical(&pb, b, status);
1948     pr = parts_addsub(&pa, &pb, status, subtract);
1949 
1950     return bfloat16_round_pack_canonical(pr, status);
1951 }
1952 
1953 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1954 {
1955     return bfloat16_addsub(a, b, status, false);
1956 }
1957 
1958 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1959 {
1960     return bfloat16_addsub(a, b, status, true);
1961 }
1962 
1963 static float128 QEMU_FLATTEN
1964 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1965 {
1966     FloatParts128 pa, pb, *pr;
1967 
1968     float128_unpack_canonical(&pa, a, status);
1969     float128_unpack_canonical(&pb, b, status);
1970     pr = parts_addsub(&pa, &pb, status, subtract);
1971 
1972     return float128_round_pack_canonical(pr, status);
1973 }
1974 
1975 float128 float128_add(float128 a, float128 b, float_status *status)
1976 {
1977     return float128_addsub(a, b, status, false);
1978 }
1979 
1980 float128 float128_sub(float128 a, float128 b, float_status *status)
1981 {
1982     return float128_addsub(a, b, status, true);
1983 }
1984 
1985 static floatx80 QEMU_FLATTEN
1986 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1987 {
1988     FloatParts128 pa, pb, *pr;
1989 
1990     if (!floatx80_unpack_canonical(&pa, a, status) ||
1991         !floatx80_unpack_canonical(&pb, b, status)) {
1992         return floatx80_default_nan(status);
1993     }
1994 
1995     pr = parts_addsub(&pa, &pb, status, subtract);
1996     return floatx80_round_pack_canonical(pr, status);
1997 }
1998 
1999 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2000 {
2001     return floatx80_addsub(a, b, status, false);
2002 }
2003 
2004 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2005 {
2006     return floatx80_addsub(a, b, status, true);
2007 }
2008 
2009 /*
2010  * Multiplication
2011  */
2012 
2013 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2014 {
2015     FloatParts64 pa, pb, *pr;
2016 
2017     float16_unpack_canonical(&pa, a, status);
2018     float16_unpack_canonical(&pb, b, status);
2019     pr = parts_mul(&pa, &pb, status);
2020 
2021     return float16_round_pack_canonical(pr, status);
2022 }
2023 
2024 static float32 QEMU_SOFTFLOAT_ATTR
2025 soft_f32_mul(float32 a, float32 b, float_status *status)
2026 {
2027     FloatParts64 pa, pb, *pr;
2028 
2029     float32_unpack_canonical(&pa, a, status);
2030     float32_unpack_canonical(&pb, b, status);
2031     pr = parts_mul(&pa, &pb, status);
2032 
2033     return float32_round_pack_canonical(pr, status);
2034 }
2035 
2036 static float64 QEMU_SOFTFLOAT_ATTR
2037 soft_f64_mul(float64 a, float64 b, float_status *status)
2038 {
2039     FloatParts64 pa, pb, *pr;
2040 
2041     float64_unpack_canonical(&pa, a, status);
2042     float64_unpack_canonical(&pb, b, status);
2043     pr = parts_mul(&pa, &pb, status);
2044 
2045     return float64_round_pack_canonical(pr, status);
2046 }
2047 
2048 static float hard_f32_mul(float a, float b)
2049 {
2050     return a * b;
2051 }
2052 
2053 static double hard_f64_mul(double a, double b)
2054 {
2055     return a * b;
2056 }
2057 
2058 float32 QEMU_FLATTEN
2059 float32_mul(float32 a, float32 b, float_status *s)
2060 {
2061     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2062                         f32_is_zon2, f32_addsubmul_post);
2063 }
2064 
2065 float64 QEMU_FLATTEN
2066 float64_mul(float64 a, float64 b, float_status *s)
2067 {
2068     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2069                         f64_is_zon2, f64_addsubmul_post);
2070 }
2071 
2072 bfloat16 QEMU_FLATTEN
2073 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2074 {
2075     FloatParts64 pa, pb, *pr;
2076 
2077     bfloat16_unpack_canonical(&pa, a, status);
2078     bfloat16_unpack_canonical(&pb, b, status);
2079     pr = parts_mul(&pa, &pb, status);
2080 
2081     return bfloat16_round_pack_canonical(pr, status);
2082 }
2083 
2084 float128 QEMU_FLATTEN
2085 float128_mul(float128 a, float128 b, float_status *status)
2086 {
2087     FloatParts128 pa, pb, *pr;
2088 
2089     float128_unpack_canonical(&pa, a, status);
2090     float128_unpack_canonical(&pb, b, status);
2091     pr = parts_mul(&pa, &pb, status);
2092 
2093     return float128_round_pack_canonical(pr, status);
2094 }
2095 
2096 floatx80 QEMU_FLATTEN
2097 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2098 {
2099     FloatParts128 pa, pb, *pr;
2100 
2101     if (!floatx80_unpack_canonical(&pa, a, status) ||
2102         !floatx80_unpack_canonical(&pb, b, status)) {
2103         return floatx80_default_nan(status);
2104     }
2105 
2106     pr = parts_mul(&pa, &pb, status);
2107     return floatx80_round_pack_canonical(pr, status);
2108 }
2109 
2110 /*
2111  * Fused multiply-add
2112  */
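/*
 * The 'flags' argument is a mask of float_muladd_negate_{c,product,
 * result} (plus float_muladd_halve_result), applied to the infinitely
 * precise product and sum so that only a single rounding happens.  As
 * a usage sketch, a negated multiply-add r = -(a * b) - c is
 *
 *     r = float16_muladd(a, b, c,
 *                        float_muladd_negate_product |
 *                        float_muladd_negate_c, status);
 */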
2113 
2114 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2115                                     int flags, float_status *status)
2116 {
2117     FloatParts64 pa, pb, pc, *pr;
2118 
2119     float16_unpack_canonical(&pa, a, status);
2120     float16_unpack_canonical(&pb, b, status);
2121     float16_unpack_canonical(&pc, c, status);
2122     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2123 
2124     return float16_round_pack_canonical(pr, status);
2125 }
2126 
2127 static float32 QEMU_SOFTFLOAT_ATTR
2128 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2129                 float_status *status)
2130 {
2131     FloatParts64 pa, pb, pc, *pr;
2132 
2133     float32_unpack_canonical(&pa, a, status);
2134     float32_unpack_canonical(&pb, b, status);
2135     float32_unpack_canonical(&pc, c, status);
2136     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2137 
2138     return float32_round_pack_canonical(pr, status);
2139 }
2140 
2141 static float64 QEMU_SOFTFLOAT_ATTR
2142 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2143                 float_status *status)
2144 {
2145     FloatParts64 pa, pb, pc, *pr;
2146 
2147     float64_unpack_canonical(&pa, a, status);
2148     float64_unpack_canonical(&pb, b, status);
2149     float64_unpack_canonical(&pc, c, status);
2150     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2151 
2152     return float64_round_pack_canonical(pr, status);
2153 }
2154 
2155 static bool force_soft_fma;
2156 
2157 float32 QEMU_FLATTEN
2158 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2159 {
2160     union_float32 ua, ub, uc, ur;
2161 
2162     ua.s = xa;
2163     ub.s = xb;
2164     uc.s = xc;
2165 
2166     if (unlikely(!can_use_fpu(s))) {
2167         goto soft;
2168     }
2169     if (unlikely(flags & float_muladd_halve_result)) {
2170         goto soft;
2171     }
2172 
2173     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2174     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2175         goto soft;
2176     }
2177 
2178     if (unlikely(force_soft_fma)) {
2179         goto soft;
2180     }
2181 
2182     /*
2183      * When a == 0 or b == 0, there's no need to check for under/overflow,
2184      * since we know the addend is (normal || 0) and the product is 0.
2185      */
2186     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2187         union_float32 up;
2188         bool prod_sign;
2189 
2190         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2191         prod_sign ^= !!(flags & float_muladd_negate_product);
2192         up.s = float32_set_sign(float32_zero, prod_sign);
2193 
2194         if (flags & float_muladd_negate_c) {
2195             uc.h = -uc.h;
2196         }
2197         ur.h = up.h + uc.h;
2198     } else {
2199         union_float32 ua_orig = ua;
2200         union_float32 uc_orig = uc;
2201 
2202         if (flags & float_muladd_negate_product) {
2203             ua.h = -ua.h;
2204         }
2205         if (flags & float_muladd_negate_c) {
2206             uc.h = -uc.h;
2207         }
2208 
2209         ur.h = fmaf(ua.h, ub.h, uc.h);
2210 
2211         if (unlikely(f32_is_inf(ur))) {
2212             float_raise(float_flag_overflow, s);
2213         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2214             ua = ua_orig;
2215             uc = uc_orig;
2216             goto soft;
2217         }
2218     }
2219     if (flags & float_muladd_negate_result) {
2220         return float32_chs(ur.s);
2221     }
2222     return ur.s;
2223 
2224  soft:
2225     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2226 }
2227 
2228 float64 QEMU_FLATTEN
2229 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2230 {
2231     union_float64 ua, ub, uc, ur;
2232 
2233     ua.s = xa;
2234     ub.s = xb;
2235     uc.s = xc;
2236 
2237     if (unlikely(!can_use_fpu(s))) {
2238         goto soft;
2239     }
2240     if (unlikely(flags & float_muladd_halve_result)) {
2241         goto soft;
2242     }
2243 
2244     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2245     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2246         goto soft;
2247     }
2248 
2249     if (unlikely(force_soft_fma)) {
2250         goto soft;
2251     }
2252 
2253     /*
2254      * When a == 0 or b == 0, there's no need to check for under/overflow,
2255      * since we know the addend is (normal || 0) and the product is 0.
2256      */
2257     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2258         union_float64 up;
2259         bool prod_sign;
2260 
2261         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2262         prod_sign ^= !!(flags & float_muladd_negate_product);
2263         up.s = float64_set_sign(float64_zero, prod_sign);
2264 
2265         if (flags & float_muladd_negate_c) {
2266             uc.h = -uc.h;
2267         }
2268         ur.h = up.h + uc.h;
2269     } else {
2270         union_float64 ua_orig = ua;
2271         union_float64 uc_orig = uc;
2272 
2273         if (flags & float_muladd_negate_product) {
2274             ua.h = -ua.h;
2275         }
2276         if (flags & float_muladd_negate_c) {
2277             uc.h = -uc.h;
2278         }
2279 
2280         ur.h = fma(ua.h, ub.h, uc.h);
2281 
2282         if (unlikely(f64_is_inf(ur))) {
2283             float_raise(float_flag_overflow, s);
2284         } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2285             ua = ua_orig;
2286             uc = uc_orig;
2287             goto soft;
2288         }
2289     }
2290     if (flags & float_muladd_negate_result) {
2291         return float64_chs(ur.s);
2292     }
2293     return ur.s;
2294 
2295  soft:
2296     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2297 }
2298 
2299 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2300                                       int flags, float_status *status)
2301 {
2302     FloatParts64 pa, pb, pc, *pr;
2303 
2304     bfloat16_unpack_canonical(&pa, a, status);
2305     bfloat16_unpack_canonical(&pb, b, status);
2306     bfloat16_unpack_canonical(&pc, c, status);
2307     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2308 
2309     return bfloat16_round_pack_canonical(pr, status);
2310 }
2311 
2312 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2313                                       int flags, float_status *status)
2314 {
2315     FloatParts128 pa, pb, pc, *pr;
2316 
2317     float128_unpack_canonical(&pa, a, status);
2318     float128_unpack_canonical(&pb, b, status);
2319     float128_unpack_canonical(&pc, c, status);
2320     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2321 
2322     return float128_round_pack_canonical(pr, status);
2323 }
2324 
2325 /*
2326  * Division
2327  */
2328 
2329 float16 float16_div(float16 a, float16 b, float_status *status)
2330 {
2331     FloatParts64 pa, pb, *pr;
2332 
2333     float16_unpack_canonical(&pa, a, status);
2334     float16_unpack_canonical(&pb, b, status);
2335     pr = parts_div(&pa, &pb, status);
2336 
2337     return float16_round_pack_canonical(pr, status);
2338 }
2339 
2340 static float32 QEMU_SOFTFLOAT_ATTR
2341 soft_f32_div(float32 a, float32 b, float_status *status)
2342 {
2343     FloatParts64 pa, pb, *pr;
2344 
2345     float32_unpack_canonical(&pa, a, status);
2346     float32_unpack_canonical(&pb, b, status);
2347     pr = parts_div(&pa, &pb, status);
2348 
2349     return float32_round_pack_canonical(pr, status);
2350 }
2351 
2352 static float64 QEMU_SOFTFLOAT_ATTR
2353 soft_f64_div(float64 a, float64 b, float_status *status)
2354 {
2355     FloatParts64 pa, pb, *pr;
2356 
2357     float64_unpack_canonical(&pa, a, status);
2358     float64_unpack_canonical(&pb, b, status);
2359     pr = parts_div(&pa, &pb, status);
2360 
2361     return float64_round_pack_canonical(pr, status);
2362 }
2363 
2364 static float hard_f32_div(float a, float b)
2365 {
2366     return a / b;
2367 }
2368 
2369 static double hard_f64_div(double a, double b)
2370 {
2371     return a / b;
2372 }
2373 
2374 static bool f32_div_pre(union_float32 a, union_float32 b)
2375 {
2376     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2377         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2378                fpclassify(b.h) == FP_NORMAL;
2379     }
2380     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2381 }
2382 
2383 static bool f64_div_pre(union_float64 a, union_float64 b)
2384 {
2385     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2386         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2387                fpclassify(b.h) == FP_NORMAL;
2388     }
2389     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2390 }
2391 
2392 static bool f32_div_post(union_float32 a, union_float32 b)
2393 {
2394     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2395         return fpclassify(a.h) != FP_ZERO;
2396     }
2397     return !float32_is_zero(a.s);
2398 }
2399 
2400 static bool f64_div_post(union_float64 a, union_float64 b)
2401 {
2402     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2403         return fpclassify(a.h) != FP_ZERO;
2404     }
2405     return !float64_is_zero(a.s);
2406 }
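/*
 * Rationale for the predicates above: the _pre checks admit only zero
 * or normal operands, so the host division cannot see NaN, Inf or
 * subnormal inputs whose flag behaviour differs from softfloat.  The
 * _post checks run only when the hard result came out at or below the
 * smallest normal: a zero dividend means the zero quotient is exact
 * and may be kept, while anything else falls back to the soft path
 * for precise underflow/inexact accounting.
 */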
2407 
2408 float32 QEMU_FLATTEN
2409 float32_div(float32 a, float32 b, float_status *s)
2410 {
2411     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2412                         f32_div_pre, f32_div_post);
2413 }
2414 
2415 float64 QEMU_FLATTEN
2416 float64_div(float64 a, float64 b, float_status *s)
2417 {
2418     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2419                         f64_div_pre, f64_div_post);
2420 }
2421 
2422 bfloat16 QEMU_FLATTEN
2423 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2424 {
2425     FloatParts64 pa, pb, *pr;
2426 
2427     bfloat16_unpack_canonical(&pa, a, status);
2428     bfloat16_unpack_canonical(&pb, b, status);
2429     pr = parts_div(&pa, &pb, status);
2430 
2431     return bfloat16_round_pack_canonical(pr, status);
2432 }
2433 
2434 float128 QEMU_FLATTEN
2435 float128_div(float128 a, float128 b, float_status *status)
2436 {
2437     FloatParts128 pa, pb, *pr;
2438 
2439     float128_unpack_canonical(&pa, a, status);
2440     float128_unpack_canonical(&pb, b, status);
2441     pr = parts_div(&pa, &pb, status);
2442 
2443     return float128_round_pack_canonical(pr, status);
2444 }
2445 
2446 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2447 {
2448     FloatParts128 pa, pb, *pr;
2449 
2450     if (!floatx80_unpack_canonical(&pa, a, status) ||
2451         !floatx80_unpack_canonical(&pb, b, status)) {
2452         return floatx80_default_nan(status);
2453     }
2454 
2455     pr = parts_div(&pa, &pb, status);
2456     return floatx80_round_pack_canonical(pr, status);
2457 }
2458 
2459 /*
2460  * Remainder
2461  */
2462 
2463 float32 float32_rem(float32 a, float32 b, float_status *status)
2464 {
2465     FloatParts64 pa, pb, *pr;
2466 
2467     float32_unpack_canonical(&pa, a, status);
2468     float32_unpack_canonical(&pb, b, status);
2469     pr = parts_modrem(&pa, &pb, NULL, status);
2470 
2471     return float32_round_pack_canonical(pr, status);
2472 }
2473 
2474 float64 float64_rem(float64 a, float64 b, float_status *status)
2475 {
2476     FloatParts64 pa, pb, *pr;
2477 
2478     float64_unpack_canonical(&pa, a, status);
2479     float64_unpack_canonical(&pb, b, status);
2480     pr = parts_modrem(&pa, &pb, NULL, status);
2481 
2482     return float64_round_pack_canonical(pr, status);
2483 }
2484 
2485 float128 float128_rem(float128 a, float128 b, float_status *status)
2486 {
2487     FloatParts128 pa, pb, *pr;
2488 
2489     float128_unpack_canonical(&pa, a, status);
2490     float128_unpack_canonical(&pb, b, status);
2491     pr = parts_modrem(&pa, &pb, NULL, status);
2492 
2493     return float128_round_pack_canonical(pr, status);
2494 }
2495 
2496 /*
2497  * Returns the remainder of the extended double-precision floating-point value
2498  * `a' with respect to the corresponding value `b'.
2499  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2500  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2501  * remainder is instead computed by truncating the quotient toward zero,
2502  * and *quotient is set to the low 64 bits of the absolute value of the
2503  * integer quotient.
2504  */
2505 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2506                          uint64_t *quotient, float_status *status)
2507 {
2508     FloatParts128 pa, pb, *pr;
2509 
2510     *quotient = 0;
2511     if (!floatx80_unpack_canonical(&pa, a, status) ||
2512         !floatx80_unpack_canonical(&pb, b, status)) {
2513         return floatx80_default_nan(status);
2514     }
2515     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2516 
2517     return floatx80_round_pack_canonical(pr, status);
2518 }
2519 
2520 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2521 {
2522     uint64_t quotient;
2523     return floatx80_modrem(a, b, false, &quotient, status);
2524 }
2525 
2526 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2527 {
2528     uint64_t quotient;
2529     return floatx80_modrem(a, b, true, &quotient, status);
2530 }
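/*
 * A usage sketch (st0/st1 stand for the caller's operands): an x87
 * FPREM-style helper wants the truncated remainder together with the
 * low quotient bits, from which it derives the C0/C1/C3 condition
 * flags:
 *
 *     uint64_t quot;
 *     floatx80 rem = floatx80_modrem(st0, st1, true, &quot, status);
 */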
2531 
2532 /*
2533  * Float to Float conversions
2534  *
2535  * Returns the result of converting one float format to another. The
2536  * conversion is performed according to the IEC/IEEE Standard for
2537  * Binary Floating-Point Arithmetic.
2538  *
2539  * Usually this only needs to take care of raising invalid exceptions
2540  * and handling the conversion of NaNs.
2541  */
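/*
 * For half precision, the 'ieee' flag selects between IEEE binary16
 * and the Arm alternative half-precision (AHP) format, which trades
 * the Inf/NaN encodings for extra finite range:
 *
 *     float16 h = float32_to_float16(f, true, s);     IEEE binary16
 *     float16 i = float32_to_float16(f, false, s);    AHP
 */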
2542 
2543 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2544 {
2545     switch (a->cls) {
2546     case float_class_snan:
2547         float_raise(float_flag_invalid_snan, s);
2548         /* fall through */
2549     case float_class_qnan:
2550         /*
2551          * There is no NaN in the destination format.  Raise Invalid
2552          * and return a zero with the sign of the input NaN.
2553          */
2554         float_raise(float_flag_invalid, s);
2555         a->cls = float_class_zero;
2556         break;
2557 
2558     case float_class_inf:
2559         /*
2560          * There is no Inf in the destination format.  Raise Invalid
2561          * and return the maximum normal with the correct sign.
2562          */
2563         float_raise(float_flag_invalid, s);
2564         a->cls = float_class_normal;
2565         a->exp = float16_params_ahp.exp_max;
2566         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2567                                   float16_params_ahp.frac_size + 1);
2568         break;
2569 
2570     case float_class_normal:
2571     case float_class_zero:
2572         break;
2573 
2574     default:
2575         g_assert_not_reached();
2576     }
2577 }
2578 
2579 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2580 {
2581     if (is_nan(a->cls)) {
2582         parts_return_nan(a, s);
2583     }
2584 }
2585 
2586 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2587 {
2588     if (is_nan(a->cls)) {
2589         parts_return_nan(a, s);
2590     }
2591 }
2592 
2593 #define parts_float_to_float(P, S) \
2594     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2595 
2596 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2597                                         float_status *s)
2598 {
2599     a->cls = b->cls;
2600     a->sign = b->sign;
2601     a->exp = b->exp;
2602 
2603     if (a->cls == float_class_normal) {
2604         frac_truncjam(a, b);
2605     } else if (is_nan(a->cls)) {
2606         /* Discard the low bits of the NaN. */
2607         a->frac = b->frac_hi;
2608         parts_return_nan(a, s);
2609     }
2610 }
2611 
2612 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2613                                        float_status *s)
2614 {
2615     a->cls = b->cls;
2616     a->sign = b->sign;
2617     a->exp = b->exp;
2618     frac_widen(a, b);
2619 
2620     if (is_nan(a->cls)) {
2621         parts_return_nan(a, s);
2622     }
2623 }
2624 
2625 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2626 {
2627     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2628     FloatParts64 p;
2629 
2630     float16a_unpack_canonical(&p, a, s, fmt16);
2631     parts_float_to_float(&p, s);
2632     return float32_round_pack_canonical(&p, s);
2633 }
2634 
2635 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2636 {
2637     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2638     FloatParts64 p;
2639 
2640     float16a_unpack_canonical(&p, a, s, fmt16);
2641     parts_float_to_float(&p, s);
2642     return float64_round_pack_canonical(&p, s);
2643 }
2644 
2645 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2646 {
2647     FloatParts64 p;
2648     const FloatFmt *fmt;
2649 
2650     float32_unpack_canonical(&p, a, s);
2651     if (ieee) {
2652         parts_float_to_float(&p, s);
2653         fmt = &float16_params;
2654     } else {
2655         parts_float_to_ahp(&p, s);
2656         fmt = &float16_params_ahp;
2657     }
2658     return float16a_round_pack_canonical(&p, s, fmt);
2659 }
2660 
2661 static float64 QEMU_SOFTFLOAT_ATTR
2662 soft_float32_to_float64(float32 a, float_status *s)
2663 {
2664     FloatParts64 p;
2665 
2666     float32_unpack_canonical(&p, a, s);
2667     parts_float_to_float(&p, s);
2668     return float64_round_pack_canonical(&p, s);
2669 }
2670 
2671 float64 float32_to_float64(float32 a, float_status *s)
2672 {
2673     if (likely(float32_is_normal(a))) {
2674         /* Widening conversion can never produce inexact results.  */
2675         union_float32 uf;
2676         union_float64 ud;
2677         uf.s = a;
2678         ud.h = uf.h;
2679         return ud.s;
2680     } else if (float32_is_zero(a)) {
2681         return float64_set_sign(float64_zero, float32_is_neg(a));
2682     } else {
2683         return soft_float32_to_float64(a, s);
2684     }
2685 }
2686 
2687 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2688 {
2689     FloatParts64 p;
2690     const FloatFmt *fmt;
2691 
2692     float64_unpack_canonical(&p, a, s);
2693     if (ieee) {
2694         parts_float_to_float(&p, s);
2695         fmt = &float16_params;
2696     } else {
2697         parts_float_to_ahp(&p, s);
2698         fmt = &float16_params_ahp;
2699     }
2700     return float16a_round_pack_canonical(&p, s, fmt);
2701 }
2702 
2703 float32 float64_to_float32(float64 a, float_status *s)
2704 {
2705     FloatParts64 p;
2706 
2707     float64_unpack_canonical(&p, a, s);
2708     parts_float_to_float(&p, s);
2709     return float32_round_pack_canonical(&p, s);
2710 }
2711 
2712 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2713 {
2714     FloatParts64 p;
2715 
2716     bfloat16_unpack_canonical(&p, a, s);
2717     parts_float_to_float(&p, s);
2718     return float32_round_pack_canonical(&p, s);
2719 }
2720 
2721 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2722 {
2723     FloatParts64 p;
2724 
2725     bfloat16_unpack_canonical(&p, a, s);
2726     parts_float_to_float(&p, s);
2727     return float64_round_pack_canonical(&p, s);
2728 }
2729 
2730 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2731 {
2732     FloatParts64 p;
2733 
2734     float32_unpack_canonical(&p, a, s);
2735     parts_float_to_float(&p, s);
2736     return bfloat16_round_pack_canonical(&p, s);
2737 }
2738 
2739 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2740 {
2741     FloatParts64 p;
2742 
2743     float64_unpack_canonical(&p, a, s);
2744     parts_float_to_float(&p, s);
2745     return bfloat16_round_pack_canonical(&p, s);
2746 }
2747 
2748 float32 float128_to_float32(float128 a, float_status *s)
2749 {
2750     FloatParts64 p64;
2751     FloatParts128 p128;
2752 
2753     float128_unpack_canonical(&p128, a, s);
2754     parts_float_to_float_narrow(&p64, &p128, s);
2755     return float32_round_pack_canonical(&p64, s);
2756 }
2757 
2758 float64 float128_to_float64(float128 a, float_status *s)
2759 {
2760     FloatParts64 p64;
2761     FloatParts128 p128;
2762 
2763     float128_unpack_canonical(&p128, a, s);
2764     parts_float_to_float_narrow(&p64, &p128, s);
2765     return float64_round_pack_canonical(&p64, s);
2766 }
2767 
2768 float128 float32_to_float128(float32 a, float_status *s)
2769 {
2770     FloatParts64 p64;
2771     FloatParts128 p128;
2772 
2773     float32_unpack_canonical(&p64, a, s);
2774     parts_float_to_float_widen(&p128, &p64, s);
2775     return float128_round_pack_canonical(&p128, s);
2776 }
2777 
2778 float128 float64_to_float128(float64 a, float_status *s)
2779 {
2780     FloatParts64 p64;
2781     FloatParts128 p128;
2782 
2783     float64_unpack_canonical(&p64, a, s);
2784     parts_float_to_float_widen(&p128, &p64, s);
2785     return float128_round_pack_canonical(&p128, s);
2786 }
2787 
2788 float32 floatx80_to_float32(floatx80 a, float_status *s)
2789 {
2790     FloatParts64 p64;
2791     FloatParts128 p128;
2792 
2793     if (floatx80_unpack_canonical(&p128, a, s)) {
2794         parts_float_to_float_narrow(&p64, &p128, s);
2795     } else {
2796         parts_default_nan(&p64, s);
2797     }
2798     return float32_round_pack_canonical(&p64, s);
2799 }
2800 
2801 float64 floatx80_to_float64(floatx80 a, float_status *s)
2802 {
2803     FloatParts64 p64;
2804     FloatParts128 p128;
2805 
2806     if (floatx80_unpack_canonical(&p128, a, s)) {
2807         parts_float_to_float_narrow(&p64, &p128, s);
2808     } else {
2809         parts_default_nan(&p64, s);
2810     }
2811     return float64_round_pack_canonical(&p64, s);
2812 }
2813 
2814 float128 floatx80_to_float128(floatx80 a, float_status *s)
2815 {
2816     FloatParts128 p;
2817 
2818     if (floatx80_unpack_canonical(&p, a, s)) {
2819         parts_float_to_float(&p, s);
2820     } else {
2821         parts_default_nan(&p, s);
2822     }
2823     return float128_round_pack_canonical(&p, s);
2824 }
2825 
2826 floatx80 float32_to_floatx80(float32 a, float_status *s)
2827 {
2828     FloatParts64 p64;
2829     FloatParts128 p128;
2830 
2831     float32_unpack_canonical(&p64, a, s);
2832     parts_float_to_float_widen(&p128, &p64, s);
2833     return floatx80_round_pack_canonical(&p128, s);
2834 }
2835 
2836 floatx80 float64_to_floatx80(float64 a, float_status *s)
2837 {
2838     FloatParts64 p64;
2839     FloatParts128 p128;
2840 
2841     float64_unpack_canonical(&p64, a, s);
2842     parts_float_to_float_widen(&p128, &p64, s);
2843     return floatx80_round_pack_canonical(&p128, s);
2844 }
2845 
2846 floatx80 float128_to_floatx80(float128 a, float_status *s)
2847 {
2848     FloatParts128 p;
2849 
2850     float128_unpack_canonical(&p, a, s);
2851     parts_float_to_float(&p, s);
2852     return floatx80_round_pack_canonical(&p, s);
2853 }
2854 
2855 /*
2856  * Round to integral value
2857  */
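/*
 * These honour the dynamic rounding mode in float_status; a caller
 * needing a specific mode (say roundTiesToAway) can swap it around
 * the call:
 *
 *     FloatRoundMode old = get_float_rounding_mode(s);
 *     set_float_rounding_mode(float_round_ties_away, s);
 *     f = float64_round_to_int(f, s);
 *     set_float_rounding_mode(old, s);
 */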
2858 
2859 float16 float16_round_to_int(float16 a, float_status *s)
2860 {
2861     FloatParts64 p;
2862 
2863     float16_unpack_canonical(&p, a, s);
2864     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2865     return float16_round_pack_canonical(&p, s);
2866 }
2867 
2868 float32 float32_round_to_int(float32 a, float_status *s)
2869 {
2870     FloatParts64 p;
2871 
2872     float32_unpack_canonical(&p, a, s);
2873     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2874     return float32_round_pack_canonical(&p, s);
2875 }
2876 
2877 float64 float64_round_to_int(float64 a, float_status *s)
2878 {
2879     FloatParts64 p;
2880 
2881     float64_unpack_canonical(&p, a, s);
2882     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2883     return float64_round_pack_canonical(&p, s);
2884 }
2885 
2886 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2887 {
2888     FloatParts64 p;
2889 
2890     bfloat16_unpack_canonical(&p, a, s);
2891     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2892     return bfloat16_round_pack_canonical(&p, s);
2893 }
2894 
2895 float128 float128_round_to_int(float128 a, float_status *s)
2896 {
2897     FloatParts128 p;
2898 
2899     float128_unpack_canonical(&p, a, s);
2900     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2901     return float128_round_pack_canonical(&p, s);
2902 }
2903 
2904 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2905 {
2906     FloatParts128 p;
2907 
2908     if (!floatx80_unpack_canonical(&p, a, status)) {
2909         return floatx80_default_nan(status);
2910     }
2911 
2912     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2913                        &floatx80_params[status->floatx80_rounding_precision]);
2914     return floatx80_round_pack_canonical(&p, status);
2915 }
2916 
2917 /*
2918  * Floating-point to signed integer conversions
2919  */
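/*
 * The _scalbn variants multiply the input by 2**scale before rounding,
 * which turns them into fixed-point conversions; for instance a
 * saturating Q16.16 result is
 *
 *     int32_t fix = float32_to_int32_scalbn(f, float_round_to_zero,
 *                                           16, s);
 */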
2920 
2921 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2922                               float_status *s)
2923 {
2924     FloatParts64 p;
2925 
2926     float16_unpack_canonical(&p, a, s);
2927     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2928 }
2929 
2930 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2931                                 float_status *s)
2932 {
2933     FloatParts64 p;
2934 
2935     float16_unpack_canonical(&p, a, s);
2936     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2937 }
2938 
2939 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2940                                 float_status *s)
2941 {
2942     FloatParts64 p;
2943 
2944     float16_unpack_canonical(&p, a, s);
2945     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2946 }
2947 
2948 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2949                                 float_status *s)
2950 {
2951     FloatParts64 p;
2952 
2953     float16_unpack_canonical(&p, a, s);
2954     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2955 }
2956 
2957 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2958                                 float_status *s)
2959 {
2960     FloatParts64 p;
2961 
2962     float32_unpack_canonical(&p, a, s);
2963     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2964 }
2965 
2966 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2967                                 float_status *s)
2968 {
2969     FloatParts64 p;
2970 
2971     float32_unpack_canonical(&p, a, s);
2972     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2973 }
2974 
2975 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2976                                 float_status *s)
2977 {
2978     FloatParts64 p;
2979 
2980     float32_unpack_canonical(&p, a, s);
2981     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2982 }
2983 
2984 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2985                                 float_status *s)
2986 {
2987     FloatParts64 p;
2988 
2989     float64_unpack_canonical(&p, a, s);
2990     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2991 }
2992 
2993 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2994                                 float_status *s)
2995 {
2996     FloatParts64 p;
2997 
2998     float64_unpack_canonical(&p, a, s);
2999     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3000 }
3001 
3002 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3003                                 float_status *s)
3004 {
3005     FloatParts64 p;
3006 
3007     float64_unpack_canonical(&p, a, s);
3008     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3009 }
3010 
3011 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3012                                  float_status *s)
3013 {
3014     FloatParts64 p;
3015 
3016     bfloat16_unpack_canonical(&p, a, s);
3017     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3018 }
3019 
3020 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3021                                  float_status *s)
3022 {
3023     FloatParts64 p;
3024 
3025     bfloat16_unpack_canonical(&p, a, s);
3026     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3027 }
3028 
3029 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3030                                  float_status *s)
3031 {
3032     FloatParts64 p;
3033 
3034     bfloat16_unpack_canonical(&p, a, s);
3035     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3036 }
3037 
3038 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3039                                         int scale, float_status *s)
3040 {
3041     FloatParts128 p;
3042 
3043     float128_unpack_canonical(&p, a, s);
3044     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3045 }
3046 
3047 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3048                                         int scale, float_status *s)
3049 {
3050     FloatParts128 p;
3051 
3052     float128_unpack_canonical(&p, a, s);
3053     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3054 }
3055 
3056 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3057                                         int scale, float_status *s)
3058 {
3059     FloatParts128 p;
3060 
3061     if (!floatx80_unpack_canonical(&p, a, s)) {
3062         parts_default_nan(&p, s);
3063     }
3064     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3065 }
3066 
3067 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3068                                         int scale, float_status *s)
3069 {
3070     FloatParts128 p;
3071 
3072     if (!floatx80_unpack_canonical(&p, a, s)) {
3073         parts_default_nan(&p, s);
3074     }
3075     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3076 }
3077 
3078 int8_t float16_to_int8(float16 a, float_status *s)
3079 {
3080     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3081 }
3082 
3083 int16_t float16_to_int16(float16 a, float_status *s)
3084 {
3085     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3086 }
3087 
3088 int32_t float16_to_int32(float16 a, float_status *s)
3089 {
3090     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3091 }
3092 
3093 int64_t float16_to_int64(float16 a, float_status *s)
3094 {
3095     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3096 }
3097 
3098 int16_t float32_to_int16(float32 a, float_status *s)
3099 {
3100     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3101 }
3102 
3103 int32_t float32_to_int32(float32 a, float_status *s)
3104 {
3105     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3106 }
3107 
3108 int64_t float32_to_int64(float32 a, float_status *s)
3109 {
3110     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3111 }
3112 
3113 int16_t float64_to_int16(float64 a, float_status *s)
3114 {
3115     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3116 }
3117 
3118 int32_t float64_to_int32(float64 a, float_status *s)
3119 {
3120     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3121 }
3122 
3123 int64_t float64_to_int64(float64 a, float_status *s)
3124 {
3125     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3126 }
3127 
3128 int32_t float128_to_int32(float128 a, float_status *s)
3129 {
3130     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3131 }
3132 
3133 int64_t float128_to_int64(float128 a, float_status *s)
3134 {
3135     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3136 }
3137 
3138 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3139 {
3140     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3141 }
3142 
3143 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3144 {
3145     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3146 }
3147 
3148 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3149 {
3150     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3151 }
3152 
3153 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3154 {
3155     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3156 }
3157 
3158 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3159 {
3160     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3161 }
3162 
3163 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3164 {
3165     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3166 }
3167 
3168 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3169 {
3170     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3171 }
3172 
3173 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3174 {
3175     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3176 }
3177 
3178 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3179 {
3180     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3181 }
3182 
3183 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3184 {
3185     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3186 }
3187 
3188 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3189 {
3190     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3191 }
3192 
3193 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3194 {
3195     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3196 }
3197 
3198 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3199 {
3200     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3201 }
3202 
3203 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3204 {
3205     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3206 }
3207 
3208 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3209 {
3210     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3211 }
3212 
3213 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3214 {
3215     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3216 }
3217 
3218 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3219 {
3220     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3221 }
3222 
3223 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3224 {
3225     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3226 }
3227 
3228 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3229 {
3230     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3231 }
3232 
3233 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3234 {
3235     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3236 }
3237 
3238 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3239 {
3240     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3241 }
3242 
3243 /*
3244  * Floating-point to unsigned integer conversions
3245  */
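/*
 * As with the signed versions, the _scalbn variants scale by 2**scale
 * before rounding; negative inputs that do not round to zero raise
 * the invalid flag and saturate to 0.
 */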
3246 
3247 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3248                                 float_status *s)
3249 {
3250     FloatParts64 p;
3251 
3252     float16_unpack_canonical(&p, a, s);
3253     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3254 }
3255 
3256 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3257                                   float_status *s)
3258 {
3259     FloatParts64 p;
3260 
3261     float16_unpack_canonical(&p, a, s);
3262     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3263 }
3264 
3265 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3266                                   float_status *s)
3267 {
3268     FloatParts64 p;
3269 
3270     float16_unpack_canonical(&p, a, s);
3271     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3272 }
3273 
3274 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3275                                   float_status *s)
3276 {
3277     FloatParts64 p;
3278 
3279     float16_unpack_canonical(&p, a, s);
3280     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3281 }
3282 
3283 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3284                                   float_status *s)
3285 {
3286     FloatParts64 p;
3287 
3288     float32_unpack_canonical(&p, a, s);
3289     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3290 }
3291 
3292 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3293                                   float_status *s)
3294 {
3295     FloatParts64 p;
3296 
3297     float32_unpack_canonical(&p, a, s);
3298     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3299 }
3300 
3301 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3302                                   float_status *s)
3303 {
3304     FloatParts64 p;
3305 
3306     float32_unpack_canonical(&p, a, s);
3307     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3308 }
3309 
3310 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3311                                   float_status *s)
3312 {
3313     FloatParts64 p;
3314 
3315     float64_unpack_canonical(&p, a, s);
3316     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3317 }
3318 
3319 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3320                                   float_status *s)
3321 {
3322     FloatParts64 p;
3323 
3324     float64_unpack_canonical(&p, a, s);
3325     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3326 }
3327 
3328 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3329                                   float_status *s)
3330 {
3331     FloatParts64 p;
3332 
3333     float64_unpack_canonical(&p, a, s);
3334     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3335 }
3336 
3337 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3338                                    int scale, float_status *s)
3339 {
3340     FloatParts64 p;
3341 
3342     bfloat16_unpack_canonical(&p, a, s);
3343     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3344 }
3345 
3346 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3347                                    int scale, float_status *s)
3348 {
3349     FloatParts64 p;
3350 
3351     bfloat16_unpack_canonical(&p, a, s);
3352     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3353 }
3354 
3355 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3356                                    int scale, float_status *s)
3357 {
3358     FloatParts64 p;
3359 
3360     bfloat16_unpack_canonical(&p, a, s);
3361     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3362 }
3363 
3364 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3365                                           int scale, float_status *s)
3366 {
3367     FloatParts128 p;
3368 
3369     float128_unpack_canonical(&p, a, s);
3370     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3371 }
3372 
3373 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3374                                           int scale, float_status *s)
3375 {
3376     FloatParts128 p;
3377 
3378     float128_unpack_canonical(&p, a, s);
3379     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3380 }
3381 
3382 uint8_t float16_to_uint8(float16 a, float_status *s)
3383 {
3384     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3385 }
3386 
3387 uint16_t float16_to_uint16(float16 a, float_status *s)
3388 {
3389     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3390 }
3391 
3392 uint32_t float16_to_uint32(float16 a, float_status *s)
3393 {
3394     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3395 }
3396 
3397 uint64_t float16_to_uint64(float16 a, float_status *s)
3398 {
3399     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3400 }
3401 
3402 uint16_t float32_to_uint16(float32 a, float_status *s)
3403 {
3404     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3405 }
3406 
3407 uint32_t float32_to_uint32(float32 a, float_status *s)
3408 {
3409     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3410 }
3411 
3412 uint64_t float32_to_uint64(float32 a, float_status *s)
3413 {
3414     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3415 }
3416 
3417 uint16_t float64_to_uint16(float64 a, float_status *s)
3418 {
3419     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3420 }
3421 
3422 uint32_t float64_to_uint32(float64 a, float_status *s)
3423 {
3424     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3425 }
3426 
3427 uint64_t float64_to_uint64(float64 a, float_status *s)
3428 {
3429     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3430 }
3431 
3432 uint32_t float128_to_uint32(float128 a, float_status *s)
3433 {
3434     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3435 }
3436 
3437 uint64_t float128_to_uint64(float128 a, float_status *s)
3438 {
3439     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3440 }
3441 
3442 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3443 {
3444     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3445 }
3446 
3447 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3448 {
3449     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3450 }
3451 
3452 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3453 {
3454     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3455 }
3456 
3457 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3458 {
3459     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3460 }
3461 
3462 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3463 {
3464     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3465 }
3466 
3467 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3468 {
3469     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3470 }
3471 
3472 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3473 {
3474     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3475 }
3476 
3477 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3478 {
3479     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3480 }
3481 
3482 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3483 {
3484     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3485 }
3486 
3487 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3488 {
3489     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3490 }
3491 
3492 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3493 {
3494     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3495 }
3496 
3497 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3498 {
3499     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3500 }
3501 
3502 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3503 {
3504     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3505 }
3506 
3507 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3508 {
3509     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3510 }
3511 
3512 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3513 {
3514     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3515 }
3516 
3517 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3518 {
3519     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3520 }
3521 
3522 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3523 {
3524     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3525 }
3526 
3527 /*
3528  * Signed integer to floating-point conversions
3529  */
3530 
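     /*
      * These converters share one pattern: parts_sint_to_float() decomposes
      * the integer into a FloatParts value, folding the 2**scale adjustment
      * into the exponent, and a *_round_pack_canonical() routine rounds and
      * packs it into the target format.  The narrower widths simply widen to
      * int64_t.  For example, int32_to_float16_scalbn(-3, 8, s) yields
      * -768.0 (-3 * 2**8), exactly representable in float16.
      */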
3531 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3532 {
3533     FloatParts64 p;
3534 
3535     parts_sint_to_float(&p, a, scale, status);
3536     return float16_round_pack_canonical(&p, status);
3537 }
3538 
3539 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3540 {
3541     return int64_to_float16_scalbn(a, scale, status);
3542 }
3543 
3544 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3545 {
3546     return int64_to_float16_scalbn(a, scale, status);
3547 }
3548 
3549 float16 int64_to_float16(int64_t a, float_status *status)
3550 {
3551     return int64_to_float16_scalbn(a, 0, status);
3552 }
3553 
3554 float16 int32_to_float16(int32_t a, float_status *status)
3555 {
3556     return int64_to_float16_scalbn(a, 0, status);
3557 }
3558 
3559 float16 int16_to_float16(int16_t a, float_status *status)
3560 {
3561     return int64_to_float16_scalbn(a, 0, status);
3562 }
3563 
3564 float16 int8_to_float16(int8_t a, float_status *status)
3565 {
3566     return int64_to_float16_scalbn(a, 0, status);
3567 }
3568 
3569 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3570 {
3571     FloatParts64 p;
3572 
3573     /* Without scaling, there are no overflow concerns. */
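         /* (|a| < 2**63 while float32's largest finite value is ~2**128,
            so the host conversion below can round but never overflow.) */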
3574     if (likely(scale == 0) && can_use_fpu(status)) {
3575         union_float32 ur;
3576         ur.h = a;
3577         return ur.s;
3578     }
3579 
3580     parts_sint_to_float(&p, a, scale, status);
3581     return float32_round_pack_canonical(&p, status);
3582 }
3583 
3584 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3585 {
3586     return int64_to_float32_scalbn(a, scale, status);
3587 }
3588 
3589 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3590 {
3591     return int64_to_float32_scalbn(a, scale, status);
3592 }
3593 
3594 float32 int64_to_float32(int64_t a, float_status *status)
3595 {
3596     return int64_to_float32_scalbn(a, 0, status);
3597 }
3598 
3599 float32 int32_to_float32(int32_t a, float_status *status)
3600 {
3601     return int64_to_float32_scalbn(a, 0, status);
3602 }
3603 
3604 float32 int16_to_float32(int16_t a, float_status *status)
3605 {
3606     return int64_to_float32_scalbn(a, 0, status);
3607 }
3608 
3609 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3610 {
3611     FloatParts64 p;
3612 
3613     /* Without scaling, there are no overflow concerns. */
3614     if (likely(scale == 0) && can_use_fpu(status)) {
3615         union_float64 ur;
3616         ur.h = a;
3617         return ur.s;
3618     }
3619 
3620     parts_sint_to_float(&p, a, scale, status);
3621     return float64_round_pack_canonical(&p, status);
3622 }
3623 
3624 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3625 {
3626     return int64_to_float64_scalbn(a, scale, status);
3627 }
3628 
3629 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3630 {
3631     return int64_to_float64_scalbn(a, scale, status);
3632 }
3633 
3634 float64 int64_to_float64(int64_t a, float_status *status)
3635 {
3636     return int64_to_float64_scalbn(a, 0, status);
3637 }
3638 
3639 float64 int32_to_float64(int32_t a, float_status *status)
3640 {
3641     return int64_to_float64_scalbn(a, 0, status);
3642 }
3643 
3644 float64 int16_to_float64(int16_t a, float_status *status)
3645 {
3646     return int64_to_float64_scalbn(a, 0, status);
3647 }
3648 
3649 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3650 {
3651     FloatParts64 p;
3652 
3653     parts_sint_to_float(&p, a, scale, status);
3654     return bfloat16_round_pack_canonical(&p, status);
3655 }
3656 
3657 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3658 {
3659     return int64_to_bfloat16_scalbn(a, scale, status);
3660 }
3661 
3662 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3663 {
3664     return int64_to_bfloat16_scalbn(a, scale, status);
3665 }
3666 
3667 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3668 {
3669     return int64_to_bfloat16_scalbn(a, 0, status);
3670 }
3671 
3672 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3673 {
3674     return int64_to_bfloat16_scalbn(a, 0, status);
3675 }
3676 
3677 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3678 {
3679     return int64_to_bfloat16_scalbn(a, 0, status);
3680 }
3681 
3682 float128 int64_to_float128(int64_t a, float_status *status)
3683 {
3684     FloatParts128 p;
3685 
3686     parts_sint_to_float(&p, a, 0, status);
3687     return float128_round_pack_canonical(&p, status);
3688 }
3689 
3690 float128 int32_to_float128(int32_t a, float_status *status)
3691 {
3692     return int64_to_float128(a, status);
3693 }
3694 
3695 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3696 {
3697     FloatParts128 p;
3698 
3699     parts_sint_to_float(&p, a, 0, status);
3700     return floatx80_round_pack_canonical(&p, status);
3701 }
3702 
3703 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3704 {
3705     return int64_to_floatx80(a, status);
3706 }
3707 
3708 /*
3709  * Unsigned integer to floating-point conversions
3710  */
3711 
3712 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3713 {
3714     FloatParts64 p;
3715 
3716     parts_uint_to_float(&p, a, scale, status);
3717     return float16_round_pack_canonical(&p, status);
3718 }
3719 
3720 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3721 {
3722     return uint64_to_float16_scalbn(a, scale, status);
3723 }
3724 
3725 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3726 {
3727     return uint64_to_float16_scalbn(a, scale, status);
3728 }
3729 
3730 float16 uint64_to_float16(uint64_t a, float_status *status)
3731 {
3732     return uint64_to_float16_scalbn(a, 0, status);
3733 }
3734 
3735 float16 uint32_to_float16(uint32_t a, float_status *status)
3736 {
3737     return uint64_to_float16_scalbn(a, 0, status);
3738 }
3739 
3740 float16 uint16_to_float16(uint16_t a, float_status *status)
3741 {
3742     return uint64_to_float16_scalbn(a, 0, status);
3743 }
3744 
3745 float16 uint8_to_float16(uint8_t a, float_status *status)
3746 {
3747     return uint64_to_float16_scalbn(a, 0, status);
3748 }
3749 
3750 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3751 {
3752     FloatParts64 p;
3753 
3754     /* Without scaling, there are no overflow concerns. */
3755     if (likely(scale == 0) && can_use_fpu(status)) {
3756         union_float32 ur;
3757         ur.h = a;
3758         return ur.s;
3759     }
3760 
3761     parts_uint_to_float(&p, a, scale, status);
3762     return float32_round_pack_canonical(&p, status);
3763 }
3764 
3765 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3766 {
3767     return uint64_to_float32_scalbn(a, scale, status);
3768 }
3769 
3770 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3771 {
3772     return uint64_to_float32_scalbn(a, scale, status);
3773 }
3774 
3775 float32 uint64_to_float32(uint64_t a, float_status *status)
3776 {
3777     return uint64_to_float32_scalbn(a, 0, status);
3778 }
3779 
3780 float32 uint32_to_float32(uint32_t a, float_status *status)
3781 {
3782     return uint64_to_float32_scalbn(a, 0, status);
3783 }
3784 
3785 float32 uint16_to_float32(uint16_t a, float_status *status)
3786 {
3787     return uint64_to_float32_scalbn(a, 0, status);
3788 }
3789 
3790 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3791 {
3792     FloatParts64 p;
3793 
3794     /* Without scaling, there are no overflow concerns. */
3795     if (likely(scale == 0) && can_use_fpu(status)) {
3796         union_float64 ur;
3797         ur.h = a;
3798         return ur.s;
3799     }
3800 
3801     parts_uint_to_float(&p, a, scale, status);
3802     return float64_round_pack_canonical(&p, status);
3803 }
3804 
3805 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3806 {
3807     return uint64_to_float64_scalbn(a, scale, status);
3808 }
3809 
3810 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3811 {
3812     return uint64_to_float64_scalbn(a, scale, status);
3813 }
3814 
3815 float64 uint64_to_float64(uint64_t a, float_status *status)
3816 {
3817     return uint64_to_float64_scalbn(a, 0, status);
3818 }
3819 
3820 float64 uint32_to_float64(uint32_t a, float_status *status)
3821 {
3822     return uint64_to_float64_scalbn(a, 0, status);
3823 }
3824 
3825 float64 uint16_to_float64(uint16_t a, float_status *status)
3826 {
3827     return uint64_to_float64_scalbn(a, 0, status);
3828 }
3829 
3830 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3831 {
3832     FloatParts64 p;
3833 
3834     parts_uint_to_float(&p, a, scale, status);
3835     return bfloat16_round_pack_canonical(&p, status);
3836 }
3837 
3838 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3839 {
3840     return uint64_to_bfloat16_scalbn(a, scale, status);
3841 }
3842 
3843 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3844 {
3845     return uint64_to_bfloat16_scalbn(a, scale, status);
3846 }
3847 
3848 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3849 {
3850     return uint64_to_bfloat16_scalbn(a, 0, status);
3851 }
3852 
3853 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3854 {
3855     return uint64_to_bfloat16_scalbn(a, 0, status);
3856 }
3857 
3858 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3859 {
3860     return uint64_to_bfloat16_scalbn(a, 0, status);
3861 }
3862 
3863 float128 uint64_to_float128(uint64_t a, float_status *status)
3864 {
3865     FloatParts128 p;
3866 
3867     parts_uint_to_float(&p, a, 0, status);
3868     return float128_round_pack_canonical(&p, status);
3869 }
3870 
3871 /*
3872  * Minimum and maximum
3873  */
3874 
3875 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3876 {
3877     FloatParts64 pa, pb, *pr;
3878 
3879     float16_unpack_canonical(&pa, a, s);
3880     float16_unpack_canonical(&pb, b, s);
3881     pr = parts_minmax(&pa, &pb, s, flags);
3882 
3883     return float16_round_pack_canonical(pr, s);
3884 }
3885 
3886 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3887                                 float_status *s, int flags)
3888 {
3889     FloatParts64 pa, pb, *pr;
3890 
3891     bfloat16_unpack_canonical(&pa, a, s);
3892     bfloat16_unpack_canonical(&pb, b, s);
3893     pr = parts_minmax(&pa, &pb, s, flags);
3894 
3895     return bfloat16_round_pack_canonical(pr, s);
3896 }
3897 
3898 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3899 {
3900     FloatParts64 pa, pb, *pr;
3901 
3902     float32_unpack_canonical(&pa, a, s);
3903     float32_unpack_canonical(&pb, b, s);
3904     pr = parts_minmax(&pa, &pb, s, flags);
3905 
3906     return float32_round_pack_canonical(pr, s);
3907 }
3908 
3909 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3910 {
3911     FloatParts64 pa, pb, *pr;
3912 
3913     float64_unpack_canonical(&pa, a, s);
3914     float64_unpack_canonical(&pb, b, s);
3915     pr = parts_minmax(&pa, &pb, s, flags);
3916 
3917     return float64_round_pack_canonical(pr, s);
3918 }
3919 
3920 static float128 float128_minmax(float128 a, float128 b,
3921                                 float_status *s, int flags)
3922 {
3923     FloatParts128 pa, pb, *pr;
3924 
3925     float128_unpack_canonical(&pa, a, s);
3926     float128_unpack_canonical(&pb, b, s);
3927     pr = parts_minmax(&pa, &pb, s, flags);
3928 
3929     return float128_round_pack_canonical(pr, s);
3930 }
3931 
3932 #define MINMAX_1(type, name, flags) \
3933     type type##_##name(type a, type b, float_status *s) \
3934     { return type##_minmax(a, b, s, flags); }
3935 
3936 #define MINMAX_2(type) \
3937     MINMAX_1(type, max, 0)                                                \
3938     MINMAX_1(type, maxnum, minmax_isnum)                                  \
3939     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
3940     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
3941     MINMAX_1(type, min, minmax_ismin)                                     \
3942     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
3943     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
3944     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
3945 
3946 MINMAX_2(float16)
3947 MINMAX_2(bfloat16)
3948 MINMAX_2(float32)
3949 MINMAX_2(float64)
3950 MINMAX_2(float128)
3951 
3952 #undef MINMAX_1
3953 #undef MINMAX_2
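
     /*
      * For illustration, MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum)
      * expands to:
      *
      *   float32 float32_minnum(float32 a, float32 b, float_status *s)
      *   { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
      *
      * The *num variants implement IEEE 754-2008 minNum/maxNum (a quiet NaN
      * loses to a number), the *nummag variants compare magnitudes first,
      * and the *imum_number variants follow the IEEE 754-2019
      * minimumNumber/maximumNumber operations.
      */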
3954 
3955 /*
3956  * Floating point compare
3957  */
3958 
3959 static FloatRelation QEMU_FLATTEN
3960 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3961 {
3962     FloatParts64 pa, pb;
3963 
3964     float16_unpack_canonical(&pa, a, s);
3965     float16_unpack_canonical(&pb, b, s);
3966     return parts_compare(&pa, &pb, s, is_quiet);
3967 }
3968 
3969 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3970 {
3971     return float16_do_compare(a, b, s, false);
3972 }
3973 
3974 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3975 {
3976     return float16_do_compare(a, b, s, true);
3977 }
3978 
3979 static FloatRelation QEMU_SOFTFLOAT_ATTR
3980 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3981 {
3982     FloatParts64 pa, pb;
3983 
3984     float32_unpack_canonical(&pa, a, s);
3985     float32_unpack_canonical(&pb, b, s);
3986     return parts_compare(&pa, &pb, s, is_quiet);
3987 }
3988 
3989 static FloatRelation QEMU_FLATTEN
3990 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3991 {
3992     union_float32 ua, ub;
3993 
3994     ua.s = xa;
3995     ub.s = xb;
3996 
3997     if (QEMU_NO_HARDFLOAT) {
3998         goto soft;
3999     }
4000 
4001     float32_input_flush2(&ua.s, &ub.s, s);
4002     if (isgreaterequal(ua.h, ub.h)) {
4003         if (isgreater(ua.h, ub.h)) {
4004             return float_relation_greater;
4005         }
4006         return float_relation_equal;
4007     }
4008     if (likely(isless(ua.h, ub.h))) {
4009         return float_relation_less;
4010     }
4011     /*
4012      * The only condition remaining is unordered.
4013      * Fall through to set flags.
4014      */
4015  soft:
4016     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4017 }
4018 
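     /*
      * The hardfloat path above leans on C99's isgreater()/isgreaterequal()/
      * isless() macros, which, unlike the ordinary relational operators, do
      * not raise the invalid exception for quiet NaN operands.  Only the
      * unordered case falls through to the softfloat comparison, which also
      * takes care of QEMU's own flag bookkeeping.
      */
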
4019 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4020 {
4021     return float32_hs_compare(a, b, s, false);
4022 }
4023 
4024 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4025 {
4026     return float32_hs_compare(a, b, s, true);
4027 }
4028 
4029 static FloatRelation QEMU_SOFTFLOAT_ATTR
4030 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4031 {
4032     FloatParts64 pa, pb;
4033 
4034     float64_unpack_canonical(&pa, a, s);
4035     float64_unpack_canonical(&pb, b, s);
4036     return parts_compare(&pa, &pb, s, is_quiet);
4037 }
4038 
4039 static FloatRelation QEMU_FLATTEN
4040 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4041 {
4042     union_float64 ua, ub;
4043 
4044     ua.s = xa;
4045     ub.s = xb;
4046 
4047     if (QEMU_NO_HARDFLOAT) {
4048         goto soft;
4049     }
4050 
4051     float64_input_flush2(&ua.s, &ub.s, s);
4052     if (isgreaterequal(ua.h, ub.h)) {
4053         if (isgreater(ua.h, ub.h)) {
4054             return float_relation_greater;
4055         }
4056         return float_relation_equal;
4057     }
4058     if (likely(isless(ua.h, ub.h))) {
4059         return float_relation_less;
4060     }
4061     /*
4062      * The only condition remaining is unordered.
4063      * Fall through to set flags.
4064      */
4065  soft:
4066     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4067 }
4068 
4069 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4070 {
4071     return float64_hs_compare(a, b, s, false);
4072 }
4073 
4074 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4075 {
4076     return float64_hs_compare(a, b, s, true);
4077 }
4078 
4079 static FloatRelation QEMU_FLATTEN
4080 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4081 {
4082     FloatParts64 pa, pb;
4083 
4084     bfloat16_unpack_canonical(&pa, a, s);
4085     bfloat16_unpack_canonical(&pb, b, s);
4086     return parts_compare(&pa, &pb, s, is_quiet);
4087 }
4088 
4089 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4090 {
4091     return bfloat16_do_compare(a, b, s, false);
4092 }
4093 
4094 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4095 {
4096     return bfloat16_do_compare(a, b, s, true);
4097 }
4098 
4099 static FloatRelation QEMU_FLATTEN
4100 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4101 {
4102     FloatParts128 pa, pb;
4103 
4104     float128_unpack_canonical(&pa, a, s);
4105     float128_unpack_canonical(&pb, b, s);
4106     return parts_compare(&pa, &pb, s, is_quiet);
4107 }
4108 
4109 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4110 {
4111     return float128_do_compare(a, b, s, false);
4112 }
4113 
4114 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4115 {
4116     return float128_do_compare(a, b, s, true);
4117 }
4118 
4119 static FloatRelation QEMU_FLATTEN
4120 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4121 {
4122     FloatParts128 pa, pb;
4123 
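         /*
          * Invalid floatx80 bit patterns fail to unpack and are treated
          * as unordered.
          */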
4124     if (!floatx80_unpack_canonical(&pa, a, s) ||
4125         !floatx80_unpack_canonical(&pb, b, s)) {
4126         return float_relation_unordered;
4127     }
4128     return parts_compare(&pa, &pb, s, is_quiet);
4129 }
4130 
4131 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4132 {
4133     return floatx80_do_compare(a, b, s, false);
4134 }
4135 
4136 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4137 {
4138     return floatx80_do_compare(a, b, s, true);
4139 }
4140 
4141 /*
4142  * Scale by 2**N
4143  */
4144 
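     /*
      * For a finite nonzero input the result is a * 2**n with a single
      * rounding: float64_scalbn() applied to 3.0 with n = 5 gives 96.0
      * exactly.  Zeroes, infinities and NaNs are handled inside
      * parts_scalbn().
      */
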
4145 float16 float16_scalbn(float16 a, int n, float_status *status)
4146 {
4147     FloatParts64 p;
4148 
4149     float16_unpack_canonical(&p, a, status);
4150     parts_scalbn(&p, n, status);
4151     return float16_round_pack_canonical(&p, status);
4152 }
4153 
4154 float32 float32_scalbn(float32 a, int n, float_status *status)
4155 {
4156     FloatParts64 p;
4157 
4158     float32_unpack_canonical(&p, a, status);
4159     parts_scalbn(&p, n, status);
4160     return float32_round_pack_canonical(&p, status);
4161 }
4162 
4163 float64 float64_scalbn(float64 a, int n, float_status *status)
4164 {
4165     FloatParts64 p;
4166 
4167     float64_unpack_canonical(&p, a, status);
4168     parts_scalbn(&p, n, status);
4169     return float64_round_pack_canonical(&p, status);
4170 }
4171 
4172 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4173 {
4174     FloatParts64 p;
4175 
4176     bfloat16_unpack_canonical(&p, a, status);
4177     parts_scalbn(&p, n, status);
4178     return bfloat16_round_pack_canonical(&p, status);
4179 }
4180 
4181 float128 float128_scalbn(float128 a, int n, float_status *status)
4182 {
4183     FloatParts128 p;
4184 
4185     float128_unpack_canonical(&p, a, status);
4186     parts_scalbn(&p, n, status);
4187     return float128_round_pack_canonical(&p, status);
4188 }
4189 
4190 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4191 {
4192     FloatParts128 p;
4193 
4194     if (!floatx80_unpack_canonical(&p, a, status)) {
4195         return floatx80_default_nan(status);
4196     }
4197     parts_scalbn(&p, n, status);
4198     return floatx80_round_pack_canonical(&p, status);
4199 }
4200 
4201 /*
4202  * Square Root
4203  */
4204 
4205 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4206 {
4207     FloatParts64 p;
4208 
4209     float16_unpack_canonical(&p, a, status);
4210     parts_sqrt(&p, status, &float16_params);
4211     return float16_round_pack_canonical(&p, status);
4212 }
4213 
4214 static float32 QEMU_SOFTFLOAT_ATTR
4215 soft_f32_sqrt(float32 a, float_status *status)
4216 {
4217     FloatParts64 p;
4218 
4219     float32_unpack_canonical(&p, a, status);
4220     parts_sqrt(&p, status, &float32_params);
4221     return float32_round_pack_canonical(&p, status);
4222 }
4223 
4224 static float64 QEMU_SOFTFLOAT_ATTR
4225 soft_f64_sqrt(float64 a, float_status *status)
4226 {
4227     FloatParts64 p;
4228 
4229     float64_unpack_canonical(&p, a, status);
4230     parts_sqrt(&p, status, &float64_params);
4231     return float64_round_pack_canonical(&p, status);
4232 }
4233 
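     /*
      * For the 32- and 64-bit formats below, the host's sqrtf()/sqrt() is
      * used only for non-negative zero-or-normal inputs; NaNs, negative
      * values and denormals take the soft path so that flag setting and
      * NaN propagation stay under softfloat control.
      */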
4234 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4235 {
4236     union_float32 ua, ur;
4237 
4238     ua.s = xa;
4239     if (unlikely(!can_use_fpu(s))) {
4240         goto soft;
4241     }
4242 
4243     float32_input_flush1(&ua.s, s);
4244     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4245         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4246                        fpclassify(ua.h) == FP_ZERO) ||
4247                      signbit(ua.h))) {
4248             goto soft;
4249         }
4250     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4251                         float32_is_neg(ua.s))) {
4252         goto soft;
4253     }
4254     ur.h = sqrtf(ua.h);
4255     return ur.s;
4256 
4257  soft:
4258     return soft_f32_sqrt(ua.s, s);
4259 }
4260 
4261 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4262 {
4263     union_float64 ua, ur;
4264 
4265     ua.s = xa;
4266     if (unlikely(!can_use_fpu(s))) {
4267         goto soft;
4268     }
4269 
4270     float64_input_flush1(&ua.s, s);
4271     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4272         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4273                        fpclassify(ua.h) == FP_ZERO) ||
4274                      signbit(ua.h))) {
4275             goto soft;
4276         }
4277     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4278                         float64_is_neg(ua.s))) {
4279         goto soft;
4280     }
4281     ur.h = sqrt(ua.h);
4282     return ur.s;
4283 
4284  soft:
4285     return soft_f64_sqrt(ua.s, s);
4286 }
4287 
4288 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4289 {
4290     FloatParts64 p;
4291 
4292     bfloat16_unpack_canonical(&p, a, status);
4293     parts_sqrt(&p, status, &bfloat16_params);
4294     return bfloat16_round_pack_canonical(&p, status);
4295 }
4296 
4297 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4298 {
4299     FloatParts128 p;
4300 
4301     float128_unpack_canonical(&p, a, status);
4302     parts_sqrt(&p, status, &float128_params);
4303     return float128_round_pack_canonical(&p, status);
4304 }
4305 
4306 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4307 {
4308     FloatParts128 p;
4309 
4310     if (!floatx80_unpack_canonical(&p, a, s)) {
4311         return floatx80_default_nan(s);
4312     }
4313     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4314     return floatx80_round_pack_canonical(&p, s);
4315 }
4316 
4317 /*
4318  * log2
4319  */
4320 float32 float32_log2(float32 a, float_status *status)
4321 {
4322     FloatParts64 p;
4323 
4324     float32_unpack_canonical(&p, a, status);
4325     parts_log2(&p, status, &float32_params);
4326     return float32_round_pack_canonical(&p, status);
4327 }
4328 
4329 float64 float64_log2(float64 a, float_status *status)
4330 {
4331     FloatParts64 p;
4332 
4333     float64_unpack_canonical(&p, a, status);
4334     parts_log2(&p, status, &float64_params);
4335     return float64_round_pack_canonical(&p, status);
4336 }
4337 
4338 /*----------------------------------------------------------------------------
4339 | The pattern for a default generated NaN.
4340 *----------------------------------------------------------------------------*/
4341 
4342 float16 float16_default_nan(float_status *status)
4343 {
4344     FloatParts64 p;
4345 
4346     parts_default_nan(&p, status);
4347     p.frac >>= float16_params.frac_shift;
4348     return float16_pack_raw(&p);
4349 }
4350 
4351 float32 float32_default_nan(float_status *status)
4352 {
4353     FloatParts64 p;
4354 
4355     parts_default_nan(&p, status);
4356     p.frac >>= float32_params.frac_shift;
4357     return float32_pack_raw(&p);
4358 }
4359 
4360 float64 float64_default_nan(float_status *status)
4361 {
4362     FloatParts64 p;
4363 
4364     parts_default_nan(&p, status);
4365     p.frac >>= float64_params.frac_shift;
4366     return float64_pack_raw(&p);
4367 }
4368 
4369 float128 float128_default_nan(float_status *status)
4370 {
4371     FloatParts128 p;
4372 
4373     parts_default_nan(&p, status);
4374     frac_shr(&p, float128_params.frac_shift);
4375     return float128_pack_raw(&p);
4376 }
4377 
4378 bfloat16 bfloat16_default_nan(float_status *status)
4379 {
4380     FloatParts64 p;
4381 
4382     parts_default_nan(&p, status);
4383     p.frac >>= bfloat16_params.frac_shift;
4384     return bfloat16_pack_raw(&p);
4385 }
4386 
4387 /*----------------------------------------------------------------------------
4388 | Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
4389 *----------------------------------------------------------------------------*/
4390 
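     /*
      * The shifts below move each format's fraction into the canonical
      * (left-aligned) position that parts_silence_nan() expects, so the
      * quiet-NaN bit can be manipulated at a single format-independent
      * location, and then move it back.
      */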
4391 float16 float16_silence_nan(float16 a, float_status *status)
4392 {
4393     FloatParts64 p;
4394 
4395     float16_unpack_raw(&p, a);
4396     p.frac <<= float16_params.frac_shift;
4397     parts_silence_nan(&p, status);
4398     p.frac >>= float16_params.frac_shift;
4399     return float16_pack_raw(&p);
4400 }
4401 
4402 float32 float32_silence_nan(float32 a, float_status *status)
4403 {
4404     FloatParts64 p;
4405 
4406     float32_unpack_raw(&p, a);
4407     p.frac <<= float32_params.frac_shift;
4408     parts_silence_nan(&p, status);
4409     p.frac >>= float32_params.frac_shift;
4410     return float32_pack_raw(&p);
4411 }
4412 
4413 float64 float64_silence_nan(float64 a, float_status *status)
4414 {
4415     FloatParts64 p;
4416 
4417     float64_unpack_raw(&p, a);
4418     p.frac <<= float64_params.frac_shift;
4419     parts_silence_nan(&p, status);
4420     p.frac >>= float64_params.frac_shift;
4421     return float64_pack_raw(&p);
4422 }
4423 
4424 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4425 {
4426     FloatParts64 p;
4427 
4428     bfloat16_unpack_raw(&p, a);
4429     p.frac <<= bfloat16_params.frac_shift;
4430     parts_silence_nan(&p, status);
4431     p.frac >>= bfloat16_params.frac_shift;
4432     return bfloat16_pack_raw(&p);
4433 }
4434 
4435 float128 float128_silence_nan(float128 a, float_status *status)
4436 {
4437     FloatParts128 p;
4438 
4439     float128_unpack_raw(&p, a);
4440     frac_shl(&p, float128_params.frac_shift);
4441     parts_silence_nan(&p, status);
4442     frac_shr(&p, float128_params.frac_shift);
4443     return float128_pack_raw(&p);
4444 }
4445 
4446 /*----------------------------------------------------------------------------
4447 | If `a' is denormal and we are in flush-to-zero mode then set the
4448 | input-denormal exception and return zero. Otherwise just return the value.
4449 *----------------------------------------------------------------------------*/
4450 
4451 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4452 {
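         /* After a raw unpack, exp == 0 with a nonzero fraction is the
            denormal encoding in every format handled here. */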
4453     if (p.exp == 0 && p.frac != 0) {
4454         float_raise(float_flag_input_denormal, status);
4455         return true;
4456     }
4457 
4458     return false;
4459 }
4460 
4461 float16 float16_squash_input_denormal(float16 a, float_status *status)
4462 {
4463     if (status->flush_inputs_to_zero) {
4464         FloatParts64 p;
4465 
4466         float16_unpack_raw(&p, a);
4467         if (parts_squash_denormal(p, status)) {
4468             return float16_set_sign(float16_zero, p.sign);
4469         }
4470     }
4471     return a;
4472 }
4473 
4474 float32 float32_squash_input_denormal(float32 a, float_status *status)
4475 {
4476     if (status->flush_inputs_to_zero) {
4477         FloatParts64 p;
4478 
4479         float32_unpack_raw(&p, a);
4480         if (parts_squash_denormal(p, status)) {
4481             return float32_set_sign(float32_zero, p.sign);
4482         }
4483     }
4484     return a;
4485 }
4486 
4487 float64 float64_squash_input_denormal(float64 a, float_status *status)
4488 {
4489     if (status->flush_inputs_to_zero) {
4490         FloatParts64 p;
4491 
4492         float64_unpack_raw(&p, a);
4493         if (parts_squash_denormal(p, status)) {
4494             return float64_set_sign(float64_zero, p.sign);
4495         }
4496     }
4497     return a;
4498 }
4499 
4500 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4501 {
4502     if (status->flush_inputs_to_zero) {
4503         FloatParts64 p;
4504 
4505         bfloat16_unpack_raw(&p, a);
4506         if (parts_squash_denormal(p, status)) {
4507             return bfloat16_set_sign(bfloat16_zero, p.sign);
4508         }
4509     }
4510     return a;
4511 }
4512 
4513 /*----------------------------------------------------------------------------
4514 | Normalizes the subnormal extended double-precision floating-point value
4515 | represented by the denormalized significand `aSig'.  The normalized exponent
4516 | and significand are stored at the locations pointed to by `zExpPtr' and
4517 | `zSigPtr', respectively.
4518 *----------------------------------------------------------------------------*/
4519 
4520 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4521                                 uint64_t *zSigPtr)
4522 {
4523     int8_t shiftCount;
4524 
4525     shiftCount = clz64(aSig);
4526     *zSigPtr = aSig<<shiftCount;
4527     *zExpPtr = 1 - shiftCount;
4528 }
4529 
4530 /*----------------------------------------------------------------------------
4531 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4532 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4533 | and returns the proper extended double-precision floating-point value
4534 | corresponding to the abstract input.  Ordinarily, the abstract value is
4535 | rounded and packed into the extended double-precision format, with the
4536 | inexact exception raised if the abstract input cannot be represented
4537 | exactly.  However, if the abstract value is too large, the overflow and
4538 | inexact exceptions are raised and an infinity or maximal finite value is
4539 | returned.  If the abstract value is too small, the input value is rounded to
4540 | a subnormal number, and the underflow and inexact exceptions are raised if
4541 | the abstract input cannot be represented exactly as a subnormal extended
4542 | double-precision floating-point number.
4543 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4544 | the result is rounded to the same number of bits as single or double
4545 | precision, respectively.  Otherwise, the result is rounded to the full
4546 | precision of the extended double-precision format.
4547 |     The input significand must be normalized or smaller.  If the input
4548 | significand is not normalized, `zExp' must be 0; in that case, the result
4549 | returned is a subnormal number, and it must not require rounding.  The
4550 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4551 | Floating-Point Arithmetic.
4552 *----------------------------------------------------------------------------*/
4553 
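     /*
      * As a worked example of the constants below: for floatx80_precision_d,
      * roundMask = 0x7FF marks the low 11 of the 64 significand bits, leaving
      * the 53 bits of a double (similarly for floatx80_precision_s).
      * roundIncrement = 0x400 is half an ULP at that precision, so adding it
      * and clearing roundMask rounds to nearest; on an exact tie
      * (roundBits << 1 == roundMask + 1) the kept LSB is cleared as well,
      * giving ties-to-even.
      */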
4554 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4555                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4556                               float_status *status)
4557 {
4558     FloatRoundMode roundingMode;
4559     bool roundNearestEven, increment, isTiny;
4560     int64_t roundIncrement, roundMask, roundBits;
4561 
4562     roundingMode = status->float_rounding_mode;
4563     roundNearestEven = ( roundingMode == float_round_nearest_even );
4564     switch (roundingPrecision) {
4565     case floatx80_precision_x:
4566         goto precision80;
4567     case floatx80_precision_d:
4568         roundIncrement = UINT64_C(0x0000000000000400);
4569         roundMask = UINT64_C(0x00000000000007FF);
4570         break;
4571     case floatx80_precision_s:
4572         roundIncrement = UINT64_C(0x0000008000000000);
4573         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4574         break;
4575     default:
4576         g_assert_not_reached();
4577     }
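         /* Fold any bits of zSig1 into zSig0's sticky bit; at reduced
            precision they can only affect rounding, not the result bits. */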
4578     zSig0 |= ( zSig1 != 0 );
4579     switch (roundingMode) {
4580     case float_round_nearest_even:
4581     case float_round_ties_away:
4582         break;
4583     case float_round_to_zero:
4584         roundIncrement = 0;
4585         break;
4586     case float_round_up:
4587         roundIncrement = zSign ? 0 : roundMask;
4588         break;
4589     case float_round_down:
4590         roundIncrement = zSign ? roundMask : 0;
4591         break;
4592     default:
4593         abort();
4594     }
4595     roundBits = zSig0 & roundMask;
4596     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4597         if (    ( 0x7FFE < zExp )
4598              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4599            ) {
4600             goto overflow;
4601         }
4602         if ( zExp <= 0 ) {
4603             if (status->flush_to_zero) {
4604                 float_raise(float_flag_output_denormal, status);
4605                 return packFloatx80(zSign, 0, 0);
4606             }
4607             isTiny = status->tininess_before_rounding
4608                   || (zExp < 0 )
4609                   || (zSig0 <= zSig0 + roundIncrement);
4610             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4611             zExp = 0;
4612             roundBits = zSig0 & roundMask;
4613             if (isTiny && roundBits) {
4614                 float_raise(float_flag_underflow, status);
4615             }
4616             if (roundBits) {
4617                 float_raise(float_flag_inexact, status);
4618             }
4619             zSig0 += roundIncrement;
4620             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4621             roundIncrement = roundMask + 1;
4622             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4623                 roundMask |= roundIncrement;
4624             }
4625             zSig0 &= ~ roundMask;
4626             return packFloatx80( zSign, zExp, zSig0 );
4627         }
4628     }
4629     if (roundBits) {
4630         float_raise(float_flag_inexact, status);
4631     }
4632     zSig0 += roundIncrement;
4633     if ( zSig0 < roundIncrement ) {
4634         ++zExp;
4635         zSig0 = UINT64_C(0x8000000000000000);
4636     }
4637     roundIncrement = roundMask + 1;
4638     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4639         roundMask |= roundIncrement;
4640     }
4641     zSig0 &= ~ roundMask;
4642     if ( zSig0 == 0 ) zExp = 0;
4643     return packFloatx80( zSign, zExp, zSig0 );
4644  precision80:
4645     switch (roundingMode) {
4646     case float_round_nearest_even:
4647     case float_round_ties_away:
4648         increment = ((int64_t)zSig1 < 0);
4649         break;
4650     case float_round_to_zero:
4651         increment = 0;
4652         break;
4653     case float_round_up:
4654         increment = !zSign && zSig1;
4655         break;
4656     case float_round_down:
4657         increment = zSign && zSig1;
4658         break;
4659     default:
4660         abort();
4661     }
4662     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4663         if (    ( 0x7FFE < zExp )
4664              || (    ( zExp == 0x7FFE )
4665                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4666                   && increment
4667                 )
4668            ) {
4669             roundMask = 0;
4670  overflow:
4671             float_raise(float_flag_overflow | float_flag_inexact, status);
4672             if (    ( roundingMode == float_round_to_zero )
4673                  || ( zSign && ( roundingMode == float_round_up ) )
4674                  || ( ! zSign && ( roundingMode == float_round_down ) )
4675                ) {
4676                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4677             }
4678             return packFloatx80(zSign,
4679                                 floatx80_infinity_high,
4680                                 floatx80_infinity_low);
4681         }
4682         if ( zExp <= 0 ) {
4683             isTiny = status->tininess_before_rounding
4684                   || (zExp < 0)
4685                   || !increment
4686                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4687             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4688             zExp = 0;
4689             if (isTiny && zSig1) {
4690                 float_raise(float_flag_underflow, status);
4691             }
4692             if (zSig1) {
4693                 float_raise(float_flag_inexact, status);
4694             }
4695             switch (roundingMode) {
4696             case float_round_nearest_even:
4697             case float_round_ties_away:
4698                 increment = ((int64_t)zSig1 < 0);
4699                 break;
4700             case float_round_to_zero:
4701                 increment = 0;
4702                 break;
4703             case float_round_up:
4704                 increment = !zSign && zSig1;
4705                 break;
4706             case float_round_down:
4707                 increment = zSign && zSig1;
4708                 break;
4709             default:
4710                 abort();
4711             }
4712             if ( increment ) {
4713                 ++zSig0;
4714                 if (!(zSig1 << 1) && roundNearestEven) {
4715                     zSig0 &= ~1;
4716                 }
4717                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4718             }
4719             return packFloatx80( zSign, zExp, zSig0 );
4720         }
4721     }
4722     if (zSig1) {
4723         float_raise(float_flag_inexact, status);
4724     }
4725     if ( increment ) {
4726         ++zSig0;
4727         if ( zSig0 == 0 ) {
4728             ++zExp;
4729             zSig0 = UINT64_C(0x8000000000000000);
4730         }
4731         else {
4732             if (!(zSig1 << 1) && roundNearestEven) {
4733                 zSig0 &= ~1;
4734             }
4735         }
4736     }
4737     else {
4738         if ( zSig0 == 0 ) zExp = 0;
4739     }
4740     return packFloatx80( zSign, zExp, zSig0 );
4741 
4742 }
4743 
4744 /*----------------------------------------------------------------------------
4745 | Takes an abstract floating-point value having sign `zSign', exponent
4746 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4747 | and returns the proper extended double-precision floating-point value
4748 | corresponding to the abstract input.  This routine is just like
4749 | `roundAndPackFloatx80' except that the input significand does not have to be
4750 | normalized.
4751 *----------------------------------------------------------------------------*/
4752 
4753 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4754                                        bool zSign, int32_t zExp,
4755                                        uint64_t zSig0, uint64_t zSig1,
4756                                        float_status *status)
4757 {
4758     int8_t shiftCount;
4759 
4760     if ( zSig0 == 0 ) {
4761         zSig0 = zSig1;
4762         zSig1 = 0;
4763         zExp -= 64;
4764     }
4765     shiftCount = clz64(zSig0);
4766     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4767     zExp -= shiftCount;
4768     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4769                                 zSig0, zSig1, status);
4770 
4771 }
4772 
4773 /*----------------------------------------------------------------------------
4774 | Returns the binary exponential of the single-precision floating-point value
4775 | `a'. The operation is performed according to the IEC/IEEE Standard for
4776 | Binary Floating-Point Arithmetic.
4777 |
4778 | Uses the following identities:
4779 |
4780 | 1. -------------------------------------------------------------------------
4781 |      x    x*ln(2)
4782 |     2  = e
4783 |
4784 | 2. -------------------------------------------------------------------------
4785 |                      2     3     4     5           n
4786 |      x        x     x     x     x     x           x
4787 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4788 |               1!    2!    3!    4!    5!          n!
4789 *----------------------------------------------------------------------------*/
4790 
4791 static const float64 float32_exp2_coefficients[15] =
4792 {
4793     const_float64( 0x3ff0000000000000ll ), /*  1 */
4794     const_float64( 0x3fe0000000000000ll ), /*  2 */
4795     const_float64( 0x3fc5555555555555ll ), /*  3 */
4796     const_float64( 0x3fa5555555555555ll ), /*  4 */
4797     const_float64( 0x3f81111111111111ll ), /*  5 */
4798     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4799     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4800     const_float64( 0x3efa01a01a01a01all ), /*  8 */
4801     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4802     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4803     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4804     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4805     const_float64( 0x3de6124613a86d09ll ), /* 13 */
4806     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4807     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4808 };
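
     /* Entry i above is 1/(i+1)! encoded as a float64 constant. */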
4809 
4810 float32 float32_exp2(float32 a, float_status *status)
4811 {
4812     FloatParts64 xp, xnp, tp, rp;
4813     int i;
4814 
4815     float32_unpack_canonical(&xp, a, status);
4816     if (unlikely(xp.cls != float_class_normal)) {
4817         switch (xp.cls) {
4818         case float_class_snan:
4819         case float_class_qnan:
4820             parts_return_nan(&xp, status);
4821             return float32_round_pack_canonical(&xp, status);
4822         case float_class_inf:
4823             return xp.sign ? float32_zero : a;
4824         case float_class_zero:
4825             return float32_one;
4826         default:
4827             break;
4828         }
4829         g_assert_not_reached();
4830     }
4831 
4832     float_raise(float_flag_inexact, status);
4833 
4834     float64_unpack_canonical(&tp, float64_ln2, status);
4835     xp = *parts_mul(&xp, &tp, status);
4836     xnp = xp;
4837 
4838     float64_unpack_canonical(&rp, float64_one, status);
4839     for (i = 0 ; i < 15 ; i++) {
4840         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
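             /* rp += x**(i+1) / (i+1)!; xnp tracks the rising power of x */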
4841         rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
4842         xnp = *parts_mul(&xnp, &xp, status);
4843     }
4844 
4845     return float32_round_pack_canonical(&rp, status);
4846 }
4847 
4848 /*----------------------------------------------------------------------------
4849 | Rounds the extended double-precision floating-point value `a'
4850 | to the precision provided by floatx80_rounding_precision and returns the
4851 | result as an extended double-precision floating-point value.
4852 | The operation is performed according to the IEC/IEEE Standard for Binary
4853 | Floating-Point Arithmetic.
4854 *----------------------------------------------------------------------------*/
4855 
4856 floatx80 floatx80_round(floatx80 a, float_status *status)
4857 {
4858     FloatParts128 p;
4859 
4860     if (!floatx80_unpack_canonical(&p, a, status)) {
4861         return floatx80_default_nan(status);
4862     }
4863     return floatx80_round_pack_canonical(&p, status);
4864 }
4865 
4866 static void __attribute__((constructor)) softfloat_init(void)
4867 {
4868     union_float64 ua, ub, uc, ur;
4869 
4870     if (QEMU_NO_HARDFLOAT) {
4871         return;
4872     }
4873     /*
4874      * Test that the host's FMA is not obviously broken. For example,
4875      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4876      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4877      */
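         /*
          * Reading the constants: ua = 2**-1021 * (1 + 2**-52), ub = 2**-53,
          * uc = 2**-1021.  A fused multiply-add sees the exact value
          * 2**-1021 * (1 + 2**-53 + 2**-105) and rounds it up to ua, while a
          * multiply-then-add path loses the 2**-105 term in the subnormal
          * product and returns uc instead.
          */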
4878     ua.s = 0x0020000000000001ULL;
4879     ub.s = 0x3ca0000000000000ULL;
4880     uc.s = 0x0020000000000000ULL;
4881     ur.h = fma(ua.h, ub.h, uc.h);
4882     if (ur.s != 0x0020000000000001ULL) {
4883         force_soft_fma = true;
4884     }
4885 }
4886