xref: /openbmc/qemu/fpu/softfloat.c (revision d7fb5693d9ffbeb9b49c981e1f9774392f1d41e5)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 #include "qemu/osdep.h"
83 #include <math.h>
84 #include "qemu/bitops.h"
85 #include "fpu/softfloat.h"
86 
87 /* We only need stdlib for abort() */
88 
89 /*----------------------------------------------------------------------------
90 | Primitive arithmetic functions, including multi-word arithmetic, and
91 | division and square root approximations.  (Can be specialized to target if
92 | desired.)
93 *----------------------------------------------------------------------------*/
94 #include "fpu/softfloat-macros.h"
95 
96 /*
97  * Hardfloat
98  *
99  * Fast emulation of guest FP instructions is challenging for two reasons.
100  * First, FP instruction semantics are similar but not identical, particularly
101  * when handling NaNs. Second, emulating at reasonable speed the guest FP
102  * exception flags is not trivial: reading the host's flags register with a
103  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
104  * and trapping on every FP exception is not fast nor pleasant to work with.
105  *
106  * We address these challenges by leveraging the host FPU for a subset of the
107  * operations. To do this we expand on the idea presented in this paper:
108  *
109  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
110  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
111  *
112  * The idea is thus to leverage the host FPU to (1) compute FP operations
113  * and (2) identify whether FP exceptions occurred while avoiding
114  * expensive exception flag register accesses.
115  *
116  * An important optimization shown in the paper is that given that exception
117  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
118  * This is particularly useful for the inexact flag, which is very frequently
119  * raised in floating-point workloads.
120  *
121  * We optimize the code further by deferring to soft-fp whenever FP exception
122  * detection might get hairy. Two examples: (1) when at least one operand is
123  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
124  * and the result is < the minimum normal.
125  */
126 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
127     static inline void name(soft_t *a, float_status *s)                 \
128     {                                                                   \
129         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
130             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
131                                      soft_t ## _is_neg(*a));            \
132             float_raise(float_flag_input_denormal_flushed, s);          \
133         }                                                               \
134     }
135 
136 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
137 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
138 #undef GEN_INPUT_FLUSH__NOCHECK
139 
140 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
141     static inline void name(soft_t *a, float_status *s) \
142     {                                                   \
143         if (likely(!s->flush_inputs_to_zero)) {         \
144             return;                                     \
145         }                                               \
146         soft_t ## _input_flush__nocheck(a, s);          \
147     }
148 
149 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
150 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
151 #undef GEN_INPUT_FLUSH1
152 
153 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
154     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
155     {                                                                   \
156         if (likely(!s->flush_inputs_to_zero)) {                         \
157             return;                                                     \
158         }                                                               \
159         soft_t ## _input_flush__nocheck(a, s);                          \
160         soft_t ## _input_flush__nocheck(b, s);                          \
161     }
162 
163 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
164 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
165 #undef GEN_INPUT_FLUSH2
166 
167 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
168     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
169     {                                                                   \
170         if (likely(!s->flush_inputs_to_zero)) {                         \
171             return;                                                     \
172         }                                                               \
173         soft_t ## _input_flush__nocheck(a, s);                          \
174         soft_t ## _input_flush__nocheck(b, s);                          \
175         soft_t ## _input_flush__nocheck(c, s);                          \
176     }
177 
178 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
179 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
180 #undef GEN_INPUT_FLUSH3
181 
182 /*
183  * Choose whether to use fpclassify or float32/64_* primitives in the generated
184  * hardfloat functions. Each combination of number of inputs and float size
185  * gets its own value.
186  */
187 #if defined(__x86_64__)
188 # define QEMU_HARDFLOAT_1F32_USE_FP 0
189 # define QEMU_HARDFLOAT_1F64_USE_FP 1
190 # define QEMU_HARDFLOAT_2F32_USE_FP 0
191 # define QEMU_HARDFLOAT_2F64_USE_FP 1
192 # define QEMU_HARDFLOAT_3F32_USE_FP 0
193 # define QEMU_HARDFLOAT_3F64_USE_FP 1
194 #else
195 # define QEMU_HARDFLOAT_1F32_USE_FP 0
196 # define QEMU_HARDFLOAT_1F64_USE_FP 0
197 # define QEMU_HARDFLOAT_2F32_USE_FP 0
198 # define QEMU_HARDFLOAT_2F64_USE_FP 0
199 # define QEMU_HARDFLOAT_3F32_USE_FP 0
200 # define QEMU_HARDFLOAT_3F64_USE_FP 0
201 #endif
202 
203 /*
204  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
205  * float{32,64}_is_infinity when !USE_FP.
206  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
207  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
208  */
209 #if defined(__x86_64__) || defined(__aarch64__)
210 # define QEMU_HARDFLOAT_USE_ISINF   1
211 #else
212 # define QEMU_HARDFLOAT_USE_ISINF   0
213 #endif
214 
215 /*
216  * Some targets clear the FP flags before most FP operations. This prevents
217  * the use of hardfloat, since hardfloat relies on the inexact flag being
218  * already set.
219  */
220 # if defined(__FAST_MATH__)
221 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
222     IEEE implementation
223 # define QEMU_NO_HARDFLOAT 1
224 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
225 #else
226 # define QEMU_NO_HARDFLOAT 0
227 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
228 #endif
229 
230 static inline bool can_use_fpu(const float_status *s)
231 {
232     if (QEMU_NO_HARDFLOAT) {
233         return false;
234     }
235     return likely(s->float_exception_flags & float_flag_inexact &&
236                   s->float_rounding_mode == float_round_nearest_even);
237 }
238 
239 /*
240  * Hardfloat generation functions. Each operation can have two flavors:
241  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
242  * most condition checks, or native ones (e.g. fpclassify).
243  *
244  * The flavor is chosen by the callers. Instead of using macros, we rely on the
245  * compiler to propagate constants and inline everything into the callers.
246  *
247  * We only generate functions for operations with two inputs, since only
248  * these are common enough to justify consolidating them into common code.
249  */
250 
251 typedef union {
252     float32 s;
253     float h;
254 } union_float32;
255 
256 typedef union {
257     float64 s;
258     double h;
259 } union_float64;
260 
261 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
262 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
263 
264 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
265 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
266 typedef float   (*hard_f32_op2_fn)(float a, float b);
267 typedef double  (*hard_f64_op2_fn)(double a, double b);
268 
269 /* 2-input is-zero-or-normal */
270 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
271 {
272     if (QEMU_HARDFLOAT_2F32_USE_FP) {
273         /*
274          * Not using a temp variable for consecutive fpclassify calls ends up
275          * generating faster code.
276          */
277         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
278                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
279     }
280     return float32_is_zero_or_normal(a.s) &&
281            float32_is_zero_or_normal(b.s);
282 }
283 
284 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
285 {
286     if (QEMU_HARDFLOAT_2F64_USE_FP) {
287         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
288                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
289     }
290     return float64_is_zero_or_normal(a.s) &&
291            float64_is_zero_or_normal(b.s);
292 }
293 
294 /* 3-input is-zero-or-normal */
295 static inline
296 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
297 {
298     if (QEMU_HARDFLOAT_3F32_USE_FP) {
299         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
300                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
301                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
302     }
303     return float32_is_zero_or_normal(a.s) &&
304            float32_is_zero_or_normal(b.s) &&
305            float32_is_zero_or_normal(c.s);
306 }
307 
308 static inline
309 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
310 {
311     if (QEMU_HARDFLOAT_3F64_USE_FP) {
312         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
313                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
314                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
315     }
316     return float64_is_zero_or_normal(a.s) &&
317            float64_is_zero_or_normal(b.s) &&
318            float64_is_zero_or_normal(c.s);
319 }
320 
321 static inline bool f32_is_inf(union_float32 a)
322 {
323     if (QEMU_HARDFLOAT_USE_ISINF) {
324         return isinf(a.h);
325     }
326     return float32_is_infinity(a.s);
327 }
328 
329 static inline bool f64_is_inf(union_float64 a)
330 {
331     if (QEMU_HARDFLOAT_USE_ISINF) {
332         return isinf(a.h);
333     }
334     return float64_is_infinity(a.s);
335 }
336 
337 static inline float32
338 float32_gen2(float32 xa, float32 xb, float_status *s,
339              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
340              f32_check_fn pre, f32_check_fn post)
341 {
342     union_float32 ua, ub, ur;
343 
344     ua.s = xa;
345     ub.s = xb;
346 
347     if (unlikely(!can_use_fpu(s))) {
348         goto soft;
349     }
350 
351     float32_input_flush2(&ua.s, &ub.s, s);
352     if (unlikely(!pre(ua, ub))) {
353         goto soft;
354     }
355 
356     ur.h = hard(ua.h, ub.h);
357     if (unlikely(f32_is_inf(ur))) {
358         float_raise(float_flag_overflow, s);
359     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
360         goto soft;
361     }
362     return ur.s;
363 
364  soft:
365     return soft(ua.s, ub.s, s);
366 }
367 
368 static inline float64
369 float64_gen2(float64 xa, float64 xb, float_status *s,
370              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
371              f64_check_fn pre, f64_check_fn post)
372 {
373     union_float64 ua, ub, ur;
374 
375     ua.s = xa;
376     ub.s = xb;
377 
378     if (unlikely(!can_use_fpu(s))) {
379         goto soft;
380     }
381 
382     float64_input_flush2(&ua.s, &ub.s, s);
383     if (unlikely(!pre(ua, ub))) {
384         goto soft;
385     }
386 
387     ur.h = hard(ua.h, ub.h);
388     if (unlikely(f64_is_inf(ur))) {
389         float_raise(float_flag_overflow, s);
390     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
391         goto soft;
392     }
393     return ur.s;
394 
395  soft:
396     return soft(ua.s, ub.s, s);
397 }
398 
399 /*
400  * Classify a floating point number. Everything above float_class_qnan
401  * is a NaN so cls >= float_class_qnan is any NaN.
402  *
403  * Note that we canonicalize denormals, so most code should treat
404  * class_normal and class_denormal identically.
405  */
406 
407 typedef enum __attribute__ ((__packed__)) {
408     float_class_unclassified,
409     float_class_zero,
410     float_class_normal,
411     float_class_denormal, /* input was a non-squashed denormal */
412     float_class_inf,
413     float_class_qnan,  /* all NaNs from here */
414     float_class_snan,
415 } FloatClass;
416 
417 #define float_cmask(bit)  (1u << (bit))
418 
419 enum {
420     float_cmask_zero    = float_cmask(float_class_zero),
421     float_cmask_normal  = float_cmask(float_class_normal),
422     float_cmask_denormal = float_cmask(float_class_denormal),
423     float_cmask_inf     = float_cmask(float_class_inf),
424     float_cmask_qnan    = float_cmask(float_class_qnan),
425     float_cmask_snan    = float_cmask(float_class_snan),
426 
427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
429     float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
430 };
431 
432 /* Flags for parts_minmax. */
433 enum {
434     /* Set for minimum; clear for maximum. */
435     minmax_ismin = 1,
436     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437     minmax_isnum = 2,
438     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
439     minmax_ismag = 4,
440     /*
441      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
442      * operations.
443      */
444     minmax_isnumber = 8,
445 };
446 
447 /* Simple helpers for checking if, or what kind of, NaN we have */
448 static inline __attribute__((unused)) bool is_nan(FloatClass c)
449 {
450     return unlikely(c >= float_class_qnan);
451 }
452 
453 static inline __attribute__((unused)) bool is_snan(FloatClass c)
454 {
455     return c == float_class_snan;
456 }
457 
458 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
459 {
460     return c == float_class_qnan;
461 }
462 
463 /*
464  * Return true if the float_cmask has only normals in it
465  * (including input denormals that were canonicalized)
466  */
467 static inline bool cmask_is_only_normals(int cmask)
468 {
469     return !(cmask & ~float_cmask_anynorm);
470 }
471 
472 static inline bool is_anynorm(FloatClass c)
473 {
474     return float_cmask(c) & float_cmask_anynorm;
475 }
476 
477 /*
478  * Structure holding all of the decomposed parts of a float.
479  * The exponent is unbiased and the fraction is normalized.
480  *
481  * The fraction words are stored in big-endian word ordering,
482  * so that truncation from a larger format to a smaller format
483  * can be done simply by ignoring subsequent elements.
484  */
485 
486 typedef struct {
487     FloatClass cls;
488     bool sign;
489     int32_t exp;
490     union {
491         /* Routines that know the structure may reference the singular name. */
492         uint64_t frac;
493         /*
494          * Routines expanded with multiple structures reference "hi" and "lo"
495          * depending on the operation.  In FloatParts64, "hi" and "lo" are
496          * both the same word and aliased here.
497          */
498         uint64_t frac_hi;
499         uint64_t frac_lo;
500     };
501 } FloatParts64;
502 
503 typedef struct {
504     FloatClass cls;
505     bool sign;
506     int32_t exp;
507     uint64_t frac_hi;
508     uint64_t frac_lo;
509 } FloatParts128;
510 
511 typedef struct {
512     FloatClass cls;
513     bool sign;
514     int32_t exp;
515     uint64_t frac_hi;
516     uint64_t frac_hm;  /* high-middle */
517     uint64_t frac_lm;  /* low-middle */
518     uint64_t frac_lo;
519 } FloatParts256;
520 
521 /* These apply to the most significant word of each FloatPartsN. */
522 #define DECOMPOSED_BINARY_POINT    63
523 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
524 
525 /* Structure holding all of the relevant parameters for a format.
526  *   exp_size: the size of the exponent field
527  *   exp_bias: the offset applied to the exponent field
528  *   exp_max: the maximum normalised exponent
529  *   frac_size: the size of the fraction field
530  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
531  * The following are computed based the size of fraction
532  *   round_mask: bits below lsb which must be rounded
533  * The following optional modifiers are available:
534  *   arm_althp: handle ARM Alternative Half Precision
535  *   has_explicit_bit: has an explicit integer bit; this affects whether
536  *   the float_status floatx80_behaviour handling applies
537  */
538 typedef struct {
539     int exp_size;
540     int exp_bias;
541     int exp_re_bias;
542     int exp_max;
543     int frac_size;
544     int frac_shift;
545     bool arm_althp;
546     bool has_explicit_bit;
547     uint64_t round_mask;
548 } FloatFmt;
549 
550 /* Expand fields based on the size of exponent and fraction */
551 #define FLOAT_PARAMS_(E)                                \
552     .exp_size       = E,                                \
553     .exp_bias       = ((1 << E) - 1) >> 1,              \
554     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
555     .exp_max        = (1 << E) - 1
556 
557 #define FLOAT_PARAMS(E, F)                              \
558     FLOAT_PARAMS_(E),                                   \
559     .frac_size      = F,                                \
560     .frac_shift     = (-F - 1) & 63,                    \
561     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
562 
563 static const FloatFmt float16_params = {
564     FLOAT_PARAMS(5, 10)
565 };
566 
567 static const FloatFmt float16_params_ahp = {
568     FLOAT_PARAMS(5, 10),
569     .arm_althp = true
570 };
571 
572 static const FloatFmt bfloat16_params = {
573     FLOAT_PARAMS(8, 7)
574 };
575 
576 static const FloatFmt float32_params = {
577     FLOAT_PARAMS(8, 23)
578 };
579 
580 static const FloatFmt float64_params = {
581     FLOAT_PARAMS(11, 52)
582 };
583 
584 static const FloatFmt float128_params = {
585     FLOAT_PARAMS(15, 112)
586 };
587 
588 #define FLOATX80_PARAMS(R)              \
589     FLOAT_PARAMS_(15),                  \
590     .frac_size = R == 64 ? 63 : R,      \
591     .frac_shift = 0,                    \
592     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
593 
594 static const FloatFmt floatx80_params[3] = {
595     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
596     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
597     [floatx80_precision_x] = {
598         FLOATX80_PARAMS(64),
599         .has_explicit_bit = true,
600     },
601 };
602 
603 /* Unpack a float to parts, but do not canonicalize.  */
604 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
605 {
606     const int f_size = fmt->frac_size;
607     const int e_size = fmt->exp_size;
608 
609     *r = (FloatParts64) {
610         .cls = float_class_unclassified,
611         .sign = extract64(raw, f_size + e_size, 1),
612         .exp = extract64(raw, f_size, e_size),
613         .frac = extract64(raw, 0, f_size)
614     };
615 }
616 
617 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
618 {
619     unpack_raw64(p, &float16_params, f);
620 }
621 
622 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
623 {
624     unpack_raw64(p, &bfloat16_params, f);
625 }
626 
627 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
628 {
629     unpack_raw64(p, &float32_params, f);
630 }
631 
632 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
633 {
634     unpack_raw64(p, &float64_params, f);
635 }
636 
637 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
638 {
639     *p = (FloatParts128) {
640         .cls = float_class_unclassified,
641         .sign = extract32(f.high, 15, 1),
642         .exp = extract32(f.high, 0, 15),
643         .frac_hi = f.low
644     };
645 }
646 
647 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
648 {
649     const int f_size = float128_params.frac_size - 64;
650     const int e_size = float128_params.exp_size;
651 
652     *p = (FloatParts128) {
653         .cls = float_class_unclassified,
654         .sign = extract64(f.high, f_size + e_size, 1),
655         .exp = extract64(f.high, f_size, e_size),
656         .frac_hi = extract64(f.high, 0, f_size),
657         .frac_lo = f.low,
658     };
659 }
660 
661 /* Pack a float from parts, but do not canonicalize.  */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
674 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
675 {
676     return make_float16(pack_raw64(p, &float16_params));
677 }
678 
679 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
680 {
681     return pack_raw64(p, &bfloat16_params);
682 }
683 
684 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
685 {
686     return make_float32(pack_raw64(p, &float32_params));
687 }
688 
689 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
690 {
691     return make_float64(pack_raw64(p, &float64_params));
692 }
693 
694 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
695 {
696     const int f_size = float128_params.frac_size - 64;
697     const int e_size = float128_params.exp_size;
698     uint64_t hi;
699 
700     hi = (uint64_t)p->sign << (f_size + e_size);
701     hi = deposit64(hi, f_size, e_size, p->exp);
702     hi = deposit64(hi, 0, f_size, p->frac_hi);
703     return make_float128(hi, p->frac_lo);
704 }
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
716 #define PARTS_GENERIC_64_128(NAME, P) \
717     _Generic((P), FloatParts64 *: parts64_##NAME, \
718                   FloatParts128 *: parts128_##NAME)
719 
720 #define PARTS_GENERIC_64_128_256(NAME, P) \
721     _Generic((P), FloatParts64 *: parts64_##NAME, \
722                   FloatParts128 *: parts128_##NAME, \
723                   FloatParts256 *: parts256_##NAME)
724 
725 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
726 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
727 
728 static void parts64_return_nan(FloatParts64 *a, float_status *s);
729 static void parts128_return_nan(FloatParts128 *a, float_status *s);
730 
731 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
732 
733 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
734                                       float_status *s);
735 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
736                                         float_status *s);
737 
738 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
739 
740 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
741                                              FloatParts64 *c, float_status *s,
742                                              int ab_mask, int abc_mask);
743 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
744                                                FloatParts128 *b,
745                                                FloatParts128 *c,
746                                                float_status *s,
747                                                int ab_mask, int abc_mask);
748 
749 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
750     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
751 
752 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
753                                  const FloatFmt *fmt);
754 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
755                                   const FloatFmt *fmt);
756 
757 #define parts_canonicalize(A, S, F) \
758     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
759 
760 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
761                                    const FloatFmt *fmt);
762 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
763                                     const FloatFmt *fmt);
764 
765 #define parts_uncanon_normal(A, S, F) \
766     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
767 
768 static void parts64_uncanon(FloatParts64 *p, float_status *status,
769                             const FloatFmt *fmt);
770 static void parts128_uncanon(FloatParts128 *p, float_status *status,
771                              const FloatFmt *fmt);
772 
773 #define parts_uncanon(A, S, F) \
774     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
775 
776 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
777 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
778 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
779 
780 #define parts_add_normal(A, B) \
781     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
782 
783 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
784 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
785 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
786 
787 #define parts_sub_normal(A, B) \
788     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
789 
790 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
791                                     float_status *s, bool subtract);
792 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
793                                       float_status *s, bool subtract);
794 
795 #define parts_addsub(A, B, S, Z) \
796     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
797 
798 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
799                                  float_status *s);
800 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
801                                    float_status *s);
802 
803 #define parts_mul(A, B, S) \
804     PARTS_GENERIC_64_128(mul, A)(A, B, S)
805 
806 static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
807                                            FloatParts64 *c, int scale,
808                                            int flags, float_status *s);
809 static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
810                                              FloatParts128 *c, int scale,
811                                              int flags, float_status *s);
812 
813 #define parts_muladd_scalbn(A, B, C, Z, Y, S) \
814     PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
815 
816 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
817                                  float_status *s);
818 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
819                                    float_status *s);
820 
821 #define parts_div(A, B, S) \
822     PARTS_GENERIC_64_128(div, A)(A, B, S)
823 
824 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
825                                     uint64_t *mod_quot, float_status *s);
826 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
827                                       uint64_t *mod_quot, float_status *s);
828 
829 #define parts_modrem(A, B, Q, S) \
830     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
831 
832 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
833 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
834 
835 #define parts_sqrt(A, S, F) \
836     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
837 
838 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
839                                         int scale, int frac_size);
840 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
841                                          int scale, int frac_size);
842 
843 #define parts_round_to_int_normal(A, R, C, F) \
844     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
845 
846 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
847                                  int scale, float_status *s,
848                                  const FloatFmt *fmt);
849 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
850                                   int scale, float_status *s,
851                                   const FloatFmt *fmt);
852 
853 #define parts_round_to_int(A, R, C, S, F) \
854     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
855 
856 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
857                                      int scale, int64_t min, int64_t max,
858                                      float_status *s);
859 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
860                                      int scale, int64_t min, int64_t max,
861                                      float_status *s);
862 
863 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
864     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
865 
866 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
867                                       int scale, uint64_t max,
868                                       float_status *s);
869 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
870                                        int scale, uint64_t max,
871                                        float_status *s);
872 
873 #define parts_float_to_uint(P, R, Z, M, S) \
874     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
875 
876 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
877                                             FloatRoundMode rmode,
878                                             int bitsm1, float_status *s);
879 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
880                                              FloatRoundMode rmode,
881                                              int bitsm1, float_status *s);
882 
883 #define parts_float_to_sint_modulo(P, R, M, S) \
884     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
885 
886 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
887                                   int scale, float_status *s);
888 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
889                                    int scale, float_status *s);
890 
891 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
892     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
893 
894 #define parts_sint_to_float(P, I, Z, S) \
895     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
896 
897 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
898                                   int scale, float_status *s);
899 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
900                                    int scale, float_status *s);
901 
902 #define parts_uint_to_float(P, I, Z, S) \
903     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
904 
905 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
906                                     float_status *s, int flags);
907 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
908                                       float_status *s, int flags);
909 
910 #define parts_minmax(A, B, S, F) \
911     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
912 
913 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
914                                      float_status *s, bool q);
915 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
916                                       float_status *s, bool q);
917 
918 #define parts_compare(A, B, S, Q) \
919     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
920 
921 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
922 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
923 
924 #define parts_scalbn(A, N, S) \
925     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
926 
927 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
928 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
929 
930 #define parts_log2(A, S, F) \
931     PARTS_GENERIC_64_128(log2, A)(A, S, F)
932 
933 /*
934  * Helper functions for softfloat-parts.c.inc, per-size operations.
935  */
936 
937 #define FRAC_GENERIC_64_128(NAME, P) \
938     _Generic((P), FloatParts64 *: frac64_##NAME, \
939                   FloatParts128 *: frac128_##NAME)
940 
941 #define FRAC_GENERIC_64_128_256(NAME, P) \
942     _Generic((P), FloatParts64 *: frac64_##NAME, \
943                   FloatParts128 *: frac128_##NAME, \
944                   FloatParts256 *: frac256_##NAME)
945 
946 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
947 {
948     return uadd64_overflow(a->frac, b->frac, &r->frac);
949 }
950 
951 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
952 {
953     bool c = 0;
954     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
955     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
956     return c;
957 }
958 
959 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
960 {
961     bool c = 0;
962     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
963     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
964     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
965     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
966     return c;
967 }
968 
969 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
970 
971 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
972 {
973     return uadd64_overflow(a->frac, c, &r->frac);
974 }
975 
976 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
977 {
978     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
979     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
980 }
981 
982 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
983 
984 static void frac64_allones(FloatParts64 *a)
985 {
986     a->frac = -1;
987 }
988 
989 static void frac128_allones(FloatParts128 *a)
990 {
991     a->frac_hi = a->frac_lo = -1;
992 }
993 
994 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
995 
996 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
997 {
998     return (a->frac == b->frac ? float_relation_equal
999             : a->frac < b->frac ? float_relation_less
1000             : float_relation_greater);
1001 }
1002 
1003 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
1004 {
1005     uint64_t ta = a->frac_hi, tb = b->frac_hi;
1006     if (ta == tb) {
1007         ta = a->frac_lo, tb = b->frac_lo;
1008         if (ta == tb) {
1009             return float_relation_equal;
1010         }
1011     }
1012     return ta < tb ? float_relation_less : float_relation_greater;
1013 }
1014 
1015 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1016 
1017 static void frac64_clear(FloatParts64 *a)
1018 {
1019     a->frac = 0;
1020 }
1021 
1022 static void frac128_clear(FloatParts128 *a)
1023 {
1024     a->frac_hi = a->frac_lo = 0;
1025 }
1026 
1027 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1028 
1029 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1030 {
1031     uint64_t n1, n0, r, q;
1032     bool ret;
1033 
1034     /*
1035      * We want a 2*N / N-bit division to produce exactly an N-bit
1036      * result, so that we do not lose any precision and so that we
1037      * do not have to renormalize afterward.  If A.frac < B.frac,
1038      * then division would produce an (N-1)-bit result; shift A left
1039      * by one to produce the an N-bit result, and return true to
1040      * decrement the exponent to match.
1041      *
1042      * The udiv_qrnnd algorithm that we're using requires normalization,
1043      * i.e. the msb of the denominator must be set, which is already true.
1044      */
1045     ret = a->frac < b->frac;
1046     if (ret) {
1047         n0 = a->frac;
1048         n1 = 0;
1049     } else {
1050         n0 = a->frac >> 1;
1051         n1 = a->frac << 63;
1052     }
1053     q = udiv_qrnnd(&r, n0, n1, b->frac);
1054 
1055     /* Set lsb if there is a remainder, to set inexact. */
1056     a->frac = q | (r != 0);
1057 
1058     return ret;
1059 }
1060 
1061 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1062 {
1063     uint64_t q0, q1, a0, a1, b0, b1;
1064     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1065     bool ret = false;
1066 
1067     a0 = a->frac_hi, a1 = a->frac_lo;
1068     b0 = b->frac_hi, b1 = b->frac_lo;
1069 
1070     ret = lt128(a0, a1, b0, b1);
1071     if (!ret) {
1072         a1 = shr_double(a0, a1, 1);
1073         a0 = a0 >> 1;
1074     }
1075 
1076     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1077     q0 = estimateDiv128To64(a0, a1, b0);
1078 
1079     /*
1080      * Estimate is high because B1 was not included (unless B1 == 0).
1081      * Reduce quotient and increase remainder until remainder is non-negative.
1082      * This loop will execute 0 to 2 times.
1083      */
1084     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1085     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1086     while (r0 != 0) {
1087         q0--;
1088         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1089     }
1090 
1091     /* Repeat using the remainder, producing a second word of quotient. */
1092     q1 = estimateDiv128To64(r1, r2, b0);
1093     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1094     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1095     while (r1 != 0) {
1096         q1--;
1097         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1098     }
1099 
1100     /* Any remainder indicates inexact; set sticky bit. */
1101     q1 |= (r2 | r3) != 0;
1102 
1103     a->frac_hi = q0;
1104     a->frac_lo = q1;
1105     return ret;
1106 }
1107 
1108 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1109 
1110 static bool frac64_eqz(FloatParts64 *a)
1111 {
1112     return a->frac == 0;
1113 }
1114 
1115 static bool frac128_eqz(FloatParts128 *a)
1116 {
1117     return (a->frac_hi | a->frac_lo) == 0;
1118 }
1119 
1120 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1121 
1122 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1123 {
1124     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1125 }
1126 
1127 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1128 {
1129     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1130                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1131 }
1132 
1133 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1134 
1135 static void frac64_neg(FloatParts64 *a)
1136 {
1137     a->frac = -a->frac;
1138 }
1139 
1140 static void frac128_neg(FloatParts128 *a)
1141 {
1142     bool c = 0;
1143     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1144     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1145 }
1146 
1147 static void frac256_neg(FloatParts256 *a)
1148 {
1149     bool c = 0;
1150     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1151     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1152     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1153     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1154 }
1155 
1156 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1157 
1158 static int frac64_normalize(FloatParts64 *a)
1159 {
1160     if (a->frac) {
1161         int shift = clz64(a->frac);
1162         a->frac <<= shift;
1163         return shift;
1164     }
1165     return 64;
1166 }
1167 
1168 static int frac128_normalize(FloatParts128 *a)
1169 {
1170     if (a->frac_hi) {
1171         int shl = clz64(a->frac_hi);
1172         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1173         a->frac_lo <<= shl;
1174         return shl;
1175     } else if (a->frac_lo) {
1176         int shl = clz64(a->frac_lo);
1177         a->frac_hi = a->frac_lo << shl;
1178         a->frac_lo = 0;
1179         return shl + 64;
1180     }
1181     return 128;
1182 }
1183 
1184 static int frac256_normalize(FloatParts256 *a)
1185 {
1186     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1187     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1188     int ret, shl;
1189 
1190     if (likely(a0)) {
1191         shl = clz64(a0);
1192         if (shl == 0) {
1193             return 0;
1194         }
1195         ret = shl;
1196     } else {
1197         if (a1) {
1198             ret = 64;
1199             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1200         } else if (a2) {
1201             ret = 128;
1202             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1203         } else if (a3) {
1204             ret = 192;
1205             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1206         } else {
1207             ret = 256;
1208             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1209             goto done;
1210         }
1211         shl = clz64(a0);
1212         if (shl == 0) {
1213             goto done;
1214         }
1215         ret += shl;
1216     }
1217 
1218     a0 = shl_double(a0, a1, shl);
1219     a1 = shl_double(a1, a2, shl);
1220     a2 = shl_double(a2, a3, shl);
1221     a3 <<= shl;
1222 
1223  done:
1224     a->frac_hi = a0;
1225     a->frac_hm = a1;
1226     a->frac_lm = a2;
1227     a->frac_lo = a3;
1228     return ret;
1229 }
1230 
1231 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1232 
1233 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1234 {
1235     uint64_t a0, a1, b0, t0, t1, q, quot;
1236     int exp_diff = a->exp - b->exp;
1237     int shift;
1238 
1239     a0 = a->frac;
1240     a1 = 0;
1241 
1242     if (exp_diff < -1) {
1243         if (mod_quot) {
1244             *mod_quot = 0;
1245         }
1246         return;
1247     }
1248     if (exp_diff == -1) {
1249         a0 >>= 1;
1250         exp_diff = 0;
1251     }
1252 
1253     b0 = b->frac;
1254     quot = q = b0 <= a0;
1255     if (q) {
1256         a0 -= b0;
1257     }
1258 
1259     exp_diff -= 64;
1260     while (exp_diff > 0) {
1261         q = estimateDiv128To64(a0, a1, b0);
1262         q = q > 2 ? q - 2 : 0;
1263         mul64To128(b0, q, &t0, &t1);
1264         sub128(a0, a1, t0, t1, &a0, &a1);
1265         shortShift128Left(a0, a1, 62, &a0, &a1);
1266         exp_diff -= 62;
1267         quot = (quot << 62) + q;
1268     }
1269 
1270     exp_diff += 64;
1271     if (exp_diff > 0) {
1272         q = estimateDiv128To64(a0, a1, b0);
1273         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1274         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1275         sub128(a0, a1, t0, t1, &a0, &a1);
1276         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1277         while (le128(t0, t1, a0, a1)) {
1278             ++q;
1279             sub128(a0, a1, t0, t1, &a0, &a1);
1280         }
1281         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1282     } else {
1283         t0 = b0;
1284         t1 = 0;
1285     }
1286 
1287     if (mod_quot) {
1288         *mod_quot = quot;
1289     } else {
1290         sub128(t0, t1, a0, a1, &t0, &t1);
1291         if (lt128(t0, t1, a0, a1) ||
1292             (eq128(t0, t1, a0, a1) && (q & 1))) {
1293             a0 = t0;
1294             a1 = t1;
1295             a->sign = !a->sign;
1296         }
1297     }
1298 
1299     if (likely(a0)) {
1300         shift = clz64(a0);
1301         shortShift128Left(a0, a1, shift, &a0, &a1);
1302     } else if (likely(a1)) {
1303         shift = clz64(a1);
1304         a0 = a1 << shift;
1305         a1 = 0;
1306         shift += 64;
1307     } else {
1308         a->cls = float_class_zero;
1309         return;
1310     }
1311 
1312     a->exp = b->exp + exp_diff - shift;
1313     a->frac = a0 | (a1 != 0);
1314 }
1315 
1316 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1317                            uint64_t *mod_quot)
1318 {
1319     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1320     int exp_diff = a->exp - b->exp;
1321     int shift;
1322 
1323     a0 = a->frac_hi;
1324     a1 = a->frac_lo;
1325     a2 = 0;
1326 
1327     if (exp_diff < -1) {
1328         if (mod_quot) {
1329             *mod_quot = 0;
1330         }
1331         return;
1332     }
1333     if (exp_diff == -1) {
1334         shift128Right(a0, a1, 1, &a0, &a1);
1335         exp_diff = 0;
1336     }
1337 
1338     b0 = b->frac_hi;
1339     b1 = b->frac_lo;
1340 
1341     quot = q = le128(b0, b1, a0, a1);
1342     if (q) {
1343         sub128(a0, a1, b0, b1, &a0, &a1);
1344     }
1345 
1346     exp_diff -= 64;
1347     while (exp_diff > 0) {
1348         q = estimateDiv128To64(a0, a1, b0);
1349         q = q > 4 ? q - 4 : 0;
1350         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1351         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1352         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1353         exp_diff -= 61;
1354         quot = (quot << 61) + q;
1355     }
1356 
1357     exp_diff += 64;
1358     if (exp_diff > 0) {
1359         q = estimateDiv128To64(a0, a1, b0);
1360         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1361         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1362         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1363         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1364         while (le192(t0, t1, t2, a0, a1, a2)) {
1365             ++q;
1366             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1367         }
1368         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1369     } else {
1370         t0 = b0;
1371         t1 = b1;
1372         t2 = 0;
1373     }
1374 
1375     if (mod_quot) {
1376         *mod_quot = quot;
1377     } else {
1378         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1379         if (lt192(t0, t1, t2, a0, a1, a2) ||
1380             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1381             a0 = t0;
1382             a1 = t1;
1383             a2 = t2;
1384             a->sign = !a->sign;
1385         }
1386     }
1387 
1388     if (likely(a0)) {
1389         shift = clz64(a0);
1390         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1391     } else if (likely(a1)) {
1392         shift = clz64(a1);
1393         shortShift128Left(a1, a2, shift, &a0, &a1);
1394         a2 = 0;
1395         shift += 64;
1396     } else if (likely(a2)) {
1397         shift = clz64(a2);
1398         a0 = a2 << shift;
1399         a1 = a2 = 0;
1400         shift += 128;
1401     } else {
1402         a->cls = float_class_zero;
1403         return;
1404     }
1405 
1406     a->exp = b->exp + exp_diff - shift;
1407     a->frac_hi = a0;
1408     a->frac_lo = a1 | (a2 != 0);
1409 }
1410 
1411 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1412 
1413 static void frac64_shl(FloatParts64 *a, int c)
1414 {
1415     a->frac <<= c;
1416 }
1417 
1418 static void frac128_shl(FloatParts128 *a, int c)
1419 {
1420     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1421 
1422     if (c & 64) {
1423         a0 = a1, a1 = 0;
1424     }
1425 
1426     c &= 63;
1427     if (c) {
1428         a0 = shl_double(a0, a1, c);
1429         a1 = a1 << c;
1430     }
1431 
1432     a->frac_hi = a0;
1433     a->frac_lo = a1;
1434 }
1435 
1436 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1437 
1438 static void frac64_shr(FloatParts64 *a, int c)
1439 {
1440     a->frac >>= c;
1441 }
1442 
1443 static void frac128_shr(FloatParts128 *a, int c)
1444 {
1445     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1446 
1447     if (c & 64) {
1448         a1 = a0, a0 = 0;
1449     }
1450 
1451     c &= 63;
1452     if (c) {
1453         a1 = shr_double(a0, a1, c);
1454         a0 = a0 >> c;
1455     }
1456 
1457     a->frac_hi = a0;
1458     a->frac_lo = a1;
1459 }
1460 
1461 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1462 
1463 static void frac64_shrjam(FloatParts64 *a, int c)
1464 {
1465     uint64_t a0 = a->frac;
1466 
1467     if (likely(c != 0)) {
1468         if (likely(c < 64)) {
1469             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1470         } else {
1471             a0 = a0 != 0;
1472         }
1473         a->frac = a0;
1474     }
1475 }
1476 
1477 static void frac128_shrjam(FloatParts128 *a, int c)
1478 {
1479     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1480     uint64_t sticky = 0;
1481 
1482     if (unlikely(c == 0)) {
1483         return;
1484     } else if (likely(c < 64)) {
1485         /* nothing */
1486     } else if (likely(c < 128)) {
1487         sticky = a1;
1488         a1 = a0;
1489         a0 = 0;
1490         c &= 63;
1491         if (c == 0) {
1492             goto done;
1493         }
1494     } else {
1495         sticky = a0 | a1;
1496         a0 = a1 = 0;
1497         goto done;
1498     }
1499 
1500     sticky |= shr_double(a1, 0, c);
1501     a1 = shr_double(a0, a1, c);
1502     a0 = a0 >> c;
1503 
1504  done:
1505     a->frac_lo = a1 | (sticky != 0);
1506     a->frac_hi = a0;
1507 }
1508 
1509 static void frac256_shrjam(FloatParts256 *a, int c)
1510 {
1511     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1512     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1513     uint64_t sticky = 0;
1514 
1515     if (unlikely(c == 0)) {
1516         return;
1517     } else if (likely(c < 64)) {
1518         /* nothing */
1519     } else if (likely(c < 256)) {
1520         if (unlikely(c & 128)) {
1521             sticky |= a2 | a3;
1522             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1523         }
1524         if (unlikely(c & 64)) {
1525             sticky |= a3;
1526             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1527         }
1528         c &= 63;
1529         if (c == 0) {
1530             goto done;
1531         }
1532     } else {
1533         sticky = a0 | a1 | a2 | a3;
1534         a0 = a1 = a2 = a3 = 0;
1535         goto done;
1536     }
1537 
1538     sticky |= shr_double(a3, 0, c);
1539     a3 = shr_double(a2, a3, c);
1540     a2 = shr_double(a1, a2, c);
1541     a1 = shr_double(a0, a1, c);
1542     a0 = a0 >> c;
1543 
1544  done:
1545     a->frac_lo = a3 | (sticky != 0);
1546     a->frac_lm = a2;
1547     a->frac_hm = a1;
1548     a->frac_hi = a0;
1549 }
1550 
1551 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1552 
1553 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1554 {
1555     return usub64_overflow(a->frac, b->frac, &r->frac);
1556 }
1557 
1558 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1559 {
1560     bool c = 0;
1561     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1562     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1563     return c;
1564 }
1565 
1566 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1567 {
1568     bool c = 0;
1569     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1570     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1571     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1572     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1573     return c;
1574 }
1575 
1576 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1577 
1578 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1579 {
1580     r->frac = a->frac_hi | (a->frac_lo != 0);
1581 }
1582 
1583 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1584 {
1585     r->frac_hi = a->frac_hi;
1586     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1587 }
1588 
1589 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1590 
1591 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1592 {
1593     r->frac_hi = a->frac;
1594     r->frac_lo = 0;
1595 }
1596 
1597 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1598 {
1599     r->frac_hi = a->frac_hi;
1600     r->frac_hm = a->frac_lo;
1601     r->frac_lm = 0;
1602     r->frac_lo = 0;
1603 }
1604 
1605 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1606 
1607 /*
1608  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1609  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1610  * and thus MIT licenced.
1611  */
1612 static const uint16_t rsqrt_tab[128] = {
1613     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1614     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1615     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1616     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1617     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1618     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1619     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1620     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1621     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1622     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1623     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1624     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1625     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1626     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1627     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1628     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1629 };
1630 
1631 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1632 #define FloatPartsN    glue(FloatParts,N)
1633 #define FloatPartsW    glue(FloatParts,W)
1634 
1635 #define N 64
1636 #define W 128
1637 
1638 #include "softfloat-parts-addsub.c.inc"
1639 #include "softfloat-parts.c.inc"
1640 
1641 #undef  N
1642 #undef  W
1643 #define N 128
1644 #define W 256
1645 
1646 #include "softfloat-parts-addsub.c.inc"
1647 #include "softfloat-parts.c.inc"
1648 
1649 #undef  N
1650 #undef  W
1651 #define N            256
1652 
1653 #include "softfloat-parts-addsub.c.inc"
1654 
1655 #undef  N
1656 #undef  W
1657 #undef  partsN
1658 #undef  FloatPartsN
1659 #undef  FloatPartsW
1660 
1661 /*
1662  * Pack/unpack routines with a specific FloatFmt.
1663  */
1664 
1665 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1666                                       float_status *s, const FloatFmt *params)
1667 {
1668     float16_unpack_raw(p, f);
1669     parts_canonicalize(p, s, params);
1670 }
1671 
1672 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1673                                      float_status *s)
1674 {
1675     float16a_unpack_canonical(p, f, s, &float16_params);
1676 }
1677 
1678 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1679                                       float_status *s)
1680 {
1681     bfloat16_unpack_raw(p, f);
1682     parts_canonicalize(p, s, &bfloat16_params);
1683 }
1684 
1685 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1686                                              float_status *s,
1687                                              const FloatFmt *params)
1688 {
1689     parts_uncanon(p, s, params);
1690     return float16_pack_raw(p);
1691 }
1692 
1693 static float16 float16_round_pack_canonical(FloatParts64 *p,
1694                                             float_status *s)
1695 {
1696     return float16a_round_pack_canonical(p, s, &float16_params);
1697 }
1698 
1699 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1700                                               float_status *s)
1701 {
1702     parts_uncanon(p, s, &bfloat16_params);
1703     return bfloat16_pack_raw(p);
1704 }
1705 
1706 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1707                                      float_status *s)
1708 {
1709     float32_unpack_raw(p, f);
1710     parts_canonicalize(p, s, &float32_params);
1711 }
1712 
1713 static float32 float32_round_pack_canonical(FloatParts64 *p,
1714                                             float_status *s)
1715 {
1716     parts_uncanon(p, s, &float32_params);
1717     return float32_pack_raw(p);
1718 }
1719 
1720 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1721                                      float_status *s)
1722 {
1723     float64_unpack_raw(p, f);
1724     parts_canonicalize(p, s, &float64_params);
1725 }
1726 
1727 static float64 float64_round_pack_canonical(FloatParts64 *p,
1728                                             float_status *s)
1729 {
1730     parts_uncanon(p, s, &float64_params);
1731     return float64_pack_raw(p);
1732 }
1733 
1734 static float64 float64r32_pack_raw(FloatParts64 *p)
1735 {
1736     /*
1737      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1738      * We need to adjust the fraction higher so that the least N bits are
1739      * zero, and the fraction is adjacent to the float64 implicit bit.
1740      */
1741     switch (p->cls) {
1742     case float_class_normal:
1743     case float_class_denormal:
1744         if (unlikely(p->exp == 0)) {
1745             /*
1746              * The result is denormal for float32, but can be represented
1747              * in normalized form for float64.  Adjust, per canonicalize.
1748              */
1749             int shift = frac_normalize(p);
1750             p->exp = (float32_params.frac_shift -
1751                       float32_params.exp_bias - shift + 1 +
1752                       float64_params.exp_bias);
1753             frac_shr(p, float64_params.frac_shift);
1754         } else {
1755             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1756             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1757         }
1758         break;
1759     case float_class_snan:
1760     case float_class_qnan:
1761         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1762         p->exp = float64_params.exp_max;
1763         break;
1764     case float_class_inf:
1765         p->exp = float64_params.exp_max;
1766         break;
1767     case float_class_zero:
1768         break;
1769     default:
1770         g_assert_not_reached();
1771     }
1772 
1773     return float64_pack_raw(p);
1774 }
1775 
1776 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1777                                                float_status *s)
1778 {
1779     parts_uncanon(p, s, &float32_params);
1780     return float64r32_pack_raw(p);
1781 }
1782 
1783 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1784                                       float_status *s)
1785 {
1786     float128_unpack_raw(p, f);
1787     parts_canonicalize(p, s, &float128_params);
1788 }
1789 
1790 static float128 float128_round_pack_canonical(FloatParts128 *p,
1791                                               float_status *s)
1792 {
1793     parts_uncanon(p, s, &float128_params);
1794     return float128_pack_raw(p);
1795 }
1796 
1797 /* Returns false if the encoding is invalid. */
1798 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1799                                       float_status *s)
1800 {
1801     /* Ensure rounding precision is set before beginning. */
1802     switch (s->floatx80_rounding_precision) {
1803     case floatx80_precision_x:
1804     case floatx80_precision_d:
1805     case floatx80_precision_s:
1806         break;
1807     default:
1808         g_assert_not_reached();
1809     }
1810 
1811     if (unlikely(floatx80_invalid_encoding(f, s))) {
1812         float_raise(float_flag_invalid, s);
1813         return false;
1814     }
1815 
1816     floatx80_unpack_raw(p, f);
1817 
1818     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1819         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1820     } else {
1821         /* The explicit integer bit is ignored, after invalid checks. */
1822         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1823         p->cls = (p->frac_hi == 0 ? float_class_inf
1824                   : parts_is_snan_frac(p->frac_hi, s)
1825                   ? float_class_snan : float_class_qnan);
1826     }
1827     return true;
1828 }
1829 
1830 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1831                                               float_status *s)
1832 {
1833     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1834     uint64_t frac;
1835     int exp;
1836 
1837     switch (p->cls) {
1838     case float_class_normal:
1839     case float_class_denormal:
1840         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1841             parts_uncanon_normal(p, s, fmt);
1842             frac = p->frac_hi;
1843             exp = p->exp;
1844         } else {
1845             FloatParts64 p64;
1846 
1847             p64.sign = p->sign;
1848             p64.exp = p->exp;
1849             frac_truncjam(&p64, p);
1850             parts_uncanon_normal(&p64, s, fmt);
1851             frac = p64.frac;
1852             exp = p64.exp;
1853         }
1854         if (exp != fmt->exp_max) {
1855             break;
1856         }
1857         /* rounded to inf -- fall through to set frac correctly */
1858 
1859     case float_class_inf:
1860         /* x86 and m68k differ in the setting of the integer bit. */
1861         frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
1862             0 : (1ULL << 63);
1863         exp = fmt->exp_max;
1864         break;
1865 
1866     case float_class_zero:
1867         frac = 0;
1868         exp = 0;
1869         break;
1870 
1871     case float_class_snan:
1872     case float_class_qnan:
1873         /* NaNs have the integer bit set. */
1874         frac = p->frac_hi | (1ull << 63);
1875         exp = fmt->exp_max;
1876         break;
1877 
1878     default:
1879         g_assert_not_reached();
1880     }
1881 
1882     return packFloatx80(p->sign, exp, frac);
1883 }
1884 
1885 /*
1886  * Addition and subtraction
1887  */
1888 
1889 static float16 QEMU_FLATTEN
1890 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1891 {
1892     FloatParts64 pa, pb, *pr;
1893 
1894     float16_unpack_canonical(&pa, a, status);
1895     float16_unpack_canonical(&pb, b, status);
1896     pr = parts_addsub(&pa, &pb, status, subtract);
1897 
1898     return float16_round_pack_canonical(pr, status);
1899 }
1900 
1901 float16 float16_add(float16 a, float16 b, float_status *status)
1902 {
1903     return float16_addsub(a, b, status, false);
1904 }
1905 
1906 float16 float16_sub(float16 a, float16 b, float_status *status)
1907 {
1908     return float16_addsub(a, b, status, true);
1909 }
1910 
1911 static float32 QEMU_SOFTFLOAT_ATTR
1912 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1913 {
1914     FloatParts64 pa, pb, *pr;
1915 
1916     float32_unpack_canonical(&pa, a, status);
1917     float32_unpack_canonical(&pb, b, status);
1918     pr = parts_addsub(&pa, &pb, status, subtract);
1919 
1920     return float32_round_pack_canonical(pr, status);
1921 }
1922 
1923 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1924 {
1925     return soft_f32_addsub(a, b, status, false);
1926 }
1927 
1928 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1929 {
1930     return soft_f32_addsub(a, b, status, true);
1931 }
1932 
1933 static float64 QEMU_SOFTFLOAT_ATTR
1934 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1935 {
1936     FloatParts64 pa, pb, *pr;
1937 
1938     float64_unpack_canonical(&pa, a, status);
1939     float64_unpack_canonical(&pb, b, status);
1940     pr = parts_addsub(&pa, &pb, status, subtract);
1941 
1942     return float64_round_pack_canonical(pr, status);
1943 }
1944 
1945 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1946 {
1947     return soft_f64_addsub(a, b, status, false);
1948 }
1949 
1950 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1951 {
1952     return soft_f64_addsub(a, b, status, true);
1953 }
1954 
1955 static float hard_f32_add(float a, float b)
1956 {
1957     return a + b;
1958 }
1959 
1960 static float hard_f32_sub(float a, float b)
1961 {
1962     return a - b;
1963 }
1964 
1965 static double hard_f64_add(double a, double b)
1966 {
1967     return a + b;
1968 }
1969 
1970 static double hard_f64_sub(double a, double b)
1971 {
1972     return a - b;
1973 }
1974 
1975 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1976 {
1977     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1978         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1979     }
1980     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1981 }
1982 
1983 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1984 {
1985     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1986         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1987     } else {
1988         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1989     }
1990 }
1991 
1992 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1993                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1994 {
1995     return float32_gen2(a, b, s, hard, soft,
1996                         f32_is_zon2, f32_addsubmul_post);
1997 }
1998 
1999 static float64 float64_addsub(float64 a, float64 b, float_status *s,
2000                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
2001 {
2002     return float64_gen2(a, b, s, hard, soft,
2003                         f64_is_zon2, f64_addsubmul_post);
2004 }
2005 
2006 float32 QEMU_FLATTEN
2007 float32_add(float32 a, float32 b, float_status *s)
2008 {
2009     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
2010 }
2011 
2012 float32 QEMU_FLATTEN
2013 float32_sub(float32 a, float32 b, float_status *s)
2014 {
2015     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
2016 }
2017 
2018 float64 QEMU_FLATTEN
2019 float64_add(float64 a, float64 b, float_status *s)
2020 {
2021     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2022 }
2023 
2024 float64 QEMU_FLATTEN
2025 float64_sub(float64 a, float64 b, float_status *s)
2026 {
2027     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2028 }
2029 
2030 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2031                                  bool subtract)
2032 {
2033     FloatParts64 pa, pb, *pr;
2034 
2035     float64_unpack_canonical(&pa, a, status);
2036     float64_unpack_canonical(&pb, b, status);
2037     pr = parts_addsub(&pa, &pb, status, subtract);
2038 
2039     return float64r32_round_pack_canonical(pr, status);
2040 }
2041 
2042 float64 float64r32_add(float64 a, float64 b, float_status *status)
2043 {
2044     return float64r32_addsub(a, b, status, false);
2045 }
2046 
2047 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2048 {
2049     return float64r32_addsub(a, b, status, true);
2050 }
2051 
2052 static bfloat16 QEMU_FLATTEN
2053 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2054 {
2055     FloatParts64 pa, pb, *pr;
2056 
2057     bfloat16_unpack_canonical(&pa, a, status);
2058     bfloat16_unpack_canonical(&pb, b, status);
2059     pr = parts_addsub(&pa, &pb, status, subtract);
2060 
2061     return bfloat16_round_pack_canonical(pr, status);
2062 }
2063 
2064 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2065 {
2066     return bfloat16_addsub(a, b, status, false);
2067 }
2068 
2069 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2070 {
2071     return bfloat16_addsub(a, b, status, true);
2072 }
2073 
2074 static float128 QEMU_FLATTEN
2075 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2076 {
2077     FloatParts128 pa, pb, *pr;
2078 
2079     float128_unpack_canonical(&pa, a, status);
2080     float128_unpack_canonical(&pb, b, status);
2081     pr = parts_addsub(&pa, &pb, status, subtract);
2082 
2083     return float128_round_pack_canonical(pr, status);
2084 }
2085 
2086 float128 float128_add(float128 a, float128 b, float_status *status)
2087 {
2088     return float128_addsub(a, b, status, false);
2089 }
2090 
2091 float128 float128_sub(float128 a, float128 b, float_status *status)
2092 {
2093     return float128_addsub(a, b, status, true);
2094 }
2095 
2096 static floatx80 QEMU_FLATTEN
2097 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2098 {
2099     FloatParts128 pa, pb, *pr;
2100 
2101     if (!floatx80_unpack_canonical(&pa, a, status) ||
2102         !floatx80_unpack_canonical(&pb, b, status)) {
2103         return floatx80_default_nan(status);
2104     }
2105 
2106     pr = parts_addsub(&pa, &pb, status, subtract);
2107     return floatx80_round_pack_canonical(pr, status);
2108 }
2109 
2110 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2111 {
2112     return floatx80_addsub(a, b, status, false);
2113 }
2114 
2115 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2116 {
2117     return floatx80_addsub(a, b, status, true);
2118 }
2119 
2120 /*
2121  * Multiplication
2122  */
2123 
2124 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2125 {
2126     FloatParts64 pa, pb, *pr;
2127 
2128     float16_unpack_canonical(&pa, a, status);
2129     float16_unpack_canonical(&pb, b, status);
2130     pr = parts_mul(&pa, &pb, status);
2131 
2132     return float16_round_pack_canonical(pr, status);
2133 }
2134 
2135 static float32 QEMU_SOFTFLOAT_ATTR
2136 soft_f32_mul(float32 a, float32 b, float_status *status)
2137 {
2138     FloatParts64 pa, pb, *pr;
2139 
2140     float32_unpack_canonical(&pa, a, status);
2141     float32_unpack_canonical(&pb, b, status);
2142     pr = parts_mul(&pa, &pb, status);
2143 
2144     return float32_round_pack_canonical(pr, status);
2145 }
2146 
2147 static float64 QEMU_SOFTFLOAT_ATTR
2148 soft_f64_mul(float64 a, float64 b, float_status *status)
2149 {
2150     FloatParts64 pa, pb, *pr;
2151 
2152     float64_unpack_canonical(&pa, a, status);
2153     float64_unpack_canonical(&pb, b, status);
2154     pr = parts_mul(&pa, &pb, status);
2155 
2156     return float64_round_pack_canonical(pr, status);
2157 }
2158 
2159 static float hard_f32_mul(float a, float b)
2160 {
2161     return a * b;
2162 }
2163 
2164 static double hard_f64_mul(double a, double b)
2165 {
2166     return a * b;
2167 }
2168 
2169 float32 QEMU_FLATTEN
2170 float32_mul(float32 a, float32 b, float_status *s)
2171 {
2172     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2173                         f32_is_zon2, f32_addsubmul_post);
2174 }
2175 
2176 float64 QEMU_FLATTEN
2177 float64_mul(float64 a, float64 b, float_status *s)
2178 {
2179     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2180                         f64_is_zon2, f64_addsubmul_post);
2181 }
2182 
2183 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2184 {
2185     FloatParts64 pa, pb, *pr;
2186 
2187     float64_unpack_canonical(&pa, a, status);
2188     float64_unpack_canonical(&pb, b, status);
2189     pr = parts_mul(&pa, &pb, status);
2190 
2191     return float64r32_round_pack_canonical(pr, status);
2192 }
2193 
2194 bfloat16 QEMU_FLATTEN
2195 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2196 {
2197     FloatParts64 pa, pb, *pr;
2198 
2199     bfloat16_unpack_canonical(&pa, a, status);
2200     bfloat16_unpack_canonical(&pb, b, status);
2201     pr = parts_mul(&pa, &pb, status);
2202 
2203     return bfloat16_round_pack_canonical(pr, status);
2204 }
2205 
2206 float128 QEMU_FLATTEN
2207 float128_mul(float128 a, float128 b, float_status *status)
2208 {
2209     FloatParts128 pa, pb, *pr;
2210 
2211     float128_unpack_canonical(&pa, a, status);
2212     float128_unpack_canonical(&pb, b, status);
2213     pr = parts_mul(&pa, &pb, status);
2214 
2215     return float128_round_pack_canonical(pr, status);
2216 }
2217 
2218 floatx80 QEMU_FLATTEN
2219 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2220 {
2221     FloatParts128 pa, pb, *pr;
2222 
2223     if (!floatx80_unpack_canonical(&pa, a, status) ||
2224         !floatx80_unpack_canonical(&pb, b, status)) {
2225         return floatx80_default_nan(status);
2226     }
2227 
2228     pr = parts_mul(&pa, &pb, status);
2229     return floatx80_round_pack_canonical(pr, status);
2230 }
2231 
2232 /*
2233  * Fused multiply-add
2234  */
2235 
2236 float16 QEMU_FLATTEN
2237 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2238                       int scale, int flags, float_status *status)
2239 {
2240     FloatParts64 pa, pb, pc, *pr;
2241 
2242     float16_unpack_canonical(&pa, a, status);
2243     float16_unpack_canonical(&pb, b, status);
2244     float16_unpack_canonical(&pc, c, status);
2245     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2246 
2247     /* Round before applying negate result. */
2248     parts_uncanon(pr, status, &float16_params);
2249     if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
2250         pr->sign ^= 1;
2251     }
2252     return float16_pack_raw(pr);
2253 }
2254 
2255 float16 float16_muladd(float16 a, float16 b, float16 c,
2256                        int flags, float_status *status)
2257 {
2258     return float16_muladd_scalbn(a, b, c, 0, flags, status);
2259 }
2260 
2261 float32 QEMU_SOFTFLOAT_ATTR
2262 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2263                       int scale, int flags, float_status *status)
2264 {
2265     FloatParts64 pa, pb, pc, *pr;
2266 
2267     float32_unpack_canonical(&pa, a, status);
2268     float32_unpack_canonical(&pb, b, status);
2269     float32_unpack_canonical(&pc, c, status);
2270     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2271 
2272     /* Round before applying negate result. */
2273     parts_uncanon(pr, status, &float32_params);
2274     if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
2275         pr->sign ^= 1;
2276     }
2277     return float32_pack_raw(pr);
2278 }
2279 
2280 float64 QEMU_SOFTFLOAT_ATTR
2281 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2282                       int scale, int flags, float_status *status)
2283 {
2284     FloatParts64 pa, pb, pc, *pr;
2285 
2286     float64_unpack_canonical(&pa, a, status);
2287     float64_unpack_canonical(&pb, b, status);
2288     float64_unpack_canonical(&pc, c, status);
2289     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2290 
2291     /* Round before applying negate result. */
2292     parts_uncanon(pr, status, &float64_params);
2293     if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
2294         pr->sign ^= 1;
2295     }
2296     return float64_pack_raw(pr);
2297 }
2298 
2299 static bool force_soft_fma;
2300 
2301 float32 QEMU_FLATTEN
2302 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2303 {
2304     union_float32 ua, ub, uc, ur;
2305 
2306     ua.s = xa;
2307     ub.s = xb;
2308     uc.s = xc;
2309 
2310     if (unlikely(!can_use_fpu(s))) {
2311         goto soft;
2312     }
2313     if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
2314         goto soft;
2315     }
2316 
2317     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2318     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2319         goto soft;
2320     }
2321 
2322     if (unlikely(force_soft_fma)) {
2323         goto soft;
2324     }
2325 
2326     /*
2327      * When (a || b) == 0, there's no need to check for under/over flow,
2328      * since we know the addend is (normal || 0) and the product is 0.
2329      */
2330     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2331         union_float32 up;
2332         bool prod_sign;
2333 
2334         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2335         prod_sign ^= !!(flags & float_muladd_negate_product);
2336         up.s = float32_set_sign(float32_zero, prod_sign);
2337 
2338         if (flags & float_muladd_negate_c) {
2339             uc.h = -uc.h;
2340         }
2341         ur.h = up.h + uc.h;
2342     } else {
2343         union_float32 ua_orig = ua;
2344         union_float32 uc_orig = uc;
2345 
2346         if (flags & float_muladd_negate_product) {
2347             ua.h = -ua.h;
2348         }
2349         if (flags & float_muladd_negate_c) {
2350             uc.h = -uc.h;
2351         }
2352 
2353         ur.h = fmaf(ua.h, ub.h, uc.h);
2354 
2355         if (unlikely(f32_is_inf(ur))) {
2356             float_raise(float_flag_overflow, s);
2357         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2358             ua = ua_orig;
2359             uc = uc_orig;
2360             goto soft;
2361         }
2362     }
2363     if (flags & float_muladd_negate_result) {
2364         return float32_chs(ur.s);
2365     }
2366     return ur.s;
2367 
2368  soft:
2369     return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2370 }
2371 
2372 float64 QEMU_FLATTEN
2373 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2374 {
2375     union_float64 ua, ub, uc, ur;
2376 
2377     ua.s = xa;
2378     ub.s = xb;
2379     uc.s = xc;
2380 
2381     if (unlikely(!can_use_fpu(s))) {
2382         goto soft;
2383     }
2384 
2385     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2386     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2387         goto soft;
2388     }
2389 
2390     if (unlikely(force_soft_fma)) {
2391         goto soft;
2392     }
2393 
2394     /*
2395      * When (a || b) == 0, there's no need to check for under/over flow,
2396      * since we know the addend is (normal || 0) and the product is 0.
2397      */
2398     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2399         union_float64 up;
2400         bool prod_sign;
2401 
2402         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2403         prod_sign ^= !!(flags & float_muladd_negate_product);
2404         up.s = float64_set_sign(float64_zero, prod_sign);
2405 
2406         if (flags & float_muladd_negate_c) {
2407             uc.h = -uc.h;
2408         }
2409         ur.h = up.h + uc.h;
2410     } else {
2411         union_float64 ua_orig = ua;
2412         union_float64 uc_orig = uc;
2413 
2414         if (flags & float_muladd_negate_product) {
2415             ua.h = -ua.h;
2416         }
2417         if (flags & float_muladd_negate_c) {
2418             uc.h = -uc.h;
2419         }
2420 
2421         ur.h = fma(ua.h, ub.h, uc.h);
2422 
2423         if (unlikely(f64_is_inf(ur))) {
2424             float_raise(float_flag_overflow, s);
2425         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2426             ua = ua_orig;
2427             uc = uc_orig;
2428             goto soft;
2429         }
2430     }
2431     if (flags & float_muladd_negate_result) {
2432         return float64_chs(ur.s);
2433     }
2434     return ur.s;
2435 
2436  soft:
2437     return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2438 }
2439 
2440 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2441                           int flags, float_status *status)
2442 {
2443     FloatParts64 pa, pb, pc, *pr;
2444 
2445     float64_unpack_canonical(&pa, a, status);
2446     float64_unpack_canonical(&pb, b, status);
2447     float64_unpack_canonical(&pc, c, status);
2448     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2449 
2450     /* Round before applying negate result. */
2451     parts_uncanon(pr, status, &float32_params);
2452     if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
2453         pr->sign ^= 1;
2454     }
2455     return float64r32_pack_raw(pr);
2456 }
2457 
2458 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2459                                       int flags, float_status *status)
2460 {
2461     FloatParts64 pa, pb, pc, *pr;
2462 
2463     bfloat16_unpack_canonical(&pa, a, status);
2464     bfloat16_unpack_canonical(&pb, b, status);
2465     bfloat16_unpack_canonical(&pc, c, status);
2466     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2467 
2468     /* Round before applying negate result. */
2469     parts_uncanon(pr, status, &bfloat16_params);
2470     if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
2471         pr->sign ^= 1;
2472     }
2473     return bfloat16_pack_raw(pr);
2474 }
2475 
2476 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2477                                       int flags, float_status *status)
2478 {
2479     FloatParts128 pa, pb, pc, *pr;
2480 
2481     float128_unpack_canonical(&pa, a, status);
2482     float128_unpack_canonical(&pb, b, status);
2483     float128_unpack_canonical(&pc, c, status);
2484     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2485 
2486     /* Round before applying negate result. */
2487     parts_uncanon(pr, status, &float128_params);
2488     if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
2489         pr->sign ^= 1;
2490     }
2491     return float128_pack_raw(pr);
2492 }
2493 
2494 /*
2495  * Division
2496  */
2497 
2498 float16 float16_div(float16 a, float16 b, float_status *status)
2499 {
2500     FloatParts64 pa, pb, *pr;
2501 
2502     float16_unpack_canonical(&pa, a, status);
2503     float16_unpack_canonical(&pb, b, status);
2504     pr = parts_div(&pa, &pb, status);
2505 
2506     return float16_round_pack_canonical(pr, status);
2507 }
2508 
2509 static float32 QEMU_SOFTFLOAT_ATTR
2510 soft_f32_div(float32 a, float32 b, float_status *status)
2511 {
2512     FloatParts64 pa, pb, *pr;
2513 
2514     float32_unpack_canonical(&pa, a, status);
2515     float32_unpack_canonical(&pb, b, status);
2516     pr = parts_div(&pa, &pb, status);
2517 
2518     return float32_round_pack_canonical(pr, status);
2519 }
2520 
2521 static float64 QEMU_SOFTFLOAT_ATTR
2522 soft_f64_div(float64 a, float64 b, float_status *status)
2523 {
2524     FloatParts64 pa, pb, *pr;
2525 
2526     float64_unpack_canonical(&pa, a, status);
2527     float64_unpack_canonical(&pb, b, status);
2528     pr = parts_div(&pa, &pb, status);
2529 
2530     return float64_round_pack_canonical(pr, status);
2531 }
2532 
2533 static float hard_f32_div(float a, float b)
2534 {
2535     return a / b;
2536 }
2537 
2538 static double hard_f64_div(double a, double b)
2539 {
2540     return a / b;
2541 }
2542 
2543 static bool f32_div_pre(union_float32 a, union_float32 b)
2544 {
2545     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2546         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2547                fpclassify(b.h) == FP_NORMAL;
2548     }
2549     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2550 }
2551 
2552 static bool f64_div_pre(union_float64 a, union_float64 b)
2553 {
2554     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2555         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2556                fpclassify(b.h) == FP_NORMAL;
2557     }
2558     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2559 }
2560 
2561 static bool f32_div_post(union_float32 a, union_float32 b)
2562 {
2563     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2564         return fpclassify(a.h) != FP_ZERO;
2565     }
2566     return !float32_is_zero(a.s);
2567 }
2568 
2569 static bool f64_div_post(union_float64 a, union_float64 b)
2570 {
2571     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2572         return fpclassify(a.h) != FP_ZERO;
2573     }
2574     return !float64_is_zero(a.s);
2575 }
2576 
2577 float32 QEMU_FLATTEN
2578 float32_div(float32 a, float32 b, float_status *s)
2579 {
2580     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2581                         f32_div_pre, f32_div_post);
2582 }
2583 
2584 float64 QEMU_FLATTEN
2585 float64_div(float64 a, float64 b, float_status *s)
2586 {
2587     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2588                         f64_div_pre, f64_div_post);
2589 }
2590 
2591 float64 float64r32_div(float64 a, float64 b, float_status *status)
2592 {
2593     FloatParts64 pa, pb, *pr;
2594 
2595     float64_unpack_canonical(&pa, a, status);
2596     float64_unpack_canonical(&pb, b, status);
2597     pr = parts_div(&pa, &pb, status);
2598 
2599     return float64r32_round_pack_canonical(pr, status);
2600 }
2601 
2602 bfloat16 QEMU_FLATTEN
2603 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2604 {
2605     FloatParts64 pa, pb, *pr;
2606 
2607     bfloat16_unpack_canonical(&pa, a, status);
2608     bfloat16_unpack_canonical(&pb, b, status);
2609     pr = parts_div(&pa, &pb, status);
2610 
2611     return bfloat16_round_pack_canonical(pr, status);
2612 }
2613 
2614 float128 QEMU_FLATTEN
2615 float128_div(float128 a, float128 b, float_status *status)
2616 {
2617     FloatParts128 pa, pb, *pr;
2618 
2619     float128_unpack_canonical(&pa, a, status);
2620     float128_unpack_canonical(&pb, b, status);
2621     pr = parts_div(&pa, &pb, status);
2622 
2623     return float128_round_pack_canonical(pr, status);
2624 }
2625 
2626 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2627 {
2628     FloatParts128 pa, pb, *pr;
2629 
2630     if (!floatx80_unpack_canonical(&pa, a, status) ||
2631         !floatx80_unpack_canonical(&pb, b, status)) {
2632         return floatx80_default_nan(status);
2633     }
2634 
2635     pr = parts_div(&pa, &pb, status);
2636     return floatx80_round_pack_canonical(pr, status);
2637 }
2638 
2639 /*
2640  * Remainder
2641  */
2642 
2643 float32 float32_rem(float32 a, float32 b, float_status *status)
2644 {
2645     FloatParts64 pa, pb, *pr;
2646 
2647     float32_unpack_canonical(&pa, a, status);
2648     float32_unpack_canonical(&pb, b, status);
2649     pr = parts_modrem(&pa, &pb, NULL, status);
2650 
2651     return float32_round_pack_canonical(pr, status);
2652 }
2653 
2654 float64 float64_rem(float64 a, float64 b, float_status *status)
2655 {
2656     FloatParts64 pa, pb, *pr;
2657 
2658     float64_unpack_canonical(&pa, a, status);
2659     float64_unpack_canonical(&pb, b, status);
2660     pr = parts_modrem(&pa, &pb, NULL, status);
2661 
2662     return float64_round_pack_canonical(pr, status);
2663 }
2664 
2665 float128 float128_rem(float128 a, float128 b, float_status *status)
2666 {
2667     FloatParts128 pa, pb, *pr;
2668 
2669     float128_unpack_canonical(&pa, a, status);
2670     float128_unpack_canonical(&pb, b, status);
2671     pr = parts_modrem(&pa, &pb, NULL, status);
2672 
2673     return float128_round_pack_canonical(pr, status);
2674 }
2675 
2676 /*
2677  * Returns the remainder of the extended double-precision floating-point value
2678  * `a' with respect to the corresponding value `b'.
2679  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2680  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2681  * the remainder based on truncating the quotient toward zero instead and
2682  * *quotient is set to the low 64 bits of the absolute value of the integer
2683  * quotient.
2684  */
2685 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2686                          uint64_t *quotient, float_status *status)
2687 {
2688     FloatParts128 pa, pb, *pr;
2689 
2690     *quotient = 0;
2691     if (!floatx80_unpack_canonical(&pa, a, status) ||
2692         !floatx80_unpack_canonical(&pb, b, status)) {
2693         return floatx80_default_nan(status);
2694     }
2695     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2696 
2697     return floatx80_round_pack_canonical(pr, status);
2698 }
2699 
2700 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2701 {
2702     uint64_t quotient;
2703     return floatx80_modrem(a, b, false, &quotient, status);
2704 }
2705 
2706 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2707 {
2708     uint64_t quotient;
2709     return floatx80_modrem(a, b, true, &quotient, status);
2710 }
2711 
2712 /*
2713  * Float to Float conversions
2714  *
2715  * Returns the result of converting one float format to another. The
2716  * conversion is performed according to the IEC/IEEE Standard for
2717  * Binary Floating-Point Arithmetic.
2718  *
2719  * Usually this only needs to take care of raising invalid exceptions
2720  * and handling the conversion on NaNs.
2721  */
2722 
2723 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2724 {
2725     switch (a->cls) {
2726     case float_class_snan:
2727         float_raise(float_flag_invalid_snan, s);
2728         /* fall through */
2729     case float_class_qnan:
2730         /*
2731          * There is no NaN in the destination format.  Raise Invalid
2732          * and return a zero with the sign of the input NaN.
2733          */
2734         float_raise(float_flag_invalid, s);
2735         a->cls = float_class_zero;
2736         break;
2737 
2738     case float_class_inf:
2739         /*
2740          * There is no Inf in the destination format.  Raise Invalid
2741          * and return the maximum normal with the correct sign.
2742          */
2743         float_raise(float_flag_invalid, s);
2744         a->cls = float_class_normal;
2745         a->exp = float16_params_ahp.exp_max;
2746         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2747                                   float16_params_ahp.frac_size + 1);
2748         break;
2749 
2750     case float_class_denormal:
2751         float_raise(float_flag_input_denormal_used, s);
2752         break;
2753     case float_class_normal:
2754     case float_class_zero:
2755         break;
2756 
2757     default:
2758         g_assert_not_reached();
2759     }
2760 }
2761 
2762 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2763 {
2764     if (is_nan(a->cls)) {
2765         parts_return_nan(a, s);
2766     }
2767     if (a->cls == float_class_denormal) {
2768         float_raise(float_flag_input_denormal_used, s);
2769     }
2770 }
2771 
2772 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2773 {
2774     if (is_nan(a->cls)) {
2775         parts_return_nan(a, s);
2776     }
2777     if (a->cls == float_class_denormal) {
2778         float_raise(float_flag_input_denormal_used, s);
2779     }
2780 }
2781 
2782 #define parts_float_to_float(P, S) \
2783     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2784 
2785 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2786                                         float_status *s)
2787 {
2788     a->cls = b->cls;
2789     a->sign = b->sign;
2790     a->exp = b->exp;
2791 
2792     switch (a->cls) {
2793     case float_class_denormal:
2794         float_raise(float_flag_input_denormal_used, s);
2795         /* fall through */
2796     case float_class_normal:
2797         frac_truncjam(a, b);
2798         break;
2799     case float_class_snan:
2800     case float_class_qnan:
2801         /* Discard the low bits of the NaN. */
2802         a->frac = b->frac_hi;
2803         parts_return_nan(a, s);
2804         break;
2805     default:
2806         break;
2807     }
2808 }
2809 
2810 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2811                                        float_status *s)
2812 {
2813     a->cls = b->cls;
2814     a->sign = b->sign;
2815     a->exp = b->exp;
2816     frac_widen(a, b);
2817 
2818     if (is_nan(a->cls)) {
2819         parts_return_nan(a, s);
2820     }
2821     if (a->cls == float_class_denormal) {
2822         float_raise(float_flag_input_denormal_used, s);
2823     }
2824 }
2825 
2826 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2827 {
2828     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2829     FloatParts64 p;
2830 
2831     float16a_unpack_canonical(&p, a, s, fmt16);
2832     parts_float_to_float(&p, s);
2833     return float32_round_pack_canonical(&p, s);
2834 }
2835 
2836 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2837 {
2838     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2839     FloatParts64 p;
2840 
2841     float16a_unpack_canonical(&p, a, s, fmt16);
2842     parts_float_to_float(&p, s);
2843     return float64_round_pack_canonical(&p, s);
2844 }
2845 
2846 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2847 {
2848     FloatParts64 p;
2849     const FloatFmt *fmt;
2850 
2851     float32_unpack_canonical(&p, a, s);
2852     if (ieee) {
2853         parts_float_to_float(&p, s);
2854         fmt = &float16_params;
2855     } else {
2856         parts_float_to_ahp(&p, s);
2857         fmt = &float16_params_ahp;
2858     }
2859     return float16a_round_pack_canonical(&p, s, fmt);
2860 }
2861 
2862 static float64 QEMU_SOFTFLOAT_ATTR
2863 soft_float32_to_float64(float32 a, float_status *s)
2864 {
2865     FloatParts64 p;
2866 
2867     float32_unpack_canonical(&p, a, s);
2868     parts_float_to_float(&p, s);
2869     return float64_round_pack_canonical(&p, s);
2870 }
2871 
2872 float64 float32_to_float64(float32 a, float_status *s)
2873 {
2874     if (likely(float32_is_normal(a))) {
2875         /* Widening conversion can never produce inexact results.  */
2876         union_float32 uf;
2877         union_float64 ud;
2878         uf.s = a;
2879         ud.h = uf.h;
2880         return ud.s;
2881     } else if (float32_is_zero(a)) {
2882         return float64_set_sign(float64_zero, float32_is_neg(a));
2883     } else {
2884         return soft_float32_to_float64(a, s);
2885     }
2886 }
2887 
2888 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2889 {
2890     FloatParts64 p;
2891     const FloatFmt *fmt;
2892 
2893     float64_unpack_canonical(&p, a, s);
2894     if (ieee) {
2895         parts_float_to_float(&p, s);
2896         fmt = &float16_params;
2897     } else {
2898         parts_float_to_ahp(&p, s);
2899         fmt = &float16_params_ahp;
2900     }
2901     return float16a_round_pack_canonical(&p, s, fmt);
2902 }
2903 
2904 float32 float64_to_float32(float64 a, float_status *s)
2905 {
2906     FloatParts64 p;
2907 
2908     float64_unpack_canonical(&p, a, s);
2909     parts_float_to_float(&p, s);
2910     return float32_round_pack_canonical(&p, s);
2911 }
2912 
2913 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2914 {
2915     FloatParts64 p;
2916 
2917     bfloat16_unpack_canonical(&p, a, s);
2918     parts_float_to_float(&p, s);
2919     return float32_round_pack_canonical(&p, s);
2920 }
2921 
2922 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2923 {
2924     FloatParts64 p;
2925 
2926     bfloat16_unpack_canonical(&p, a, s);
2927     parts_float_to_float(&p, s);
2928     return float64_round_pack_canonical(&p, s);
2929 }
2930 
2931 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2932 {
2933     FloatParts64 p;
2934 
2935     float32_unpack_canonical(&p, a, s);
2936     parts_float_to_float(&p, s);
2937     return bfloat16_round_pack_canonical(&p, s);
2938 }
2939 
2940 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2941 {
2942     FloatParts64 p;
2943 
2944     float64_unpack_canonical(&p, a, s);
2945     parts_float_to_float(&p, s);
2946     return bfloat16_round_pack_canonical(&p, s);
2947 }
2948 
2949 float32 float128_to_float32(float128 a, float_status *s)
2950 {
2951     FloatParts64 p64;
2952     FloatParts128 p128;
2953 
2954     float128_unpack_canonical(&p128, a, s);
2955     parts_float_to_float_narrow(&p64, &p128, s);
2956     return float32_round_pack_canonical(&p64, s);
2957 }
2958 
2959 float64 float128_to_float64(float128 a, float_status *s)
2960 {
2961     FloatParts64 p64;
2962     FloatParts128 p128;
2963 
2964     float128_unpack_canonical(&p128, a, s);
2965     parts_float_to_float_narrow(&p64, &p128, s);
2966     return float64_round_pack_canonical(&p64, s);
2967 }
2968 
2969 float128 float32_to_float128(float32 a, float_status *s)
2970 {
2971     FloatParts64 p64;
2972     FloatParts128 p128;
2973 
2974     float32_unpack_canonical(&p64, a, s);
2975     parts_float_to_float_widen(&p128, &p64, s);
2976     return float128_round_pack_canonical(&p128, s);
2977 }
2978 
2979 float128 float64_to_float128(float64 a, float_status *s)
2980 {
2981     FloatParts64 p64;
2982     FloatParts128 p128;
2983 
2984     float64_unpack_canonical(&p64, a, s);
2985     parts_float_to_float_widen(&p128, &p64, s);
2986     return float128_round_pack_canonical(&p128, s);
2987 }
2988 
2989 float32 floatx80_to_float32(floatx80 a, float_status *s)
2990 {
2991     FloatParts64 p64;
2992     FloatParts128 p128;
2993 
2994     if (floatx80_unpack_canonical(&p128, a, s)) {
2995         parts_float_to_float_narrow(&p64, &p128, s);
2996     } else {
2997         parts_default_nan(&p64, s);
2998     }
2999     return float32_round_pack_canonical(&p64, s);
3000 }
3001 
3002 float64 floatx80_to_float64(floatx80 a, float_status *s)
3003 {
3004     FloatParts64 p64;
3005     FloatParts128 p128;
3006 
3007     if (floatx80_unpack_canonical(&p128, a, s)) {
3008         parts_float_to_float_narrow(&p64, &p128, s);
3009     } else {
3010         parts_default_nan(&p64, s);
3011     }
3012     return float64_round_pack_canonical(&p64, s);
3013 }
3014 
3015 float128 floatx80_to_float128(floatx80 a, float_status *s)
3016 {
3017     FloatParts128 p;
3018 
3019     if (floatx80_unpack_canonical(&p, a, s)) {
3020         parts_float_to_float(&p, s);
3021     } else {
3022         parts_default_nan(&p, s);
3023     }
3024     return float128_round_pack_canonical(&p, s);
3025 }
3026 
3027 floatx80 float32_to_floatx80(float32 a, float_status *s)
3028 {
3029     FloatParts64 p64;
3030     FloatParts128 p128;
3031 
3032     float32_unpack_canonical(&p64, a, s);
3033     parts_float_to_float_widen(&p128, &p64, s);
3034     return floatx80_round_pack_canonical(&p128, s);
3035 }
3036 
3037 floatx80 float64_to_floatx80(float64 a, float_status *s)
3038 {
3039     FloatParts64 p64;
3040     FloatParts128 p128;
3041 
3042     float64_unpack_canonical(&p64, a, s);
3043     parts_float_to_float_widen(&p128, &p64, s);
3044     return floatx80_round_pack_canonical(&p128, s);
3045 }
3046 
3047 floatx80 float128_to_floatx80(float128 a, float_status *s)
3048 {
3049     FloatParts128 p;
3050 
3051     float128_unpack_canonical(&p, a, s);
3052     parts_float_to_float(&p, s);
3053     return floatx80_round_pack_canonical(&p, s);
3054 }
3055 
3056 /*
3057  * Round to integral value
3058  */
3059 
3060 float16 float16_round_to_int(float16 a, float_status *s)
3061 {
3062     FloatParts64 p;
3063 
3064     float16_unpack_canonical(&p, a, s);
3065     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
3066     return float16_round_pack_canonical(&p, s);
3067 }
3068 
3069 float32 float32_round_to_int(float32 a, float_status *s)
3070 {
3071     FloatParts64 p;
3072 
3073     float32_unpack_canonical(&p, a, s);
3074     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
3075     return float32_round_pack_canonical(&p, s);
3076 }
3077 
3078 float64 float64_round_to_int(float64 a, float_status *s)
3079 {
3080     FloatParts64 p;
3081 
3082     float64_unpack_canonical(&p, a, s);
3083     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3084     return float64_round_pack_canonical(&p, s);
3085 }
3086 
3087 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3088 {
3089     FloatParts64 p;
3090 
3091     bfloat16_unpack_canonical(&p, a, s);
3092     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3093     return bfloat16_round_pack_canonical(&p, s);
3094 }
3095 
3096 float128 float128_round_to_int(float128 a, float_status *s)
3097 {
3098     FloatParts128 p;
3099 
3100     float128_unpack_canonical(&p, a, s);
3101     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3102     return float128_round_pack_canonical(&p, s);
3103 }
3104 
3105 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3106 {
3107     FloatParts128 p;
3108 
3109     if (!floatx80_unpack_canonical(&p, a, status)) {
3110         return floatx80_default_nan(status);
3111     }
3112 
3113     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3114                        &floatx80_params[status->floatx80_rounding_precision]);
3115     return floatx80_round_pack_canonical(&p, status);
3116 }
3117 
3118 /*
3119  * Floating-point to signed integer conversions
3120  */
3121 
3122 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3123                               float_status *s)
3124 {
3125     FloatParts64 p;
3126 
3127     float16_unpack_canonical(&p, a, s);
3128     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3129 }
3130 
3131 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3132                                 float_status *s)
3133 {
3134     FloatParts64 p;
3135 
3136     float16_unpack_canonical(&p, a, s);
3137     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3138 }
3139 
3140 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3141                                 float_status *s)
3142 {
3143     FloatParts64 p;
3144 
3145     float16_unpack_canonical(&p, a, s);
3146     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3147 }
3148 
3149 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3150                                 float_status *s)
3151 {
3152     FloatParts64 p;
3153 
3154     float16_unpack_canonical(&p, a, s);
3155     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3156 }
3157 
3158 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3159                                 float_status *s)
3160 {
3161     FloatParts64 p;
3162 
3163     float32_unpack_canonical(&p, a, s);
3164     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3165 }
3166 
3167 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3168                                 float_status *s)
3169 {
3170     FloatParts64 p;
3171 
3172     float32_unpack_canonical(&p, a, s);
3173     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3174 }
3175 
3176 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3177                                 float_status *s)
3178 {
3179     FloatParts64 p;
3180 
3181     float32_unpack_canonical(&p, a, s);
3182     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3183 }
3184 
3185 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3186                                 float_status *s)
3187 {
3188     FloatParts64 p;
3189 
3190     float64_unpack_canonical(&p, a, s);
3191     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3192 }
3193 
3194 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3195                                 float_status *s)
3196 {
3197     FloatParts64 p;
3198 
3199     float64_unpack_canonical(&p, a, s);
3200     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3201 }
3202 
3203 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3204                                 float_status *s)
3205 {
3206     FloatParts64 p;
3207 
3208     float64_unpack_canonical(&p, a, s);
3209     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3210 }
3211 
3212 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3213                                float_status *s)
3214 {
3215     FloatParts64 p;
3216 
3217     bfloat16_unpack_canonical(&p, a, s);
3218     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3219 }
3220 
3221 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3222                                  float_status *s)
3223 {
3224     FloatParts64 p;
3225 
3226     bfloat16_unpack_canonical(&p, a, s);
3227     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3228 }
3229 
3230 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3231                                  float_status *s)
3232 {
3233     FloatParts64 p;
3234 
3235     bfloat16_unpack_canonical(&p, a, s);
3236     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3237 }
3238 
3239 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3240                                  float_status *s)
3241 {
3242     FloatParts64 p;
3243 
3244     bfloat16_unpack_canonical(&p, a, s);
3245     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3246 }
3247 
3248 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3249                                         int scale, float_status *s)
3250 {
3251     FloatParts128 p;
3252 
3253     float128_unpack_canonical(&p, a, s);
3254     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3255 }
3256 
3257 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3258                                         int scale, float_status *s)
3259 {
3260     FloatParts128 p;
3261 
3262     float128_unpack_canonical(&p, a, s);
3263     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3264 }
3265 
3266 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3267                                         int scale, float_status *s)
3268 {
3269     int flags = 0;
3270     Int128 r;
3271     FloatParts128 p;
3272 
3273     float128_unpack_canonical(&p, a, s);
3274 
3275     switch (p.cls) {
3276     case float_class_snan:
3277         flags |= float_flag_invalid_snan;
3278         /* fall through */
3279     case float_class_qnan:
3280         flags |= float_flag_invalid;
3281         r = UINT128_MAX;
3282         break;
3283 
3284     case float_class_inf:
3285         flags = float_flag_invalid | float_flag_invalid_cvti;
3286         r = p.sign ? INT128_MIN : INT128_MAX;
3287         break;
3288 
3289     case float_class_zero:
3290         return int128_zero();
3291 
3292     case float_class_normal:
3293     case float_class_denormal:
3294         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3295             flags = float_flag_inexact;
3296         }
3297 
3298         if (p.exp < 127) {
3299             int shift = 127 - p.exp;
3300             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3301             if (p.sign) {
3302                 r = int128_neg(r);
3303             }
3304         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3305                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3306             r = INT128_MIN;
3307         } else {
3308             flags = float_flag_invalid | float_flag_invalid_cvti;
3309             r = p.sign ? INT128_MIN : INT128_MAX;
3310         }
3311         break;
3312 
3313     default:
3314         g_assert_not_reached();
3315     }
3316 
3317     float_raise(flags, s);
3318     return r;
3319 }
3320 
3321 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3322                                         int scale, float_status *s)
3323 {
3324     FloatParts128 p;
3325 
3326     if (!floatx80_unpack_canonical(&p, a, s)) {
3327         parts_default_nan(&p, s);
3328     }
3329     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3330 }
3331 
3332 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3333                                         int scale, float_status *s)
3334 {
3335     FloatParts128 p;
3336 
3337     if (!floatx80_unpack_canonical(&p, a, s)) {
3338         parts_default_nan(&p, s);
3339     }
3340     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3341 }
3342 
3343 int8_t float16_to_int8(float16 a, float_status *s)
3344 {
3345     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3346 }
3347 
3348 int16_t float16_to_int16(float16 a, float_status *s)
3349 {
3350     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3351 }
3352 
3353 int32_t float16_to_int32(float16 a, float_status *s)
3354 {
3355     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3356 }
3357 
3358 int64_t float16_to_int64(float16 a, float_status *s)
3359 {
3360     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3361 }
3362 
3363 int16_t float32_to_int16(float32 a, float_status *s)
3364 {
3365     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3366 }
3367 
3368 int32_t float32_to_int32(float32 a, float_status *s)
3369 {
3370     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3371 }
3372 
3373 int64_t float32_to_int64(float32 a, float_status *s)
3374 {
3375     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3376 }
3377 
3378 int16_t float64_to_int16(float64 a, float_status *s)
3379 {
3380     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3381 }
3382 
3383 int32_t float64_to_int32(float64 a, float_status *s)
3384 {
3385     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3386 }
3387 
3388 int64_t float64_to_int64(float64 a, float_status *s)
3389 {
3390     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3391 }
3392 
3393 int32_t float128_to_int32(float128 a, float_status *s)
3394 {
3395     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3396 }
3397 
3398 int64_t float128_to_int64(float128 a, float_status *s)
3399 {
3400     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3401 }
3402 
3403 Int128 float128_to_int128(float128 a, float_status *s)
3404 {
3405     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3406 }
3407 
3408 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3409 {
3410     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3411 }
3412 
3413 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3414 {
3415     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3416 }
3417 
3418 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3419 {
3420     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3421 }
3422 
3423 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3424 {
3425     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3426 }
3427 
3428 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3429 {
3430     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3431 }
3432 
3433 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3434 {
3435     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3436 }
3437 
3438 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3439 {
3440     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3441 }
3442 
3443 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3444 {
3445     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3446 }
3447 
3448 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3449 {
3450     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3451 }
3452 
3453 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3454 {
3455     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3456 }
3457 
3458 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3459 {
3460     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3461 }
3462 
3463 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3464 {
3465     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3466 }
3467 
3468 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3469 {
3470     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3471 }
3472 
3473 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3474 {
3475     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3476 }
3477 
3478 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3479 {
3480     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3481 }
3482 
3483 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3484 {
3485     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3486 }
3487 
3488 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3489 {
3490     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3491 }
3492 
3493 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3494 {
3495     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3496 }
3497 
3498 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3499 {
3500     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3501 }
3502 
3503 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3504 {
3505     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3506 }
3507 
3508 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3509 {
3510     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3511 }
3512 
3513 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3514 {
3515     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3516 }
3517 
3518 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3519 {
3520     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3521 }
3522 
3523 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3524 {
3525     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3526 }
3527 
3528 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3529                                 float_status *s)
3530 {
3531     FloatParts64 p;
3532 
3533     float64_unpack_canonical(&p, a, s);
3534     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3535 }
3536 
3537 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3538                                 float_status *s)
3539 {
3540     FloatParts64 p;
3541 
3542     float64_unpack_canonical(&p, a, s);
3543     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3544 }
3545 
3546 /*
3547  * Floating-point to unsigned integer conversions
3548  */
3549 
3550 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3551                                 float_status *s)
3552 {
3553     FloatParts64 p;
3554 
3555     float16_unpack_canonical(&p, a, s);
3556     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3557 }
3558 
3559 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3560                                   float_status *s)
3561 {
3562     FloatParts64 p;
3563 
3564     float16_unpack_canonical(&p, a, s);
3565     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3566 }
3567 
3568 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3569                                   float_status *s)
3570 {
3571     FloatParts64 p;
3572 
3573     float16_unpack_canonical(&p, a, s);
3574     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3575 }
3576 
3577 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3578                                   float_status *s)
3579 {
3580     FloatParts64 p;
3581 
3582     float16_unpack_canonical(&p, a, s);
3583     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3584 }
3585 
3586 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3587                                   float_status *s)
3588 {
3589     FloatParts64 p;
3590 
3591     float32_unpack_canonical(&p, a, s);
3592     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3593 }
3594 
3595 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3596                                   float_status *s)
3597 {
3598     FloatParts64 p;
3599 
3600     float32_unpack_canonical(&p, a, s);
3601     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3602 }
3603 
3604 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3605                                   float_status *s)
3606 {
3607     FloatParts64 p;
3608 
3609     float32_unpack_canonical(&p, a, s);
3610     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3611 }
3612 
3613 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3614                                   float_status *s)
3615 {
3616     FloatParts64 p;
3617 
3618     float64_unpack_canonical(&p, a, s);
3619     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3620 }
3621 
3622 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3623                                   float_status *s)
3624 {
3625     FloatParts64 p;
3626 
3627     float64_unpack_canonical(&p, a, s);
3628     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3629 }
3630 
3631 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3632                                   float_status *s)
3633 {
3634     FloatParts64 p;
3635 
3636     float64_unpack_canonical(&p, a, s);
3637     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3638 }
3639 
3640 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3641                                  int scale, float_status *s)
3642 {
3643     FloatParts64 p;
3644 
3645     bfloat16_unpack_canonical(&p, a, s);
3646     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3647 }
3648 
3649 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3650                                    int scale, float_status *s)
3651 {
3652     FloatParts64 p;
3653 
3654     bfloat16_unpack_canonical(&p, a, s);
3655     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3656 }
3657 
3658 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3659                                    int scale, float_status *s)
3660 {
3661     FloatParts64 p;
3662 
3663     bfloat16_unpack_canonical(&p, a, s);
3664     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3665 }
3666 
3667 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3668                                    int scale, float_status *s)
3669 {
3670     FloatParts64 p;
3671 
3672     bfloat16_unpack_canonical(&p, a, s);
3673     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3674 }
3675 
3676 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3677                                           int scale, float_status *s)
3678 {
3679     FloatParts128 p;
3680 
3681     float128_unpack_canonical(&p, a, s);
3682     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3683 }
3684 
3685 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3686                                           int scale, float_status *s)
3687 {
3688     FloatParts128 p;
3689 
3690     float128_unpack_canonical(&p, a, s);
3691     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3692 }
3693 
3694 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3695                                          int scale, float_status *s)
3696 {
3697     int flags = 0;
3698     Int128 r;
3699     FloatParts128 p;
3700 
3701     float128_unpack_canonical(&p, a, s);
3702 
3703     switch (p.cls) {
3704     case float_class_snan:
3705         flags |= float_flag_invalid_snan;
3706         /* fall through */
3707     case float_class_qnan:
3708         flags |= float_flag_invalid;
3709         r = UINT128_MAX;
3710         break;
3711 
3712     case float_class_inf:
3713         flags = float_flag_invalid | float_flag_invalid_cvti;
3714         r = p.sign ? int128_zero() : UINT128_MAX;
3715         break;
3716 
3717     case float_class_zero:
3718         return int128_zero();
3719 
3720     case float_class_normal:
3721     case float_class_denormal:
3722         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3723             flags = float_flag_inexact;
3724             if (p.cls == float_class_zero) {
3725                 r = int128_zero();
3726                 break;
3727             }
3728         }
3729 
3730         if (p.sign) {
3731             flags = float_flag_invalid | float_flag_invalid_cvti;
3732             r = int128_zero();
3733         } else if (p.exp <= 127) {
3734             int shift = 127 - p.exp;
3735             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3736         } else {
3737             flags = float_flag_invalid | float_flag_invalid_cvti;
3738             r = UINT128_MAX;
3739         }
3740         break;
3741 
3742     default:
3743         g_assert_not_reached();
3744     }
3745 
3746     float_raise(flags, s);
3747     return r;
3748 }
3749 
3750 uint8_t float16_to_uint8(float16 a, float_status *s)
3751 {
3752     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3753 }
3754 
3755 uint16_t float16_to_uint16(float16 a, float_status *s)
3756 {
3757     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3758 }
3759 
3760 uint32_t float16_to_uint32(float16 a, float_status *s)
3761 {
3762     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3763 }
3764 
3765 uint64_t float16_to_uint64(float16 a, float_status *s)
3766 {
3767     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3768 }
3769 
3770 uint16_t float32_to_uint16(float32 a, float_status *s)
3771 {
3772     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3773 }
3774 
3775 uint32_t float32_to_uint32(float32 a, float_status *s)
3776 {
3777     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3778 }
3779 
3780 uint64_t float32_to_uint64(float32 a, float_status *s)
3781 {
3782     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3783 }
3784 
3785 uint16_t float64_to_uint16(float64 a, float_status *s)
3786 {
3787     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3788 }
3789 
3790 uint32_t float64_to_uint32(float64 a, float_status *s)
3791 {
3792     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3793 }
3794 
3795 uint64_t float64_to_uint64(float64 a, float_status *s)
3796 {
3797     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3798 }
3799 
3800 uint32_t float128_to_uint32(float128 a, float_status *s)
3801 {
3802     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3803 }
3804 
3805 uint64_t float128_to_uint64(float128 a, float_status *s)
3806 {
3807     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3808 }
3809 
3810 Int128 float128_to_uint128(float128 a, float_status *s)
3811 {
3812     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3813 }
3814 
3815 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3816 {
3817     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3818 }
3819 
3820 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3821 {
3822     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3823 }
3824 
3825 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3826 {
3827     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3828 }
3829 
3830 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3831 {
3832     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3833 }
3834 
3835 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3836 {
3837     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3838 }
3839 
3840 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3841 {
3842     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3843 }
3844 
3845 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3846 {
3847     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3848 }
3849 
3850 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3851 {
3852     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3853 }
3854 
3855 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3856 {
3857     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3858 }
3859 
3860 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3861 {
3862     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3863 }
3864 
3865 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3866 {
3867     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3868 }
3869 
3870 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3871 {
3872     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3873 }
3874 
3875 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3876 {
3877     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3878 }
3879 
3880 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3881 {
3882     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3883 }
3884 
3885 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3886 {
3887     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3888 }
3889 
3890 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3891 {
3892     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3893 }
3894 
3895 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3896 {
3897     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3898 }
3899 
3900 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3901 {
3902     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3903 }
3904 
3905 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3906 {
3907     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3908 }
3909 
3910 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3911 {
3912     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3913 }
3914 
3915 /*
3916  * Signed integer to floating-point conversions
3917  */
3918 
3919 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3920 {
3921     FloatParts64 p;
3922 
3923     parts_sint_to_float(&p, a, scale, status);
3924     return float16_round_pack_canonical(&p, status);
3925 }
3926 
3927 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3928 {
3929     return int64_to_float16_scalbn(a, scale, status);
3930 }
3931 
3932 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3933 {
3934     return int64_to_float16_scalbn(a, scale, status);
3935 }
3936 
3937 float16 int64_to_float16(int64_t a, float_status *status)
3938 {
3939     return int64_to_float16_scalbn(a, 0, status);
3940 }
3941 
3942 float16 int32_to_float16(int32_t a, float_status *status)
3943 {
3944     return int64_to_float16_scalbn(a, 0, status);
3945 }
3946 
3947 float16 int16_to_float16(int16_t a, float_status *status)
3948 {
3949     return int64_to_float16_scalbn(a, 0, status);
3950 }
3951 
3952 float16 int8_to_float16(int8_t a, float_status *status)
3953 {
3954     return int64_to_float16_scalbn(a, 0, status);
3955 }
3956 
3957 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3958 {
3959     FloatParts64 p;
3960 
3961     /* Without scaling, there are no overflow concerns. */
3962     if (likely(scale == 0) && can_use_fpu(status)) {
3963         union_float32 ur;
3964         ur.h = a;
3965         return ur.s;
3966     }
3967 
3968     parts64_sint_to_float(&p, a, scale, status);
3969     return float32_round_pack_canonical(&p, status);
3970 }
3971 
3972 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3973 {
3974     return int64_to_float32_scalbn(a, scale, status);
3975 }
3976 
3977 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3978 {
3979     return int64_to_float32_scalbn(a, scale, status);
3980 }
3981 
3982 float32 int64_to_float32(int64_t a, float_status *status)
3983 {
3984     return int64_to_float32_scalbn(a, 0, status);
3985 }
3986 
3987 float32 int32_to_float32(int32_t a, float_status *status)
3988 {
3989     return int64_to_float32_scalbn(a, 0, status);
3990 }
3991 
3992 float32 int16_to_float32(int16_t a, float_status *status)
3993 {
3994     return int64_to_float32_scalbn(a, 0, status);
3995 }
3996 
3997 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3998 {
3999     FloatParts64 p;
4000 
4001     /* Without scaling, there are no overflow concerns. */
4002     if (likely(scale == 0) && can_use_fpu(status)) {
4003         union_float64 ur;
4004         ur.h = a;
4005         return ur.s;
4006     }
4007 
4008     parts_sint_to_float(&p, a, scale, status);
4009     return float64_round_pack_canonical(&p, status);
4010 }
4011 
4012 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
4013 {
4014     return int64_to_float64_scalbn(a, scale, status);
4015 }
4016 
4017 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
4018 {
4019     return int64_to_float64_scalbn(a, scale, status);
4020 }
4021 
4022 float64 int64_to_float64(int64_t a, float_status *status)
4023 {
4024     return int64_to_float64_scalbn(a, 0, status);
4025 }
4026 
4027 float64 int32_to_float64(int32_t a, float_status *status)
4028 {
4029     return int64_to_float64_scalbn(a, 0, status);
4030 }
4031 
4032 float64 int16_to_float64(int16_t a, float_status *status)
4033 {
4034     return int64_to_float64_scalbn(a, 0, status);
4035 }
4036 
4037 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
4038 {
4039     FloatParts64 p;
4040 
4041     parts_sint_to_float(&p, a, scale, status);
4042     return bfloat16_round_pack_canonical(&p, status);
4043 }
4044 
4045 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
4046 {
4047     return int64_to_bfloat16_scalbn(a, scale, status);
4048 }
4049 
4050 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
4051 {
4052     return int64_to_bfloat16_scalbn(a, scale, status);
4053 }
4054 
4055 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
4056 {
4057     return int64_to_bfloat16_scalbn(a, scale, status);
4058 }
4059 
4060 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
4061 {
4062     return int64_to_bfloat16_scalbn(a, 0, status);
4063 }
4064 
4065 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
4066 {
4067     return int64_to_bfloat16_scalbn(a, 0, status);
4068 }
4069 
4070 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
4071 {
4072     return int64_to_bfloat16_scalbn(a, 0, status);
4073 }
4074 
4075 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
4076 {
4077     return int64_to_bfloat16_scalbn(a, 0, status);
4078 }
4079 
4080 float128 int128_to_float128(Int128 a, float_status *status)
4081 {
4082     FloatParts128 p = { };
4083     int shift;
4084 
4085     if (int128_nz(a)) {
4086         p.cls = float_class_normal;
4087         if (!int128_nonneg(a)) {
4088             p.sign = true;
4089             a = int128_neg(a);
4090         }
4091 
4092         shift = clz64(int128_gethi(a));
4093         if (shift == 64) {
4094             shift += clz64(int128_getlo(a));
4095         }
4096 
4097         p.exp = 127 - shift;
4098         a = int128_lshift(a, shift);
4099 
4100         p.frac_hi = int128_gethi(a);
4101         p.frac_lo = int128_getlo(a);
4102     } else {
4103         p.cls = float_class_zero;
4104     }
4105 
4106     return float128_round_pack_canonical(&p, status);
4107 }
4108 
4109 float128 int64_to_float128(int64_t a, float_status *status)
4110 {
4111     FloatParts128 p;
4112 
4113     parts_sint_to_float(&p, a, 0, status);
4114     return float128_round_pack_canonical(&p, status);
4115 }
4116 
4117 float128 int32_to_float128(int32_t a, float_status *status)
4118 {
4119     return int64_to_float128(a, status);
4120 }
4121 
4122 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4123 {
4124     FloatParts128 p;
4125 
4126     parts_sint_to_float(&p, a, 0, status);
4127     return floatx80_round_pack_canonical(&p, status);
4128 }
4129 
4130 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4131 {
4132     return int64_to_floatx80(a, status);
4133 }
4134 
4135 /*
4136  * Unsigned Integer to floating-point conversions
4137  */
4138 
4139 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4140 {
4141     FloatParts64 p;
4142 
4143     parts_uint_to_float(&p, a, scale, status);
4144     return float16_round_pack_canonical(&p, status);
4145 }
4146 
4147 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4148 {
4149     return uint64_to_float16_scalbn(a, scale, status);
4150 }
4151 
4152 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4153 {
4154     return uint64_to_float16_scalbn(a, scale, status);
4155 }
4156 
4157 float16 uint64_to_float16(uint64_t a, float_status *status)
4158 {
4159     return uint64_to_float16_scalbn(a, 0, status);
4160 }
4161 
4162 float16 uint32_to_float16(uint32_t a, float_status *status)
4163 {
4164     return uint64_to_float16_scalbn(a, 0, status);
4165 }
4166 
4167 float16 uint16_to_float16(uint16_t a, float_status *status)
4168 {
4169     return uint64_to_float16_scalbn(a, 0, status);
4170 }
4171 
4172 float16 uint8_to_float16(uint8_t a, float_status *status)
4173 {
4174     return uint64_to_float16_scalbn(a, 0, status);
4175 }
4176 
4177 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4178 {
4179     FloatParts64 p;
4180 
4181     /* Without scaling, there are no overflow concerns. */
4182     if (likely(scale == 0) && can_use_fpu(status)) {
4183         union_float32 ur;
4184         ur.h = a;
4185         return ur.s;
4186     }
4187 
4188     parts_uint_to_float(&p, a, scale, status);
4189     return float32_round_pack_canonical(&p, status);
4190 }
4191 
4192 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4193 {
4194     return uint64_to_float32_scalbn(a, scale, status);
4195 }
4196 
4197 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4198 {
4199     return uint64_to_float32_scalbn(a, scale, status);
4200 }
4201 
4202 float32 uint64_to_float32(uint64_t a, float_status *status)
4203 {
4204     return uint64_to_float32_scalbn(a, 0, status);
4205 }
4206 
4207 float32 uint32_to_float32(uint32_t a, float_status *status)
4208 {
4209     return uint64_to_float32_scalbn(a, 0, status);
4210 }
4211 
4212 float32 uint16_to_float32(uint16_t a, float_status *status)
4213 {
4214     return uint64_to_float32_scalbn(a, 0, status);
4215 }
4216 
4217 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4218 {
4219     FloatParts64 p;
4220 
4221     /* Without scaling, there are no overflow concerns. */
4222     if (likely(scale == 0) && can_use_fpu(status)) {
4223         union_float64 ur;
4224         ur.h = a;
4225         return ur.s;
4226     }
4227 
4228     parts_uint_to_float(&p, a, scale, status);
4229     return float64_round_pack_canonical(&p, status);
4230 }
4231 
4232 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4233 {
4234     return uint64_to_float64_scalbn(a, scale, status);
4235 }
4236 
4237 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4238 {
4239     return uint64_to_float64_scalbn(a, scale, status);
4240 }
4241 
4242 float64 uint64_to_float64(uint64_t a, float_status *status)
4243 {
4244     return uint64_to_float64_scalbn(a, 0, status);
4245 }
4246 
4247 float64 uint32_to_float64(uint32_t a, float_status *status)
4248 {
4249     return uint64_to_float64_scalbn(a, 0, status);
4250 }
4251 
4252 float64 uint16_to_float64(uint16_t a, float_status *status)
4253 {
4254     return uint64_to_float64_scalbn(a, 0, status);
4255 }
4256 
4257 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4258 {
4259     FloatParts64 p;
4260 
4261     parts_uint_to_float(&p, a, scale, status);
4262     return bfloat16_round_pack_canonical(&p, status);
4263 }
4264 
4265 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4266 {
4267     return uint64_to_bfloat16_scalbn(a, scale, status);
4268 }
4269 
4270 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4271 {
4272     return uint64_to_bfloat16_scalbn(a, scale, status);
4273 }
4274 
4275 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4276 {
4277     return uint64_to_bfloat16_scalbn(a, scale, status);
4278 }
4279 
4280 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4281 {
4282     return uint64_to_bfloat16_scalbn(a, 0, status);
4283 }
4284 
4285 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4286 {
4287     return uint64_to_bfloat16_scalbn(a, 0, status);
4288 }
4289 
4290 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4291 {
4292     return uint64_to_bfloat16_scalbn(a, 0, status);
4293 }
4294 
4295 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4296 {
4297     return uint64_to_bfloat16_scalbn(a, 0, status);
4298 }
4299 
4300 float128 uint64_to_float128(uint64_t a, float_status *status)
4301 {
4302     FloatParts128 p;
4303 
4304     parts_uint_to_float(&p, a, 0, status);
4305     return float128_round_pack_canonical(&p, status);
4306 }
4307 
4308 float128 uint128_to_float128(Int128 a, float_status *status)
4309 {
4310     FloatParts128 p = { };
4311     int shift;
4312 
4313     if (int128_nz(a)) {
4314         p.cls = float_class_normal;
4315 
4316         shift = clz64(int128_gethi(a));
4317         if (shift == 64) {
4318             shift += clz64(int128_getlo(a));
4319         }
4320 
4321         p.exp = 127 - shift;
4322         a = int128_lshift(a, shift);
4323 
4324         p.frac_hi = int128_gethi(a);
4325         p.frac_lo = int128_getlo(a);
4326     } else {
4327         p.cls = float_class_zero;
4328     }
4329 
4330     return float128_round_pack_canonical(&p, status);
4331 }
4332 
4333 /*
4334  * Minimum and maximum
4335  */
4336 
4337 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4338 {
4339     FloatParts64 pa, pb, *pr;
4340 
4341     float16_unpack_canonical(&pa, a, s);
4342     float16_unpack_canonical(&pb, b, s);
4343     pr = parts_minmax(&pa, &pb, s, flags);
4344 
4345     return float16_round_pack_canonical(pr, s);
4346 }
4347 
4348 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4349                                 float_status *s, int flags)
4350 {
4351     FloatParts64 pa, pb, *pr;
4352 
4353     bfloat16_unpack_canonical(&pa, a, s);
4354     bfloat16_unpack_canonical(&pb, b, s);
4355     pr = parts_minmax(&pa, &pb, s, flags);
4356 
4357     return bfloat16_round_pack_canonical(pr, s);
4358 }
4359 
4360 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4361 {
4362     FloatParts64 pa, pb, *pr;
4363 
4364     float32_unpack_canonical(&pa, a, s);
4365     float32_unpack_canonical(&pb, b, s);
4366     pr = parts_minmax(&pa, &pb, s, flags);
4367 
4368     return float32_round_pack_canonical(pr, s);
4369 }
4370 
4371 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4372 {
4373     FloatParts64 pa, pb, *pr;
4374 
4375     float64_unpack_canonical(&pa, a, s);
4376     float64_unpack_canonical(&pb, b, s);
4377     pr = parts_minmax(&pa, &pb, s, flags);
4378 
4379     return float64_round_pack_canonical(pr, s);
4380 }
4381 
4382 static float128 float128_minmax(float128 a, float128 b,
4383                                 float_status *s, int flags)
4384 {
4385     FloatParts128 pa, pb, *pr;
4386 
4387     float128_unpack_canonical(&pa, a, s);
4388     float128_unpack_canonical(&pb, b, s);
4389     pr = parts_minmax(&pa, &pb, s, flags);
4390 
4391     return float128_round_pack_canonical(pr, s);
4392 }
4393 
4394 #define MINMAX_1(type, name, flags) \
4395     type type##_##name(type a, type b, float_status *s) \
4396     { return type##_minmax(a, b, s, flags); }
4397 
4398 #define MINMAX_2(type) \
4399     MINMAX_1(type, max, 0)                                                \
4400     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4401     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4402     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4403     MINMAX_1(type, min, minmax_ismin)                                     \
4404     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4405     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4406     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4407 
4408 MINMAX_2(float16)
4409 MINMAX_2(bfloat16)
4410 MINMAX_2(float32)
4411 MINMAX_2(float64)
4412 MINMAX_2(float128)
4413 
4414 #undef MINMAX_1
4415 #undef MINMAX_2
4416 
4417 /*
4418  * Floating point compare
4419  */
4420 
4421 static FloatRelation QEMU_FLATTEN
4422 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4423 {
4424     FloatParts64 pa, pb;
4425 
4426     float16_unpack_canonical(&pa, a, s);
4427     float16_unpack_canonical(&pb, b, s);
4428     return parts_compare(&pa, &pb, s, is_quiet);
4429 }
4430 
4431 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4432 {
4433     return float16_do_compare(a, b, s, false);
4434 }
4435 
4436 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4437 {
4438     return float16_do_compare(a, b, s, true);
4439 }
4440 
4441 static FloatRelation QEMU_SOFTFLOAT_ATTR
4442 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4443 {
4444     FloatParts64 pa, pb;
4445 
4446     float32_unpack_canonical(&pa, a, s);
4447     float32_unpack_canonical(&pb, b, s);
4448     return parts_compare(&pa, &pb, s, is_quiet);
4449 }
4450 
4451 static FloatRelation QEMU_FLATTEN
4452 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4453 {
4454     union_float32 ua, ub;
4455 
4456     ua.s = xa;
4457     ub.s = xb;
4458 
4459     if (QEMU_NO_HARDFLOAT) {
4460         goto soft;
4461     }
4462 
4463     if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
4464         /* We may need to set the input_denormal_used flag */
4465         goto soft;
4466     }
4467 
4468     if (isgreaterequal(ua.h, ub.h)) {
4469         if (isgreater(ua.h, ub.h)) {
4470             return float_relation_greater;
4471         }
4472         return float_relation_equal;
4473     }
4474     if (likely(isless(ua.h, ub.h))) {
4475         return float_relation_less;
4476     }
4477     /*
4478      * The only condition remaining is unordered.
4479      * Fall through to set flags.
4480      */
4481  soft:
4482     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4483 }
4484 
4485 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4486 {
4487     return float32_hs_compare(a, b, s, false);
4488 }
4489 
4490 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4491 {
4492     return float32_hs_compare(a, b, s, true);
4493 }
4494 
4495 static FloatRelation QEMU_SOFTFLOAT_ATTR
4496 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4497 {
4498     FloatParts64 pa, pb;
4499 
4500     float64_unpack_canonical(&pa, a, s);
4501     float64_unpack_canonical(&pb, b, s);
4502     return parts_compare(&pa, &pb, s, is_quiet);
4503 }
4504 
4505 static FloatRelation QEMU_FLATTEN
4506 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4507 {
4508     union_float64 ua, ub;
4509 
4510     ua.s = xa;
4511     ub.s = xb;
4512 
4513     if (QEMU_NO_HARDFLOAT) {
4514         goto soft;
4515     }
4516 
4517     if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
4518         /* We may need to set the input_denormal_used flag */
4519         goto soft;
4520     }
4521 
4522     if (isgreaterequal(ua.h, ub.h)) {
4523         if (isgreater(ua.h, ub.h)) {
4524             return float_relation_greater;
4525         }
4526         return float_relation_equal;
4527     }
4528     if (likely(isless(ua.h, ub.h))) {
4529         return float_relation_less;
4530     }
4531     /*
4532      * The only condition remaining is unordered.
4533      * Fall through to set flags.
4534      */
4535  soft:
4536     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4537 }
4538 
4539 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4540 {
4541     return float64_hs_compare(a, b, s, false);
4542 }
4543 
4544 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4545 {
4546     return float64_hs_compare(a, b, s, true);
4547 }
4548 
4549 static FloatRelation QEMU_FLATTEN
4550 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4551 {
4552     FloatParts64 pa, pb;
4553 
4554     bfloat16_unpack_canonical(&pa, a, s);
4555     bfloat16_unpack_canonical(&pb, b, s);
4556     return parts_compare(&pa, &pb, s, is_quiet);
4557 }
4558 
4559 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4560 {
4561     return bfloat16_do_compare(a, b, s, false);
4562 }
4563 
4564 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4565 {
4566     return bfloat16_do_compare(a, b, s, true);
4567 }
4568 
4569 static FloatRelation QEMU_FLATTEN
4570 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4571 {
4572     FloatParts128 pa, pb;
4573 
4574     float128_unpack_canonical(&pa, a, s);
4575     float128_unpack_canonical(&pb, b, s);
4576     return parts_compare(&pa, &pb, s, is_quiet);
4577 }
4578 
4579 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4580 {
4581     return float128_do_compare(a, b, s, false);
4582 }
4583 
4584 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4585 {
4586     return float128_do_compare(a, b, s, true);
4587 }
4588 
4589 static FloatRelation QEMU_FLATTEN
4590 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4591 {
4592     FloatParts128 pa, pb;
4593 
4594     if (!floatx80_unpack_canonical(&pa, a, s) ||
4595         !floatx80_unpack_canonical(&pb, b, s)) {
4596         return float_relation_unordered;
4597     }
4598     return parts_compare(&pa, &pb, s, is_quiet);
4599 }
4600 
4601 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4602 {
4603     return floatx80_do_compare(a, b, s, false);
4604 }
4605 
4606 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4607 {
4608     return floatx80_do_compare(a, b, s, true);
4609 }
4610 
4611 /*
4612  * Scale by 2**N
4613  */
4614 
4615 float16 float16_scalbn(float16 a, int n, float_status *status)
4616 {
4617     FloatParts64 p;
4618 
4619     float16_unpack_canonical(&p, a, status);
4620     parts_scalbn(&p, n, status);
4621     return float16_round_pack_canonical(&p, status);
4622 }
4623 
4624 float32 float32_scalbn(float32 a, int n, float_status *status)
4625 {
4626     FloatParts64 p;
4627 
4628     float32_unpack_canonical(&p, a, status);
4629     parts_scalbn(&p, n, status);
4630     return float32_round_pack_canonical(&p, status);
4631 }
4632 
4633 float64 float64_scalbn(float64 a, int n, float_status *status)
4634 {
4635     FloatParts64 p;
4636 
4637     float64_unpack_canonical(&p, a, status);
4638     parts_scalbn(&p, n, status);
4639     return float64_round_pack_canonical(&p, status);
4640 }
4641 
4642 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4643 {
4644     FloatParts64 p;
4645 
4646     bfloat16_unpack_canonical(&p, a, status);
4647     parts_scalbn(&p, n, status);
4648     return bfloat16_round_pack_canonical(&p, status);
4649 }
4650 
4651 float128 float128_scalbn(float128 a, int n, float_status *status)
4652 {
4653     FloatParts128 p;
4654 
4655     float128_unpack_canonical(&p, a, status);
4656     parts_scalbn(&p, n, status);
4657     return float128_round_pack_canonical(&p, status);
4658 }
4659 
4660 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4661 {
4662     FloatParts128 p;
4663 
4664     if (!floatx80_unpack_canonical(&p, a, status)) {
4665         return floatx80_default_nan(status);
4666     }
4667     parts_scalbn(&p, n, status);
4668     return floatx80_round_pack_canonical(&p, status);
4669 }
4670 
4671 /*
4672  * Square Root
4673  */
4674 
4675 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4676 {
4677     FloatParts64 p;
4678 
4679     float16_unpack_canonical(&p, a, status);
4680     parts_sqrt(&p, status, &float16_params);
4681     return float16_round_pack_canonical(&p, status);
4682 }
4683 
4684 static float32 QEMU_SOFTFLOAT_ATTR
4685 soft_f32_sqrt(float32 a, float_status *status)
4686 {
4687     FloatParts64 p;
4688 
4689     float32_unpack_canonical(&p, a, status);
4690     parts_sqrt(&p, status, &float32_params);
4691     return float32_round_pack_canonical(&p, status);
4692 }
4693 
4694 static float64 QEMU_SOFTFLOAT_ATTR
4695 soft_f64_sqrt(float64 a, float_status *status)
4696 {
4697     FloatParts64 p;
4698 
4699     float64_unpack_canonical(&p, a, status);
4700     parts_sqrt(&p, status, &float64_params);
4701     return float64_round_pack_canonical(&p, status);
4702 }
4703 
4704 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4705 {
4706     union_float32 ua, ur;
4707 
4708     ua.s = xa;
4709     if (unlikely(!can_use_fpu(s))) {
4710         goto soft;
4711     }
4712 
4713     float32_input_flush1(&ua.s, s);
4714     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4715         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4716                        fpclassify(ua.h) == FP_ZERO) ||
4717                      signbit(ua.h))) {
4718             goto soft;
4719         }
4720     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4721                         float32_is_neg(ua.s))) {
4722         goto soft;
4723     }
4724     ur.h = sqrtf(ua.h);
4725     return ur.s;
4726 
4727  soft:
4728     return soft_f32_sqrt(ua.s, s);
4729 }
4730 
4731 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4732 {
4733     union_float64 ua, ur;
4734 
4735     ua.s = xa;
4736     if (unlikely(!can_use_fpu(s))) {
4737         goto soft;
4738     }
4739 
4740     float64_input_flush1(&ua.s, s);
4741     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4742         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4743                        fpclassify(ua.h) == FP_ZERO) ||
4744                      signbit(ua.h))) {
4745             goto soft;
4746         }
4747     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4748                         float64_is_neg(ua.s))) {
4749         goto soft;
4750     }
4751     ur.h = sqrt(ua.h);
4752     return ur.s;
4753 
4754  soft:
4755     return soft_f64_sqrt(ua.s, s);
4756 }
4757 
4758 float64 float64r32_sqrt(float64 a, float_status *status)
4759 {
4760     FloatParts64 p;
4761 
4762     float64_unpack_canonical(&p, a, status);
4763     parts_sqrt(&p, status, &float64_params);
4764     return float64r32_round_pack_canonical(&p, status);
4765 }
4766 
4767 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4768 {
4769     FloatParts64 p;
4770 
4771     bfloat16_unpack_canonical(&p, a, status);
4772     parts_sqrt(&p, status, &bfloat16_params);
4773     return bfloat16_round_pack_canonical(&p, status);
4774 }
4775 
4776 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4777 {
4778     FloatParts128 p;
4779 
4780     float128_unpack_canonical(&p, a, status);
4781     parts_sqrt(&p, status, &float128_params);
4782     return float128_round_pack_canonical(&p, status);
4783 }
4784 
4785 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4786 {
4787     FloatParts128 p;
4788 
4789     if (!floatx80_unpack_canonical(&p, a, s)) {
4790         return floatx80_default_nan(s);
4791     }
4792     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4793     return floatx80_round_pack_canonical(&p, s);
4794 }
4795 
4796 /*
4797  * log2
4798  */
4799 float32 float32_log2(float32 a, float_status *status)
4800 {
4801     FloatParts64 p;
4802 
4803     float32_unpack_canonical(&p, a, status);
4804     parts_log2(&p, status, &float32_params);
4805     return float32_round_pack_canonical(&p, status);
4806 }
4807 
4808 float64 float64_log2(float64 a, float_status *status)
4809 {
4810     FloatParts64 p;
4811 
4812     float64_unpack_canonical(&p, a, status);
4813     parts_log2(&p, status, &float64_params);
4814     return float64_round_pack_canonical(&p, status);
4815 }
4816 
4817 /*----------------------------------------------------------------------------
4818 | The pattern for a default generated NaN.
4819 *----------------------------------------------------------------------------*/
4820 
4821 float16 float16_default_nan(float_status *status)
4822 {
4823     FloatParts64 p;
4824 
4825     parts_default_nan(&p, status);
4826     p.frac >>= float16_params.frac_shift;
4827     return float16_pack_raw(&p);
4828 }
4829 
4830 float32 float32_default_nan(float_status *status)
4831 {
4832     FloatParts64 p;
4833 
4834     parts_default_nan(&p, status);
4835     p.frac >>= float32_params.frac_shift;
4836     return float32_pack_raw(&p);
4837 }
4838 
4839 float64 float64_default_nan(float_status *status)
4840 {
4841     FloatParts64 p;
4842 
4843     parts_default_nan(&p, status);
4844     p.frac >>= float64_params.frac_shift;
4845     return float64_pack_raw(&p);
4846 }
4847 
4848 float128 float128_default_nan(float_status *status)
4849 {
4850     FloatParts128 p;
4851 
4852     parts_default_nan(&p, status);
4853     frac_shr(&p, float128_params.frac_shift);
4854     return float128_pack_raw(&p);
4855 }
4856 
4857 bfloat16 bfloat16_default_nan(float_status *status)
4858 {
4859     FloatParts64 p;
4860 
4861     parts_default_nan(&p, status);
4862     p.frac >>= bfloat16_params.frac_shift;
4863     return bfloat16_pack_raw(&p);
4864 }
4865 
4866 /*----------------------------------------------------------------------------
4867 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4868 *----------------------------------------------------------------------------*/
4869 
4870 float16 float16_silence_nan(float16 a, float_status *status)
4871 {
4872     FloatParts64 p;
4873 
4874     float16_unpack_raw(&p, a);
4875     p.frac <<= float16_params.frac_shift;
4876     parts_silence_nan(&p, status);
4877     p.frac >>= float16_params.frac_shift;
4878     return float16_pack_raw(&p);
4879 }
4880 
4881 float32 float32_silence_nan(float32 a, float_status *status)
4882 {
4883     FloatParts64 p;
4884 
4885     float32_unpack_raw(&p, a);
4886     p.frac <<= float32_params.frac_shift;
4887     parts_silence_nan(&p, status);
4888     p.frac >>= float32_params.frac_shift;
4889     return float32_pack_raw(&p);
4890 }
4891 
4892 float64 float64_silence_nan(float64 a, float_status *status)
4893 {
4894     FloatParts64 p;
4895 
4896     float64_unpack_raw(&p, a);
4897     p.frac <<= float64_params.frac_shift;
4898     parts_silence_nan(&p, status);
4899     p.frac >>= float64_params.frac_shift;
4900     return float64_pack_raw(&p);
4901 }
4902 
4903 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4904 {
4905     FloatParts64 p;
4906 
4907     bfloat16_unpack_raw(&p, a);
4908     p.frac <<= bfloat16_params.frac_shift;
4909     parts_silence_nan(&p, status);
4910     p.frac >>= bfloat16_params.frac_shift;
4911     return bfloat16_pack_raw(&p);
4912 }
4913 
4914 float128 float128_silence_nan(float128 a, float_status *status)
4915 {
4916     FloatParts128 p;
4917 
4918     float128_unpack_raw(&p, a);
4919     frac_shl(&p, float128_params.frac_shift);
4920     parts_silence_nan(&p, status);
4921     frac_shr(&p, float128_params.frac_shift);
4922     return float128_pack_raw(&p);
4923 }
4924 
4925 /*----------------------------------------------------------------------------
4926 | If `a' is denormal and we are in flush-to-zero mode then set the
4927 | input-denormal exception and return zero. Otherwise just return the value.
4928 *----------------------------------------------------------------------------*/
4929 
4930 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4931 {
4932     if (p.exp == 0 && p.frac != 0) {
4933         float_raise(float_flag_input_denormal_flushed, status);
4934         return true;
4935     }
4936 
4937     return false;
4938 }
4939 
4940 float16 float16_squash_input_denormal(float16 a, float_status *status)
4941 {
4942     if (status->flush_inputs_to_zero) {
4943         FloatParts64 p;
4944 
4945         float16_unpack_raw(&p, a);
4946         if (parts_squash_denormal(p, status)) {
4947             return float16_set_sign(float16_zero, p.sign);
4948         }
4949     }
4950     return a;
4951 }
4952 
4953 float32 float32_squash_input_denormal(float32 a, float_status *status)
4954 {
4955     if (status->flush_inputs_to_zero) {
4956         FloatParts64 p;
4957 
4958         float32_unpack_raw(&p, a);
4959         if (parts_squash_denormal(p, status)) {
4960             return float32_set_sign(float32_zero, p.sign);
4961         }
4962     }
4963     return a;
4964 }
4965 
4966 float64 float64_squash_input_denormal(float64 a, float_status *status)
4967 {
4968     if (status->flush_inputs_to_zero) {
4969         FloatParts64 p;
4970 
4971         float64_unpack_raw(&p, a);
4972         if (parts_squash_denormal(p, status)) {
4973             return float64_set_sign(float64_zero, p.sign);
4974         }
4975     }
4976     return a;
4977 }
4978 
4979 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4980 {
4981     if (status->flush_inputs_to_zero) {
4982         FloatParts64 p;
4983 
4984         bfloat16_unpack_raw(&p, a);
4985         if (parts_squash_denormal(p, status)) {
4986             return bfloat16_set_sign(bfloat16_zero, p.sign);
4987         }
4988     }
4989     return a;
4990 }
4991 
4992 /*----------------------------------------------------------------------------
4993 | Normalizes the subnormal extended double-precision floating-point value
4994 | represented by the denormalized significand `aSig'.  The normalized exponent
4995 | and significand are stored at the locations pointed to by `zExpPtr' and
4996 | `zSigPtr', respectively.
4997 *----------------------------------------------------------------------------*/
4998 
4999 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
5000                                 uint64_t *zSigPtr)
5001 {
5002     int8_t shiftCount;
5003 
5004     shiftCount = clz64(aSig);
5005     *zSigPtr = aSig<<shiftCount;
5006     *zExpPtr = 1 - shiftCount;
5007 }
5008 
5009 /*----------------------------------------------------------------------------
5010 | Takes two extended double-precision floating-point values `a' and `b', one
5011 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
5012 | `b' is a signaling NaN, the invalid exception is raised.
5013 *----------------------------------------------------------------------------*/
5014 
5015 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
5016 {
5017     FloatParts128 pa, pb, *pr;
5018 
5019     if (!floatx80_unpack_canonical(&pa, a, status) ||
5020         !floatx80_unpack_canonical(&pb, b, status)) {
5021         return floatx80_default_nan(status);
5022     }
5023 
5024     pr = parts_pick_nan(&pa, &pb, status);
5025     return floatx80_round_pack_canonical(pr, status);
5026 }
5027 
5028 /*----------------------------------------------------------------------------
5029 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5030 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
5031 | and returns the proper extended double-precision floating-point value
5032 | corresponding to the abstract input.  Ordinarily, the abstract value is
5033 | rounded and packed into the extended double-precision format, with the
5034 | inexact exception raised if the abstract input cannot be represented
5035 | exactly.  However, if the abstract value is too large, the overflow and
5036 | inexact exceptions are raised and an infinity or maximal finite value is
5037 | returned.  If the abstract value is too small, the input value is rounded to
5038 | a subnormal number, and the underflow and inexact exceptions are raised if
5039 | the abstract input cannot be represented exactly as a subnormal extended
5040 | double-precision floating-point number.
5041 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
5042 | the result is rounded to the same number of bits as single or double
5043 | precision, respectively.  Otherwise, the result is rounded to the full
5044 | precision of the extended double-precision format.
5045 |     The input significand must be normalized or smaller.  If the input
5046 | significand is not normalized, `zExp' must be 0; in that case, the result
5047 | returned is a subnormal number, and it must not require rounding.  The
5048 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
5049 | Floating-Point Arithmetic.
5050 *----------------------------------------------------------------------------*/
5051 
5052 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
5053                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
5054                               float_status *status)
5055 {
5056     FloatRoundMode roundingMode;
5057     bool roundNearestEven, increment, isTiny;
5058     int64_t roundIncrement, roundMask, roundBits;
5059 
5060     roundingMode = status->float_rounding_mode;
5061     roundNearestEven = ( roundingMode == float_round_nearest_even );
5062     switch (roundingPrecision) {
5063     case floatx80_precision_x:
5064         goto precision80;
5065     case floatx80_precision_d:
5066         roundIncrement = UINT64_C(0x0000000000000400);
5067         roundMask = UINT64_C(0x00000000000007FF);
5068         break;
5069     case floatx80_precision_s:
5070         roundIncrement = UINT64_C(0x0000008000000000);
5071         roundMask = UINT64_C(0x000000FFFFFFFFFF);
5072         break;
5073     default:
5074         g_assert_not_reached();
5075     }
5076     zSig0 |= ( zSig1 != 0 );
5077     switch (roundingMode) {
5078     case float_round_nearest_even:
5079     case float_round_ties_away:
5080         break;
5081     case float_round_to_zero:
5082         roundIncrement = 0;
5083         break;
5084     case float_round_up:
5085         roundIncrement = zSign ? 0 : roundMask;
5086         break;
5087     case float_round_down:
5088         roundIncrement = zSign ? roundMask : 0;
5089         break;
5090     default:
5091         abort();
5092     }
5093     roundBits = zSig0 & roundMask;
5094     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5095         if (    ( 0x7FFE < zExp )
5096              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
5097            ) {
5098             goto overflow;
5099         }
5100         if ( zExp <= 0 ) {
5101             if (status->flush_to_zero) {
5102                 float_raise(float_flag_output_denormal_flushed, status);
5103                 return packFloatx80(zSign, 0, 0);
5104             }
5105             isTiny = status->tininess_before_rounding
5106                   || (zExp < 0 )
5107                   || (zSig0 <= zSig0 + roundIncrement);
5108             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5109             zExp = 0;
5110             roundBits = zSig0 & roundMask;
5111             if (isTiny && roundBits) {
5112                 float_raise(float_flag_underflow, status);
5113             }
5114             if (roundBits) {
5115                 float_raise(float_flag_inexact, status);
5116             }
5117             zSig0 += roundIncrement;
5118             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5119             roundIncrement = roundMask + 1;
5120             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5121                 roundMask |= roundIncrement;
5122             }
5123             zSig0 &= ~ roundMask;
5124             return packFloatx80( zSign, zExp, zSig0 );
5125         }
5126     }
5127     if (roundBits) {
5128         float_raise(float_flag_inexact, status);
5129     }
5130     zSig0 += roundIncrement;
5131     if ( zSig0 < roundIncrement ) {
5132         ++zExp;
5133         zSig0 = UINT64_C(0x8000000000000000);
5134     }
5135     roundIncrement = roundMask + 1;
5136     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5137         roundMask |= roundIncrement;
5138     }
5139     zSig0 &= ~ roundMask;
5140     if ( zSig0 == 0 ) zExp = 0;
5141     return packFloatx80( zSign, zExp, zSig0 );
5142  precision80:
5143     switch (roundingMode) {
5144     case float_round_nearest_even:
5145     case float_round_ties_away:
5146         increment = ((int64_t)zSig1 < 0);
5147         break;
5148     case float_round_to_zero:
5149         increment = 0;
5150         break;
5151     case float_round_up:
5152         increment = !zSign && zSig1;
5153         break;
5154     case float_round_down:
5155         increment = zSign && zSig1;
5156         break;
5157     default:
5158         abort();
5159     }
5160     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5161         if (    ( 0x7FFE < zExp )
5162              || (    ( zExp == 0x7FFE )
5163                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5164                   && increment
5165                 )
5166            ) {
5167             roundMask = 0;
5168  overflow:
5169             float_raise(float_flag_overflow | float_flag_inexact, status);
5170             if (    ( roundingMode == float_round_to_zero )
5171                  || ( zSign && ( roundingMode == float_round_up ) )
5172                  || ( ! zSign && ( roundingMode == float_round_down ) )
5173                ) {
5174                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5175             }
5176             return floatx80_default_inf(zSign, status);
5177         }
5178         if ( zExp <= 0 ) {
5179             isTiny = status->tininess_before_rounding
5180                   || (zExp < 0)
5181                   || !increment
5182                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5183             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5184             zExp = 0;
5185             if (isTiny && zSig1) {
5186                 float_raise(float_flag_underflow, status);
5187             }
5188             if (zSig1) {
5189                 float_raise(float_flag_inexact, status);
5190             }
5191             switch (roundingMode) {
5192             case float_round_nearest_even:
5193             case float_round_ties_away:
5194                 increment = ((int64_t)zSig1 < 0);
5195                 break;
5196             case float_round_to_zero:
5197                 increment = 0;
5198                 break;
5199             case float_round_up:
5200                 increment = !zSign && zSig1;
5201                 break;
5202             case float_round_down:
5203                 increment = zSign && zSig1;
5204                 break;
5205             default:
5206                 abort();
5207             }
5208             if ( increment ) {
5209                 ++zSig0;
5210                 if (!(zSig1 << 1) && roundNearestEven) {
5211                     zSig0 &= ~1;
5212                 }
5213                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5214             }
5215             return packFloatx80( zSign, zExp, zSig0 );
5216         }
5217     }
5218     if (zSig1) {
5219         float_raise(float_flag_inexact, status);
5220     }
5221     if ( increment ) {
5222         ++zSig0;
5223         if ( zSig0 == 0 ) {
5224             ++zExp;
5225             zSig0 = UINT64_C(0x8000000000000000);
5226         }
5227         else {
5228             if (!(zSig1 << 1) && roundNearestEven) {
5229                 zSig0 &= ~1;
5230             }
5231         }
5232     }
5233     else {
5234         if ( zSig0 == 0 ) zExp = 0;
5235     }
5236     return packFloatx80( zSign, zExp, zSig0 );
5237 
5238 }
5239 
5240 /*----------------------------------------------------------------------------
5241 | Takes an abstract floating-point value having sign `zSign', exponent
5242 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5243 | and returns the proper extended double-precision floating-point value
5244 | corresponding to the abstract input.  This routine is just like
5245 | `roundAndPackFloatx80' except that the input significand does not have to be
5246 | normalized.
5247 *----------------------------------------------------------------------------*/
5248 
5249 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5250                                        bool zSign, int32_t zExp,
5251                                        uint64_t zSig0, uint64_t zSig1,
5252                                        float_status *status)
5253 {
5254     int8_t shiftCount;
5255 
5256     if ( zSig0 == 0 ) {
5257         zSig0 = zSig1;
5258         zSig1 = 0;
5259         zExp -= 64;
5260     }
5261     shiftCount = clz64(zSig0);
5262     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5263     zExp -= shiftCount;
5264     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5265                                 zSig0, zSig1, status);
5266 
5267 }
5268 
5269 /*----------------------------------------------------------------------------
5270 | Returns the binary exponential of the single-precision floating-point value
5271 | `a'. The operation is performed according to the IEC/IEEE Standard for
5272 | Binary Floating-Point Arithmetic.
5273 |
5274 | Uses the following identities:
5275 |
5276 | 1. -------------------------------------------------------------------------
5277 |      x    x*ln(2)
5278 |     2  = e
5279 |
5280 | 2. -------------------------------------------------------------------------
5281 |                      2     3     4     5           n
5282 |      x        x     x     x     x     x           x
5283 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5284 |               1!    2!    3!    4!    5!          n!
5285 *----------------------------------------------------------------------------*/
5286 
5287 static const float64 float32_exp2_coefficients[15] =
5288 {
5289     const_float64( 0x3ff0000000000000ll ), /*  1 */
5290     const_float64( 0x3fe0000000000000ll ), /*  2 */
5291     const_float64( 0x3fc5555555555555ll ), /*  3 */
5292     const_float64( 0x3fa5555555555555ll ), /*  4 */
5293     const_float64( 0x3f81111111111111ll ), /*  5 */
5294     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5295     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5296     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5297     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5298     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5299     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5300     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5301     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5302     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5303     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5304 };
5305 
5306 float32 float32_exp2(float32 a, float_status *status)
5307 {
5308     FloatParts64 xp, xnp, tp, rp;
5309     int i;
5310 
5311     float32_unpack_canonical(&xp, a, status);
5312     if (unlikely(xp.cls != float_class_normal)) {
5313         switch (xp.cls) {
5314         case float_class_denormal:
5315             break;
5316         case float_class_snan:
5317         case float_class_qnan:
5318             parts_return_nan(&xp, status);
5319             return float32_round_pack_canonical(&xp, status);
5320         case float_class_inf:
5321             return xp.sign ? float32_zero : a;
5322         case float_class_zero:
5323             return float32_one;
5324         default:
5325             g_assert_not_reached();
5326         }
5327     }
5328 
5329     float_raise(float_flag_inexact, status);
5330 
5331     float64_unpack_canonical(&tp, float64_ln2, status);
5332     xp = *parts_mul(&xp, &tp, status);
5333     xnp = xp;
5334 
5335     float64_unpack_canonical(&rp, float64_one, status);
5336     for (i = 0 ; i < 15 ; i++) {
5337 
5338         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5339         rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
5340         xnp = *parts_mul(&xnp, &xp, status);
5341     }
5342 
5343     return float32_round_pack_canonical(&rp, status);
5344 }
5345 
5346 /*----------------------------------------------------------------------------
5347 | Rounds the extended double-precision floating-point value `a'
5348 | to the precision provided by floatx80_rounding_precision and returns the
5349 | result as an extended double-precision floating-point value.
5350 | The operation is performed according to the IEC/IEEE Standard for Binary
5351 | Floating-Point Arithmetic.
5352 *----------------------------------------------------------------------------*/
5353 
5354 floatx80 floatx80_round(floatx80 a, float_status *status)
5355 {
5356     FloatParts128 p;
5357 
5358     if (!floatx80_unpack_canonical(&p, a, status)) {
5359         return floatx80_default_nan(status);
5360     }
5361     return floatx80_round_pack_canonical(&p, status);
5362 }
5363 
5364 static void __attribute__((constructor)) softfloat_init(void)
5365 {
5366     union_float64 ua, ub, uc, ur;
5367 
5368     if (QEMU_NO_HARDFLOAT) {
5369         return;
5370     }
5371     /*
5372      * Test that the host's FMA is not obviously broken. For example,
5373      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5374      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5375      */
5376     ua.s = 0x0020000000000001ULL;
5377     ub.s = 0x3ca0000000000000ULL;
5378     uc.s = 0x0020000000000000ULL;
5379     ur.h = fma(ua.h, ub.h, uc.h);
5380     if (ur.s != 0x0020000000000001ULL) {
5381         force_soft_fma = true;
5382     }
5383 }
5384