xref: /openbmc/qemu/fpu/softfloat.c (revision 37c954a1)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate soft_t##_input_flush__nocheck(a, s): if *a is a denormal,
 * replace it in place with a zero of the same sign and raise the
 * input-denormal flag.  The flush_inputs_to_zero mode is NOT checked
 * here; callers must have established that flushing is wanted.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * Generate soft_t##_input_flush1(a, s): flush a single denormal input
 * to zero, but only when s->flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/*
 * Generate soft_t##_input_flush2(a, b, s): as *_input_flush1, but for
 * the two operands of a binary operation.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/*
 * Generate soft_t##_input_flush3(a, b, c, s): as *_input_flush1, but for
 * the three operands of a fused multiply-add.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* All other hosts use the softfloat primitives for every check. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/* With hardfloat enabled, keep the softfloat fallbacks out of line. */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

/* View of the same 32 bits as either softfloat or host float. */
typedef union {
    float32 s;  /* softfloat representation */
    float h;    /* host (native) representation */
} union_float32;

/* View of the same 64 bits as either softfloat or host double. */
typedef union {
    float64 s;  /* softfloat representation */
    double h;   /* host (native) representation */
} union_float64;

/* Predicates over two operands (pre/post checks in float{32,64}_gen2). */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic 2-input float32 operation: try the host FPU first, falling
 * back to softfloat whenever the fast path cannot reproduce softfloat
 * semantics.
 *
 * @hard/@soft: host-FPU and softfloat implementations of the operation.
 * @pre:  checked before the host op; returning false forces the soft path.
 * @post: checked when the host result's magnitude is <= FLT_MIN;
 *        returning true forces recomputation via softfloat.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* Finite inputs (per @pre) produced inf: the op overflowed. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result in the zero/subnormal range: let softfloat decide. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic 2-input float64 operation; see float32_gen2 for the scheme.
 *
 * @hard/@soft: host-FPU and softfloat implementations of the operation.
 * @pre:  checked before the host op; returning false forces the soft path.
 * @post: checked when the host result's magnitude is <= DBL_MIN;
 *        returning true forces recomputation via softfloat.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* Finite inputs (per @pre) produced inf: the op overflowed. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result in the zero/subnormal range: let softfloat decide. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

/* __packed__ keeps the enum one byte wide. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,   /* not yet decomposed (see unpack_raw64) */
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471 
/* Convert a FloatClass value into a one-bit mask for set-membership tests. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Frequently tested combinations. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
485 
486 /* Simple helpers for checking if, or what kind of, NaN we have */
487 static inline __attribute__((unused)) bool is_nan(FloatClass c)
488 {
489     return unlikely(c >= float_class_qnan);
490 }
491 
/* Return true if @c is a signaling NaN. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}
496 
/* Return true if @c is a quiet NaN. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
501 
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;        /* unbiased exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
527 
/* As FloatParts64, but with a two-word (128-bit) fraction. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;        /* unbiased exponent */
    uint64_t frac_hi;   /* most significant fraction word */
    uint64_t frac_lo;
} FloatParts128;
535 
/* As FloatParts64, but with a four-word (256-bit) fraction,
 * big-endian word order. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;        /* unbiased exponent */
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
549 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    /* Instances are normally initialized with FLOAT_PARAMS below. */
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
575 
/* Expand fields based on the size of exponent and fraction.
 * The "& 63" keeps all shift counts within [0, 63] even for float128's
 * 112-bit fraction, where the positions are taken modulo 64 within the
 * least significant fraction word.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
587 
/* IEEE binary16: 5-bit exponent, 10-bit fraction. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* binary16 layout with the ARM Alternative Half Precision flag set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8-bit exponent, 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* IEEE binary128. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
612 
613 /* Unpack a float to parts, but do not canonicalize.  */
614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
615 {
616     const int f_size = fmt->frac_size;
617     const int e_size = fmt->exp_size;
618 
619     *r = (FloatParts64) {
620         .cls = float_class_unclassified,
621         .sign = extract64(raw, f_size + e_size, 1),
622         .exp = extract64(raw, f_size, e_size),
623         .frac = extract64(raw, 0, f_size)
624     };
625 }
626 
/* Unpack a binary16 value into *p (raw, not canonicalized). */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}
631 
/* Unpack a bfloat16 value into *p (raw, not canonicalized). */
static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}
636 
/* Unpack a binary32 value into *p (raw, not canonicalized). */
static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}
641 
/* Unpack a binary64 value into *p (raw, not canonicalized). */
static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
646 
/*
 * Unpack a binary128 value into *p (raw, not canonicalized).
 * The sign/exponent and upper fraction bits live in f.high; the low
 * 64 fraction bits are f.low.
 */
static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    /* Only frac_size - 64 fraction bits remain in the high word. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
660 
661 /* Pack a float from parts, but do not canonicalize.  */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
/* Pack parts into a binary16 value (raw, not canonicalized). */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}
678 
/* Pack parts into a bfloat16 value (raw, not canonicalized). */
static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}
683 
/* Pack parts into a binary32 value (raw, not canonicalized). */
static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}
688 
/* Pack parts into a binary64 value (raw, not canonicalized). */
static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
693 
/*
 * Pack parts into a binary128 value (raw, not canonicalized).
 * Sign, exponent and the upper fraction bits go into the high word;
 * frac_lo becomes the low word unchanged.
 */
static float128 float128_pack_raw(const FloatParts128 *p)
{
    /* Only frac_size - 64 fraction bits fit in the high word. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
/*
 * Dispatch macros: select the parts64_/parts128_/(parts256_) variant of
 * an operation according to the pointer type of its first argument,
 * via QEMU_GENERIC.  Forward declarations for the per-width workers
 * follow each dispatcher; their definitions come later in this file.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* ab_mask/abc_mask are float_cmask combinations for the operands. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
831 
832 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
833                                      int scale, int64_t min, int64_t max,
834                                      float_status *s);
835 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
836                                      int scale, int64_t min, int64_t max,
837                                      float_status *s);
838 
839 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
840     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
841 
842 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
843                                       int scale, uint64_t max,
844                                       float_status *s);
845 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
846                                        int scale, uint64_t max,
847                                        float_status *s);
848 
849 #define parts_float_to_uint(P, R, Z, M, S) \
850     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
851 
852 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
853                                   int scale, float_status *s);
854 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
855                                    int scale, float_status *s);
856 
857 #define parts_sint_to_float(P, I, Z, S) \
858     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
859 
860 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
861                                   int scale, float_status *s);
862 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
863                                    int scale, float_status *s);
864 
865 #define parts_uint_to_float(P, I, Z, S) \
866     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
867 
868 /*
869  * Helper functions for softfloat-parts.c.inc, per-size operations.
870  */
871 
872 #define FRAC_GENERIC_64_128(NAME, P) \
873     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
874 
875 #define FRAC_GENERIC_64_128_256(NAME, P) \
876     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
877                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
878 
879 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
880 {
881     return uadd64_overflow(a->frac, b->frac, &r->frac);
882 }
883 
884 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
885 {
886     bool c = 0;
887     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
888     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
889     return c;
890 }
891 
892 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
893 {
894     bool c = 0;
895     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
896     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
897     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
898     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
899     return c;
900 }
901 
902 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
903 
904 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
905 {
906     return uadd64_overflow(a->frac, c, &r->frac);
907 }
908 
909 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
910 {
911     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
912     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
913 }
914 
915 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
916 
917 static void frac64_allones(FloatParts64 *a)
918 {
919     a->frac = -1;
920 }
921 
922 static void frac128_allones(FloatParts128 *a)
923 {
924     a->frac_hi = a->frac_lo = -1;
925 }
926 
927 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
928 
929 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
930 {
931     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
932 }
933 
934 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
935 {
936     uint64_t ta = a->frac_hi, tb = b->frac_hi;
937     if (ta == tb) {
938         ta = a->frac_lo, tb = b->frac_lo;
939         if (ta == tb) {
940             return 0;
941         }
942     }
943     return ta < tb ? -1 : 1;
944 }
945 
946 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
947 
/* Zero the fraction of A. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}
952 
/* Zero both words of the fraction of A. */
static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}
957 
958 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
959 
/*
 * Divide A.frac by B.frac, leaving the quotient in A.frac with the
 * lsb jammed (set) if the division was inexact.  Returns true if the
 * result exponent must be decremented by one.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        /* Numerator is A.frac * 2^64 -- A shifted left one bit more
           than in the other branch. */
        n0 = a->frac;
        n1 = 0;
    } else {
        /* Numerator is A.frac * 2^63. */
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
991 
/*
 * As frac64_div, for a 128-bit fraction: divide A.frac by B.frac,
 * leaving the quotient in A.frac with the lsb jammed for inexact.
 * Returns true if the result exponent must be decremented by one.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /*
     * Pre-shift so the quotient occupies exactly 128 bits: when
     * A >= B, shift A right by one; when A < B, leave A as-is and
     * return true so the caller decrements the exponent (cf. the
     * rationale in frac64_div).
     */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
1038 
1039 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1040 
1041 static bool frac64_eqz(FloatParts64 *a)
1042 {
1043     return a->frac == 0;
1044 }
1045 
1046 static bool frac128_eqz(FloatParts128 *a)
1047 {
1048     return (a->frac_hi | a->frac_lo) == 0;
1049 }
1050 
1051 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1052 
/* Widening multiply: R = A.frac * B.frac as a full 128-bit product. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}
1057 
/* Widening multiply: R = A.frac * B.frac as a full 256-bit product. */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
1063 
1064 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1065 
/* Negate (two's complement) the fraction of A in place. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}
1070 
1071 static void frac128_neg(FloatParts128 *a)
1072 {
1073     bool c = 0;
1074     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1075     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1076 }
1077 
1078 static void frac256_neg(FloatParts256 *a)
1079 {
1080     bool c = 0;
1081     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1082     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1083     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1084     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1085 }
1086 
1087 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1088 
1089 static int frac64_normalize(FloatParts64 *a)
1090 {
1091     if (a->frac) {
1092         int shift = clz64(a->frac);
1093         a->frac <<= shift;
1094         return shift;
1095     }
1096     return 64;
1097 }
1098 
/*
 * Shift the 128-bit fraction of A left until its msb is set,
 * returning the shift count.  An all-zero fraction returns 128.
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        /* High word empty: the low word becomes the high word (+64). */
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1114 
/*
 * Shift the 256-bit fraction of A left until its msb is set,
 * returning the total shift count.  An all-zero fraction returns 256.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized. */
            return 0;
        }
        ret = shl;
    } else {
        /* Shift left by whole 64-bit words first. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Apply the remaining sub-word shift (1..63) across all words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1161 
1162 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1163 
/* Shift the fraction of A left by C bits.
 * NOTE(review): assumes 0 <= c < 64 (larger shifts would be UB) --
 * callers presumably guarantee this. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}
1168 
1169 static void frac128_shl(FloatParts128 *a, int c)
1170 {
1171     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1172 
1173     if (c & 64) {
1174         a0 = a1, a1 = 0;
1175     }
1176 
1177     c &= 63;
1178     if (c) {
1179         a0 = shl_double(a0, a1, c);
1180         a1 = a1 << c;
1181     }
1182 
1183     a->frac_hi = a0;
1184     a->frac_lo = a1;
1185 }
1186 
1187 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1188 
/* Shift the fraction of A right by C bits, discarding shifted-out bits.
 * NOTE(review): assumes 0 <= c < 64 -- callers presumably guarantee this. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}
1193 
1194 static void frac128_shr(FloatParts128 *a, int c)
1195 {
1196     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1197 
1198     if (c & 64) {
1199         a1 = a0, a0 = 0;
1200     }
1201 
1202     c &= 63;
1203     if (c) {
1204         a1 = shr_double(a0, a1, c);
1205         a0 = a0 >> c;
1206     }
1207 
1208     a->frac_hi = a0;
1209     a->frac_lo = a1;
1210 }
1211 
1212 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1213 
/*
 * Shift the fraction of A right by C bits, ORing any bits shifted
 * out into the lsb of the result (the "sticky" bit), so that
 * inexactness is not lost.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            /* shr_double(a0, 0, c) recovers exactly the bits shifted out. */
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything shifted out: only the sticky bit can survive. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1227 
/*
 * As frac64_shrjam, for a 128-bit fraction: shift right by C,
 * accumulating every bit shifted out into the sticky lsb.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the low word goes entirely to sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything shifted out: only the sticky bit can survive. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* shr_double(a1, 0, c) recovers the bits shifted out of a1. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1259 
/*
 * As frac64_shrjam, for a 256-bit fraction: shift right by C,
 * accumulating every bit shifted out into the sticky lsb.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts: words moved past the end feed sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything shifted out: only the sticky bit can survive. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* shr_double(a3, 0, c) recovers the bits shifted out of a3. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1301 
1302 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1303 
1304 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1305 {
1306     return usub64_overflow(a->frac, b->frac, &r->frac);
1307 }
1308 
1309 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1310 {
1311     bool c = 0;
1312     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1313     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1314     return c;
1315 }
1316 
1317 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1318 {
1319     bool c = 0;
1320     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1321     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1322     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1323     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1324     return c;
1325 }
1326 
1327 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1328 
/* Narrow a 128-bit fraction to 64 bits, jamming any discarded
 * low bits into the lsb (sticky). */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}
1333 
/* Narrow a 256-bit fraction to 128 bits, jamming any discarded
 * low bits into the lsb (sticky). */
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}
1339 
1340 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1341 
/* Widen a 64-bit fraction to 128 bits, zero-filling the low word. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}
1347 
/* Widen a 128-bit fraction to 256 bits, zero-filling the low words. */
static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}
1355 
1356 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1357 
1358 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1359 #define FloatPartsN    glue(FloatParts,N)
1360 #define FloatPartsW    glue(FloatParts,W)
1361 
1362 #define N 64
1363 #define W 128
1364 
1365 #include "softfloat-parts-addsub.c.inc"
1366 #include "softfloat-parts.c.inc"
1367 
1368 #undef  N
1369 #undef  W
1370 #define N 128
1371 #define W 256
1372 
1373 #include "softfloat-parts-addsub.c.inc"
1374 #include "softfloat-parts.c.inc"
1375 
1376 #undef  N
1377 #undef  W
1378 #define N            256
1379 
1380 #include "softfloat-parts-addsub.c.inc"
1381 
1382 #undef  N
1383 #undef  W
1384 #undef  partsN
1385 #undef  FloatPartsN
1386 #undef  FloatPartsW
1387 
1388 /*
1389  * Pack/unpack routines with a specific FloatFmt.
1390  */
1391 
/* Unpack float16 F into canonical parts P, using explicit format PARAMS
 * (allows the alternate half-precision format). */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1398 
/* Unpack float16 F into canonical parts P with the standard IEEE format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}
1404 
/* Unpack bfloat16 F into canonical parts P. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1411 
/* Round canonical parts P per explicit format PARAMS and repack as float16. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}
1419 
/* Round canonical parts P and repack as standard IEEE float16. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}
1425 
/* Round canonical parts P and repack as bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1432 
/* Unpack float32 F into canonical parts P. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}
1439 
/* Round canonical parts P and repack as float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}
1446 
/* Unpack float64 F into canonical parts P. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}
1453 
/* Round canonical parts P and repack as float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1460 
/* Unpack float128 F into canonical parts P. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}
1467 
/* Round canonical parts P and repack as float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1474 
1475 /*
1476  * Addition and subtraction
1477  */
1478 
/*
 * Common implementation of float16 addition and subtraction:
 * unpack both operands to canonical parts, perform the generic
 * add/sub, and round/repack the result.
 */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}
1490 
/* IEEE float16 addition. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}
1495 
/* IEEE float16 subtraction. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1500 
/* Softfloat fallback for float32 add/sub: unpack, add/sub, repack. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}
1512 
/* Softfloat float32 addition. */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}
1517 
/* Softfloat float32 subtraction. */
static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1522 
/* Softfloat fallback for float64 add/sub: unpack, add/sub, repack. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}
1534 
/* Softfloat float64 addition. */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}
1539 
/* Softfloat float64 subtraction. */
static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1544 
/* Host-FPU float32 addition, used on the hardfloat fast path. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;
    return sum;
}
1549 
/* Host-FPU float32 subtraction, used on the hardfloat fast path. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;
    return diff;
}
1554 
/* Host-FPU float64 addition, used on the hardfloat fast path. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;
    return sum;
}
1559 
/* Host-FPU float64 subtraction, used on the hardfloat fast path. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;
    return diff;
}
1564 
1565 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1566 {
1567     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1568         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1569     }
1570     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1571 }
1572 
1573 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1574 {
1575     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1576         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1577     } else {
1578         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1579     }
1580 }
1581 
/* Dispatch float32 add/sub between the HARD (host FPU) and SOFT
 * implementations via float32_gen2's pre/post checks. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
1588 
/* Dispatch float64 add/sub between the HARD (host FPU) and SOFT
 * implementations via float64_gen2's pre/post checks. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1595 
/* IEEE float32 addition (hardfloat-accelerated where possible). */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}
1601 
/* IEEE float32 subtraction (hardfloat-accelerated where possible). */
float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}
1607 
/* IEEE float64 addition (hardfloat-accelerated where possible). */
float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}
1613 
/* IEEE float64 subtraction (hardfloat-accelerated where possible). */
float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1619 
/* Common implementation of bfloat16 add/sub: unpack, add/sub, repack. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}
1631 
/* bfloat16 addition. */
bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}
1636 
/* bfloat16 subtraction. */
bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
1641 
/* Common implementation of float128 add/sub: unpack, add/sub, repack. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}
1653 
/* IEEE float128 addition. */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}
1658 
/* IEEE float128 subtraction. */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
1663 
1664 /*
1665  * Multiplication
1666  */
1667 
/* IEEE float16 multiplication: unpack, multiply, repack. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
1678 
/* Softfloat fallback for float32 multiplication. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}
1690 
/* Softfloat fallback for float64 multiplication. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
1702 
/* Host-FPU float32 multiplication, used on the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;
    return prod;
}
1707 
/* Host-FPU float64 multiplication, used on the hardfloat fast path. */
static double hard_f64_mul(double a, double b)
{
    double prod = a * b;
    return prod;
}
1712 
/* IEEE float32 multiplication (hardfloat-accelerated where possible). */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}
1719 
/* IEEE float64 multiplication (hardfloat-accelerated where possible). */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1726 
/* bfloat16 multiplication: unpack, multiply, repack. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}
1738 
/* IEEE float128 multiplication: unpack, multiply, repack. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
1750 
1751 /*
1752  * Fused multiply-add
1753  */
1754 
/* IEEE float16 fused multiply-add: a * b + c, modified by FLAGS
 * (float_muladd_* negation/halving options). */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}
1767 
/* Softfloat fallback for float32 fused multiply-add. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}
1781 
/* Softfloat fallback for float64 fused multiply-add. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}
1795 
1796 static bool force_soft_fma;
1797 
/*
 * Fused multiply-add for float32: compute xa * xb + xc as modified
 * by FLAGS, using the host FPU's fmaf() when all fast-path
 * preconditions hold, otherwise falling back to soft_f32_muladd.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The halve-result flag is not handled by the fast path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* Fast path requires all operands to be zero or normal. */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* The product is a (possibly signed) zero; form it explicitly. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep the originals in case we must retry in softfloat. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: restore the inputs and take the
               soft path to get the flags and rounding right. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1868 
1869 float64 QEMU_FLATTEN
1870 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1871 {
1872     union_float64 ua, ub, uc, ur;
1873 
1874     ua.s = xa;
1875     ub.s = xb;
1876     uc.s = xc;
1877 
1878     if (unlikely(!can_use_fpu(s))) {
1879         goto soft;
1880     }
1881     if (unlikely(flags & float_muladd_halve_result)) {
1882         goto soft;
1883     }
1884 
1885     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1886     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1887         goto soft;
1888     }
1889 
1890     if (unlikely(force_soft_fma)) {
1891         goto soft;
1892     }
1893 
1894     /*
1895      * When (a || b) == 0, there's no need to check for under/over flow,
1896      * since we know the addend is (normal || 0) and the product is 0.
1897      */
1898     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1899         union_float64 up;
1900         bool prod_sign;
1901 
1902         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1903         prod_sign ^= !!(flags & float_muladd_negate_product);
1904         up.s = float64_set_sign(float64_zero, prod_sign);
1905 
1906         if (flags & float_muladd_negate_c) {
1907             uc.h = -uc.h;
1908         }
1909         ur.h = up.h + uc.h;
1910     } else {
1911         union_float64 ua_orig = ua;
1912         union_float64 uc_orig = uc;
1913 
1914         if (flags & float_muladd_negate_product) {
1915             ua.h = -ua.h;
1916         }
1917         if (flags & float_muladd_negate_c) {
1918             uc.h = -uc.h;
1919         }
1920 
1921         ur.h = fma(ua.h, ub.h, uc.h);
1922 
1923         if (unlikely(f64_is_inf(ur))) {
1924             float_raise(float_flag_overflow, s);
1925         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1926             ua = ua_orig;
1927             uc = uc_orig;
1928             goto soft;
1929         }
1930     }
1931     if (flags & float_muladd_negate_result) {
1932         return float64_chs(ur.s);
1933     }
1934     return ur.s;
1935 
1936  soft:
1937     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1938 }
1939 
1940 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1941                                       int flags, float_status *status)
1942 {
1943     FloatParts64 pa, pb, pc, *pr;
1944 
1945     bfloat16_unpack_canonical(&pa, a, status);
1946     bfloat16_unpack_canonical(&pb, b, status);
1947     bfloat16_unpack_canonical(&pc, c, status);
1948     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1949 
1950     return bfloat16_round_pack_canonical(pr, status);
1951 }
1952 
1953 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1954                                       int flags, float_status *status)
1955 {
1956     FloatParts128 pa, pb, pc, *pr;
1957 
1958     float128_unpack_canonical(&pa, a, status);
1959     float128_unpack_canonical(&pb, b, status);
1960     float128_unpack_canonical(&pc, c, status);
1961     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1962 
1963     return float128_round_pack_canonical(pr, status);
1964 }
1965 
1966 /*
1967  * Division
1968  */
1969 
1970 float16 float16_div(float16 a, float16 b, float_status *status)
1971 {
1972     FloatParts64 pa, pb, *pr;
1973 
1974     float16_unpack_canonical(&pa, a, status);
1975     float16_unpack_canonical(&pb, b, status);
1976     pr = parts_div(&pa, &pb, status);
1977 
1978     return float16_round_pack_canonical(pr, status);
1979 }
1980 
1981 static float32 QEMU_SOFTFLOAT_ATTR
1982 soft_f32_div(float32 a, float32 b, float_status *status)
1983 {
1984     FloatParts64 pa, pb, *pr;
1985 
1986     float32_unpack_canonical(&pa, a, status);
1987     float32_unpack_canonical(&pb, b, status);
1988     pr = parts_div(&pa, &pb, status);
1989 
1990     return float32_round_pack_canonical(pr, status);
1991 }
1992 
1993 static float64 QEMU_SOFTFLOAT_ATTR
1994 soft_f64_div(float64 a, float64 b, float_status *status)
1995 {
1996     FloatParts64 pa, pb, *pr;
1997 
1998     float64_unpack_canonical(&pa, a, status);
1999     float64_unpack_canonical(&pb, b, status);
2000     pr = parts_div(&pa, &pb, status);
2001 
2002     return float64_round_pack_canonical(pr, status);
2003 }
2004 
/* Host-FPU float32 division; used by the hardfloat fast path. */
static float hard_f32_div(float a, float b)
{
    float quot = a / b;

    return quot;
}
2009 
/* Host-FPU float64 division; used by the hardfloat fast path. */
static double hard_f64_div(double a, double b)
{
    double quot = a / b;

    return quot;
}
2014 
2015 static bool f32_div_pre(union_float32 a, union_float32 b)
2016 {
2017     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2018         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2019                fpclassify(b.h) == FP_NORMAL;
2020     }
2021     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2022 }
2023 
2024 static bool f64_div_pre(union_float64 a, union_float64 b)
2025 {
2026     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2027         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2028                fpclassify(b.h) == FP_NORMAL;
2029     }
2030     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2031 }
2032 
2033 static bool f32_div_post(union_float32 a, union_float32 b)
2034 {
2035     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2036         return fpclassify(a.h) != FP_ZERO;
2037     }
2038     return !float32_is_zero(a.s);
2039 }
2040 
2041 static bool f64_div_post(union_float64 a, union_float64 b)
2042 {
2043     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2044         return fpclassify(a.h) != FP_ZERO;
2045     }
2046     return !float64_is_zero(a.s);
2047 }
2048 
/*
 * float32 division: dispatch between hard_f32_div and soft_f32_div via
 * float32_gen2, gated by the f32_div_pre / f32_div_post checks.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

/*
 * float64 division: dispatch between hard_f64_div and soft_f64_div via
 * float64_gen2, gated by the f64_div_pre / f64_div_post checks.
 */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2062 
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2065 {
2066     FloatParts64 pa, pb, *pr;
2067 
2068     bfloat16_unpack_canonical(&pa, a, status);
2069     bfloat16_unpack_canonical(&pb, b, status);
2070     pr = parts_div(&pa, &pb, status);
2071 
2072     return bfloat16_round_pack_canonical(pr, status);
2073 }
2074 
2075 float128 QEMU_FLATTEN
2076 float128_div(float128 a, float128 b, float_status *status)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     float128_unpack_canonical(&pa, a, status);
2081     float128_unpack_canonical(&pb, b, status);
2082     pr = parts_div(&pa, &pb, status);
2083 
2084     return float128_round_pack_canonical(pr, status);
2085 }
2086 
2087 /*
2088  * Float to Float conversions
2089  *
2090  * Returns the result of converting one float format to another. The
2091  * conversion is performed according to the IEC/IEEE Standard for
2092  * Binary Floating-Point Arithmetic.
2093  *
2094  * Usually this only needs to take care of raising invalid exceptions
2095  * and handling the conversion on NaNs.
2096  */
2097 
/*
 * Adjust canonical parts for conversion to the ARM Alternative
 * Half-Precision (AHP) format, which has no encodings for Inf or NaN.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        /* Representable as-is; nothing to do. */
        break;

    default:
        g_assert_not_reached();
    }
}
2131 
/*
 * Same-representation float-to-float fixup: in canonical parts form
 * only NaNs need per-target processing (via parts_return_nan).
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Dispatch to the 64- or 128-bit variant based on the parts type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2148 
/*
 * Narrow 128-bit parts into 64-bit parts.  For normals the fraction is
 * truncated with a jamming (sticky) bit, per frac_truncjam, so that the
 * later rounding step still sees any discarded low bits.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2164 
/*
 * Widen 64-bit parts into 128-bit parts: copy class/sign/exponent,
 * extend the fraction (frac_widen), then give NaNs their per-target
 * treatment via parts_return_nan.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2177 
2178 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2179 {
2180     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2181     FloatParts64 p;
2182 
2183     float16a_unpack_canonical(&p, a, s, fmt16);
2184     parts_float_to_float(&p, s);
2185     return float32_round_pack_canonical(&p, s);
2186 }
2187 
2188 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2189 {
2190     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2191     FloatParts64 p;
2192 
2193     float16a_unpack_canonical(&p, a, s, fmt16);
2194     parts_float_to_float(&p, s);
2195     return float64_round_pack_canonical(&p, s);
2196 }
2197 
2198 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2199 {
2200     FloatParts64 p;
2201     const FloatFmt *fmt;
2202 
2203     float32_unpack_canonical(&p, a, s);
2204     if (ieee) {
2205         parts_float_to_float(&p, s);
2206         fmt = &float16_params;
2207     } else {
2208         parts_float_to_ahp(&p, s);
2209         fmt = &float16_params_ahp;
2210     }
2211     return float16a_round_pack_canonical(&p, s, fmt);
2212 }
2213 
2214 static float64 QEMU_SOFTFLOAT_ATTR
2215 soft_float32_to_float64(float32 a, float_status *s)
2216 {
2217     FloatParts64 p;
2218 
2219     float32_unpack_canonical(&p, a, s);
2220     parts_float_to_float(&p, s);
2221     return float64_round_pack_canonical(&p, s);
2222 }
2223 
/*
 * float32 -> float64 with a hardfloat fast path for normals and zeros;
 * everything else (Inf, NaN, subnormals) goes through softfloat.
 */
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.  */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        /* Zero widens to a zero of the same sign. */
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        return soft_float32_to_float64(a, s);
    }
}
2239 
2240 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2241 {
2242     FloatParts64 p;
2243     const FloatFmt *fmt;
2244 
2245     float64_unpack_canonical(&p, a, s);
2246     if (ieee) {
2247         parts_float_to_float(&p, s);
2248         fmt = &float16_params;
2249     } else {
2250         parts_float_to_ahp(&p, s);
2251         fmt = &float16_params_ahp;
2252     }
2253     return float16a_round_pack_canonical(&p, s, fmt);
2254 }
2255 
2256 float32 float64_to_float32(float64 a, float_status *s)
2257 {
2258     FloatParts64 p;
2259 
2260     float64_unpack_canonical(&p, a, s);
2261     parts_float_to_float(&p, s);
2262     return float32_round_pack_canonical(&p, s);
2263 }
2264 
/* bfloat16 -> float32 via the canonical parts path. */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* bfloat16 -> float64 via the canonical parts path. */
float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

/* float32 -> bfloat16 (narrowing) via the canonical parts path. */
bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

/* float64 -> bfloat16 (narrowing) via the canonical parts path. */
bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2300 
/* float128 -> float32: narrow the 128-bit parts, then round/pack. */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

/* float128 -> float64: narrow the 128-bit parts, then round/pack. */
float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

/* float32 -> float128: widen the 64-bit parts, then round/pack. */
float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

/* float64 -> float128: widen the 64-bit parts, then round/pack. */
float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2340 
2341 /*
2342  * Round to integral value
2343  */
2344 
/*
 * Round @a to an integral value in the same format, using the rounding
 * mode currently selected in the status word (scale 0).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2389 
2390 /*
2391  * Floating-point to signed integer conversions
2392  */
2393 
/*
 * Float -> signed integer conversions.  Each wrapper unpacks to
 * canonical parts and hands the rounding mode, the scalbn-style @scale
 * (binary exponent adjustment applied by parts_float_to_sint) and the
 * destination's INT*_MIN/INT*_MAX clamping bounds to
 * parts_float_to_sint.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2528 
/*
 * Convenience wrappers over the *_scalbn converters: take the rounding
 * mode from the status word, or force round-to-zero (truncation); all
 * use scale 0.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2673 
2674 /*
2675  * Floating-point to unsigned integer conversions
2676  */
2677 
/*
 * Float -> unsigned integer conversions.  Each wrapper unpacks to
 * canonical parts and hands the rounding mode, the scalbn-style @scale
 * and the destination's UINT*_MAX clamping bound to parts_float_to_uint.
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2812 
/*
 * Convenience wrappers over the unsigned *_scalbn converters: take the
 * rounding mode from the status word, or force round-to-zero; scale 0.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}
2892 
2893 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2894 {
2895     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2896 }
2897 
2898 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2899 {
2900     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2901 }
2902 
2903 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2904 {
2905     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2906 }
2907 
2908 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2909 {
2910     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2911 }
2912 
2913 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2914 {
2915     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2916 }
2917 
2918 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
2919 {
2920     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2921 }
2922 
2923 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
2924 {
2925     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2926 }
2927 
2928 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2929 {
2930     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2931 }
2932 
2933 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2934 {
2935     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2936 }
2937 
2938 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2939 {
2940     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2941 }
2942 
2943 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2944 {
2945     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2946 }
2947 
2948 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2949 {
2950     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2951 }
2952 
2953 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2954 {
2955     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2956 }
2957 
2958 /*
2959  * Signed integer to floating-point conversions
2960  */
2961 
2962 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2963 {
2964     FloatParts64 p;
2965 
2966     parts_sint_to_float(&p, a, scale, status);
2967     return float16_round_pack_canonical(&p, status);
2968 }
2969 
/*
 * Narrower signed inputs widen losslessly to int64_t, so all of these
 * funnel through int64_to_float16_scalbn (scale 0 for the plain
 * conversions).
 */
float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
2999 
3000 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3001 {
3002     FloatParts64 p;
3003 
3004     parts64_sint_to_float(&p, a, scale, status);
3005     return float32_round_pack_canonical(&p, status);
3006 }
3007 
/*
 * Narrower signed inputs widen losslessly to int64_t, so all of these
 * funnel through int64_to_float32_scalbn.
 */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3032 
3033 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3034 {
3035     FloatParts64 p;
3036 
3037     parts_sint_to_float(&p, a, scale, status);
3038     return float64_round_pack_canonical(&p, status);
3039 }
3040 
/*
 * Narrower signed inputs widen losslessly to int64_t, so all of these
 * funnel through int64_to_float64_scalbn.
 */
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3065 
3066 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3067 {
3068     FloatParts64 p;
3069 
3070     parts_sint_to_float(&p, a, scale, status);
3071     return bfloat16_round_pack_canonical(&p, status);
3072 }
3073 
/*
 * Narrower signed inputs widen losslessly to int64_t, so all of these
 * funnel through int64_to_bfloat16_scalbn.
 */
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3098 
3099 float128 int64_to_float128(int64_t a, float_status *status)
3100 {
3101     FloatParts128 p;
3102 
3103     parts_sint_to_float(&p, a, 0, status);
3104     return float128_round_pack_canonical(&p, status);
3105 }
3106 
/* int32 widens losslessly to int64, so reuse the int64 conversion.  */
float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3111 
3112 /*
3113  * Unsigned Integer to floating-point conversions
3114  */
3115 
3116 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3117 {
3118     FloatParts64 p;
3119 
3120     parts_uint_to_float(&p, a, scale, status);
3121     return float16_round_pack_canonical(&p, status);
3122 }
3123 
/*
 * Narrower unsigned inputs widen losslessly to uint64_t, so all of
 * these funnel through uint64_to_float16_scalbn.
 */
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3153 
3154 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3155 {
3156     FloatParts64 p;
3157 
3158     parts_uint_to_float(&p, a, scale, status);
3159     return float32_round_pack_canonical(&p, status);
3160 }
3161 
/*
 * Narrower unsigned inputs widen losslessly to uint64_t, so all of
 * these funnel through uint64_to_float32_scalbn.
 */
float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3186 
3187 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3188 {
3189     FloatParts64 p;
3190 
3191     parts_uint_to_float(&p, a, scale, status);
3192     return float64_round_pack_canonical(&p, status);
3193 }
3194 
/*
 * Narrower unsigned inputs widen losslessly to uint64_t, so all of
 * these funnel through uint64_to_float64_scalbn.
 */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3219 
3220 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3221 {
3222     FloatParts64 p;
3223 
3224     parts_uint_to_float(&p, a, scale, status);
3225     return bfloat16_round_pack_canonical(&p, status);
3226 }
3227 
/*
 * Narrower unsigned inputs widen losslessly to uint64_t, so all of
 * these funnel through uint64_to_bfloat16_scalbn.
 */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3252 
3253 float128 uint64_to_float128(uint64_t a, float_status *status)
3254 {
3255     FloatParts128 p;
3256 
3257     parts_uint_to_float(&p, a, 0, status);
3258     return float128_round_pack_canonical(&p, status);
3259 }
3260 
3261 /* Float Min/Max */
3262 /* min() and max() functions. These can't be implemented as
3263  * 'compare and pick one input' because that would mishandle
3264  * NaNs and +0 vs -0.
3265  *
3266  * minnum() and maxnum() functions. These are similar to the min()
3267  * and max() functions but if one of the arguments is a QNaN and
3268  * the other is numerical then the numerical argument is returned.
3269  * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
3271  * and maxNum() operations. min() and max() are the typical min/max
3272  * semantics provided by many CPUs which predate that specification.
3273  *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
3276  */
/*
 * Shared worker for the whole min/max family.
 *
 * ismin selects min vs max.  ieee selects the minNum/maxNum NaN rule,
 * where a single quiet NaN operand loses to the numerical operand.
 * ismag compares magnitudes first (minNumMag/maxNumMag), falling back
 * to the signed ordering only when the magnitudes are equal.
 */
static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
                                bool ieee, bool ismag, float_status *s)
{
    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
        if (ieee) {
            /* Takes two floating-point values `a' and `b', one of
             * which is a NaN, and returns the appropriate NaN
             * result. If either `a' or `b' is a signaling NaN,
             * the invalid exception is raised.
             */
            if (is_snan(a.cls) || is_snan(b.cls)) {
                return *parts_pick_nan(&a, &b, s);
            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                return b;
            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                return a;
            }
        }
        /* Both NaN, or non-IEEE semantics: standard NaN selection.  */
        return *parts_pick_nan(&a, &b, s);
    } else {
        int a_exp, b_exp;

        /* Collapse zero and infinity onto exponent extremes so the
         * ordering below only has to compare (exp, frac) pairs.
         */
        switch (a.cls) {
        case float_class_normal:
            a_exp = a.exp;
            break;
        case float_class_inf:
            a_exp = INT_MAX;
            break;
        case float_class_zero:
            a_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b.cls) {
        case float_class_normal:
            b_exp = b.exp;
            break;
        case float_class_inf:
            b_exp = INT_MAX;
            break;
        case float_class_zero:
            b_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }

        /* Magnitudes differ: pick by |value| alone, ignoring sign.  */
        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a_less ^ ismin ? b : a;
        }

        if (a.sign == b.sign) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            /* XOR with the sign: magnitude order inverts for negatives. */
            return a.sign ^ a_less ^ ismin ? b : a;
        } else {
            /* Differing signs: the negative operand is the smaller
             * (this also orders -0 below +0).
             */
            return a.sign ^ ismin ? b : a;
        }
    }
}
3347 
/*
 * Instantiate float<sz>_<name>(a, b, s): unpack both operands, run
 * the shared minmax_floats() worker, repack in the target format.
 * (The second flag parameter was previously misspelled "isiee".)
 */
#define MINMAX(sz, name, ismin, is_ieee, ismag)                         \
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                     float_status *s)                   \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    pr = minmax_floats(pa, pb, ismin, is_ieee, ismag, s);               \
    return float ## sz ## _round_pack_canonical(&pr, s);                \
}
3358 
/* Arguments: width, name, ismin, ieee, ismag (see minmax_floats).  */
MINMAX(16, min, true, false, false)
MINMAX(16, minnum, true, true, false)
MINMAX(16, minnummag, true, true, true)
MINMAX(16, max, false, false, false)
MINMAX(16, maxnum, false, true, false)
MINMAX(16, maxnummag, false, true, true)

MINMAX(32, min, true, false, false)
MINMAX(32, minnum, true, true, false)
MINMAX(32, minnummag, true, true, true)
MINMAX(32, max, false, false, false)
MINMAX(32, maxnum, false, true, false)
MINMAX(32, maxnummag, false, true, true)

MINMAX(64, min, true, false, false)
MINMAX(64, minnum, true, true, false)
MINMAX(64, minnummag, true, true, true)
MINMAX(64, max, false, false, false)
MINMAX(64, maxnum, false, true, false)
MINMAX(64, maxnummag, false, true, true)

#undef MINMAX
3381 
/*
 * Instantiate bfloat16_<name>(a, b, s) on top of the shared
 * minmax_floats() worker.
 * (The second flag parameter was previously misspelled "isiee".)
 */
#define BF16_MINMAX(name, ismin, is_ieee, ismag)                        \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    bfloat16_unpack_canonical(&pa, a, s);                               \
    bfloat16_unpack_canonical(&pb, b, s);                               \
    pr = minmax_floats(pa, pb, ismin, is_ieee, ismag, s);               \
    return bfloat16_round_pack_canonical(&pr, s);                       \
}
3391 
/* Arguments: name, ismin, ieee, ismag (see minmax_floats).  */
BF16_MINMAX(min, true, false, false)
BF16_MINMAX(minnum, true, true, false)
BF16_MINMAX(minnummag, true, true, true)
BF16_MINMAX(max, false, false, false)
BF16_MINMAX(maxnum, false, true, false)
BF16_MINMAX(maxnummag, false, true, true)

#undef BF16_MINMAX
3400 
/*
 * Floating point compare, shared by all formats once decomposed.
 * Returns a FloatRelation; a quiet compare raises invalid only for
 * signalling NaNs, a non-quiet compare raises it for any NaN.
 */
static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
                                    float_status *s)
{
    if (is_nan(a.cls) || is_nan(b.cls)) {
        if (!is_quiet ||
            a.cls == float_class_snan ||
            b.cls == float_class_snan) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    /* Zeros compare equal regardless of sign (+0 == -0).  */
    if (a.cls == float_class_zero) {
        if (b.cls == float_class_zero) {
            return float_relation_equal;
        }
        return b.sign ? float_relation_greater : float_relation_less;
    } else if (b.cls == float_class_zero) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* The only really important thing about infinity is its sign. If
     * both are infinities the sign marks the smallest of the two.
     */
    if (a.cls == float_class_inf) {
        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
            return float_relation_equal;
        }
        return a.sign ? float_relation_less : float_relation_greater;
    } else if (b.cls == float_class_inf) {
        return b.sign ? float_relation_greater : float_relation_less;
    }

    /* Both normal, differing signs: negative is smaller.  */
    if (a.sign != b.sign) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* Same sign: compare magnitude, inverting the result when both
     * operands are negative.
     */
    if (a.exp == b.exp) {
        if (a.frac == b.frac) {
            return float_relation_equal;
        }
        if (a.sign) {
            return a.frac > b.frac ?
                float_relation_less : float_relation_greater;
        } else {
            return a.frac > b.frac ?
                float_relation_greater : float_relation_less;
        }
    } else {
        if (a.sign) {
            return a.exp > b.exp ? float_relation_less : float_relation_greater;
        } else {
            return a.exp > b.exp ? float_relation_greater : float_relation_less;
        }
    }
}
3458 
/*
 * Instantiate a soft compare helper for each width: unpack both
 * operands and defer to compare_floats().
 */
#define COMPARE(name, attr, sz)                                         \
static int attr                                                         \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3474 
/* Signalling compare: any NaN operand raises invalid.  */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

/* Quiet compare: only signalling NaNs raise invalid.  */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3484 
/*
 * Hardfloat fast path for float32 compare: use the host's C99
 * non-signalling comparison macros, falling back to the softfloat
 * implementation for the unordered case (so exception flags are set
 * there) or when hardfloat is disabled.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3513 
/* Signalling compare: any NaN operand raises invalid.  */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

/* Quiet compare: only signalling NaNs raise invalid.  */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3523 
/*
 * Hardfloat fast path for float64 compare: use the host's C99
 * non-signalling comparison macros, falling back to the softfloat
 * implementation for the unordered case (so exception flags are set
 * there) or when hardfloat is disabled.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3552 
/* Signalling compare: any NaN operand raises invalid.  */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

/* Quiet compare: only signalling NaNs raise invalid.  */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3562 
3563 static FloatRelation QEMU_FLATTEN
3564 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3565 {
3566     FloatParts64 pa, pb;
3567 
3568     bfloat16_unpack_canonical(&pa, a, s);
3569     bfloat16_unpack_canonical(&pb, b, s);
3570     return compare_floats(pa, pb, is_quiet, s);
3571 }
3572 
/* Signalling compare: any NaN operand raises invalid.  */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

/* Quiet compare: only signalling NaNs raise invalid.  */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3582 
3583 /* Multiply A by 2 raised to the power N.  */
3584 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3585 {
3586     if (unlikely(is_nan(a.cls))) {
3587         parts_return_nan(&a, s);
3588     }
3589     if (a.cls == float_class_normal) {
3590         /* The largest float type (even though not supported by FloatParts64)
3591          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3592          * still allows rounding to infinity, without allowing overflow
3593          * within the int32_t that backs FloatParts64.exp.
3594          */
3595         n = MIN(MAX(n, -0x10000), 0x10000);
3596         a.exp += n;
3597     }
3598     return a;
3599 }
3600 
3601 float16 float16_scalbn(float16 a, int n, float_status *status)
3602 {
3603     FloatParts64 pa, pr;
3604 
3605     float16_unpack_canonical(&pa, a, status);
3606     pr = scalbn_decomposed(pa, n, status);
3607     return float16_round_pack_canonical(&pr, status);
3608 }
3609 
3610 float32 float32_scalbn(float32 a, int n, float_status *status)
3611 {
3612     FloatParts64 pa, pr;
3613 
3614     float32_unpack_canonical(&pa, a, status);
3615     pr = scalbn_decomposed(pa, n, status);
3616     return float32_round_pack_canonical(&pr, status);
3617 }
3618 
3619 float64 float64_scalbn(float64 a, int n, float_status *status)
3620 {
3621     FloatParts64 pa, pr;
3622 
3623     float64_unpack_canonical(&pa, a, status);
3624     pr = scalbn_decomposed(pa, n, status);
3625     return float64_round_pack_canonical(&pr, status);
3626 }
3627 
3628 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3629 {
3630     FloatParts64 pa, pr;
3631 
3632     bfloat16_unpack_canonical(&pa, a, status);
3633     pr = scalbn_decomposed(pa, n, status);
3634     return bfloat16_round_pack_canonical(&pr, status);
3635 }
3636 
3637 /*
3638  * Square Root
3639  *
3640  * The old softfloat code did an approximation step before zeroing in
3641  * on the final result. However for simpleness we just compute the
3642  * square root by iterating down from the implicit bit to enough extra
3643  * bits to ensure we get a correctly rounded result.
3644  *
3645  * This does mean however the calculation is slower than before,
3646  * especially for 64 bit floats.
3647  */
3648 
/*
 * Bit-by-bit square root on the decomposed representation.  `p`
 * supplies the target format, whose frac_shift bounds how many result
 * bits need to be computed before rounding.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    /* Special operands first: NaN, +-0, negative, +inf.  */
    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* Square root of a negative number is invalid -> default NaN.  */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            /* This result bit is 1: subtract and accumulate.  */
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3708 
3709 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3710 {
3711     FloatParts64 pa, pr;
3712 
3713     float16_unpack_canonical(&pa, a, status);
3714     pr = sqrt_float(pa, status, &float16_params);
3715     return float16_round_pack_canonical(&pr, status);
3716 }
3717 
3718 static float32 QEMU_SOFTFLOAT_ATTR
3719 soft_f32_sqrt(float32 a, float_status *status)
3720 {
3721     FloatParts64 pa, pr;
3722 
3723     float32_unpack_canonical(&pa, a, status);
3724     pr = sqrt_float(pa, status, &float32_params);
3725     return float32_round_pack_canonical(&pr, status);
3726 }
3727 
3728 static float64 QEMU_SOFTFLOAT_ATTR
3729 soft_f64_sqrt(float64 a, float_status *status)
3730 {
3731     FloatParts64 pa, pr;
3732 
3733     float64_unpack_canonical(&pa, a, status);
3734     pr = sqrt_float(pa, status, &float64_params);
3735     return float64_round_pack_canonical(&pr, status);
3736 }
3737 
/*
 * Hardfloat fast path: defer to the host sqrtf() when the (flushed)
 * input is a non-negative zero or normal and can_use_fpu() permits;
 * otherwise fall back to the softfloat implementation.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FP view of the operand.  */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3764 
/*
 * Hardfloat fast path: defer to the host sqrt() when the (flushed)
 * input is a non-negative zero or normal and can_use_fpu() permits;
 * otherwise fall back to the softfloat implementation.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FP view of the operand.  */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3791 
3792 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3793 {
3794     FloatParts64 pa, pr;
3795 
3796     bfloat16_unpack_canonical(&pa, a, status);
3797     pr = sqrt_float(pa, status, &bfloat16_params);
3798     return bfloat16_round_pack_canonical(&pr, status);
3799 }
3800 
3801 /*----------------------------------------------------------------------------
3802 | The pattern for a default generated NaN.
3803 *----------------------------------------------------------------------------*/
3804 
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the fraction from the canonical position to the raw layout. */
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}
3813 
float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the fraction from the canonical position to the raw layout. */
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}
3822 
float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the fraction from the canonical position to the raw layout. */
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}
3831 
float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction spans two words, so use the frac_shr helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3840 
bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the fraction from the canonical position to the raw layout. */
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3849 
3850 /*----------------------------------------------------------------------------
3851 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3852 *----------------------------------------------------------------------------*/
3853 
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    /* Shift up to the canonical fraction position for parts_silence_nan. */
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    /* Shift back down to the raw layout before repacking. */
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}
3864 
float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    /* Shift up to the canonical fraction position for parts_silence_nan. */
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    /* Shift back down to the raw layout before repacking. */
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}
3875 
float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    /* Shift up to the canonical fraction position for parts_silence_nan. */
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    /* Shift back down to the raw layout before repacking. */
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}
3886 
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    /* Shift up to the canonical fraction position for parts_silence_nan. */
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    /* Shift back down to the raw layout before repacking. */
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3897 
float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /* 128-bit fraction spans two words, so use the frac_shl/shr helpers. */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3908 
3909 /*----------------------------------------------------------------------------
3910 | If `a' is denormal and we are in flush-to-zero mode then set the
3911 | input-denormal exception and return zero. Otherwise just return the value.
3912 *----------------------------------------------------------------------------*/
3913 
3914 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3915 {
3916     if (p.exp == 0 && p.frac != 0) {
3917         float_raise(float_flag_input_denormal, status);
3918         return true;
3919     }
3920 
3921     return false;
3922 }
3923 
3924 float16 float16_squash_input_denormal(float16 a, float_status *status)
3925 {
3926     if (status->flush_inputs_to_zero) {
3927         FloatParts64 p;
3928 
3929         float16_unpack_raw(&p, a);
3930         if (parts_squash_denormal(p, status)) {
3931             return float16_set_sign(float16_zero, p.sign);
3932         }
3933     }
3934     return a;
3935 }
3936 
3937 float32 float32_squash_input_denormal(float32 a, float_status *status)
3938 {
3939     if (status->flush_inputs_to_zero) {
3940         FloatParts64 p;
3941 
3942         float32_unpack_raw(&p, a);
3943         if (parts_squash_denormal(p, status)) {
3944             return float32_set_sign(float32_zero, p.sign);
3945         }
3946     }
3947     return a;
3948 }
3949 
3950 float64 float64_squash_input_denormal(float64 a, float_status *status)
3951 {
3952     if (status->flush_inputs_to_zero) {
3953         FloatParts64 p;
3954 
3955         float64_unpack_raw(&p, a);
3956         if (parts_squash_denormal(p, status)) {
3957             return float64_set_sign(float64_zero, p.sign);
3958         }
3959     }
3960     return a;
3961 }
3962 
3963 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3964 {
3965     if (status->flush_inputs_to_zero) {
3966         FloatParts64 p;
3967 
3968         bfloat16_unpack_raw(&p, a);
3969         if (parts_squash_denormal(p, status)) {
3970             return bfloat16_set_sign(bfloat16_zero, p.sign);
3971         }
3972     }
3973     return a;
3974 }
3975 
3976 /*----------------------------------------------------------------------------
3977 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3978 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3979 | input.  If `zSign' is 1, the input is negated before being converted to an
3980 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3981 | is simply rounded to an integer, with the inexact exception raised if the
3982 | input cannot be represented exactly as an integer.  However, if the fixed-
3983 | point input is too large, the invalid exception is raised and the largest
3984 | positive or negative integer is returned.
3985 *----------------------------------------------------------------------------*/
3986 
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* 7 fraction bits below the binary point: choose the increment to add. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the truncated result would be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact halfway case with nearest-even: force the LSB to zero. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range (magnitude too big, or sign flipped on negate):
       raise invalid and saturate. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4034 
4035 /*----------------------------------------------------------------------------
4036 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4037 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4038 | and returns the properly rounded 64-bit integer corresponding to the input.
4039 | If `zSign' is 1, the input is negated before being converted to an integer.
4040 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4041 | the inexact exception raised if the input cannot be represented exactly as
4042 | an integer.  However, if the fixed-point input is too large, the invalid
4043 | exception is raised and the largest positive or negative integer is
4044 | returned.
4045 *----------------------------------------------------------------------------*/
4046 
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* absZ1 holds the bits below the binary point; decide whether the
       integer part absZ0 must be incremented. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of absZ1 set means the fraction is >= 1/2. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap to zero means the value exceeded 64 bits. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact halfway case with nearest-even: force the LSB to zero. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign flipped on negate: the magnitude did not fit in int64. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4096 
4097 /*----------------------------------------------------------------------------
4098 | Normalizes the subnormal single-precision floating-point value represented
4099 | by the denormalized significand `aSig'.  The normalized exponent and
4100 | significand are stored at the locations pointed to by `zExpPtr' and
4101 | `zSigPtr', respectively.
4102 *----------------------------------------------------------------------------*/
4103 
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Shift the MSB of the fraction up to the implicit-bit position
       (bit 23); the exponent compensates for the shift. */
    int8_t n = clz32(aSig) - 8;

    *zSigPtr = aSig << n;
    *zExpPtr = 1 - n;
}
4114 
4115 /*----------------------------------------------------------------------------
4116 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4117 | and significand `zSig', and returns the proper single-precision floating-
4118 | point value corresponding to the abstract input.  Ordinarily, the abstract
4119 | value is simply rounded and packed into the single-precision format, with
4120 | the inexact exception raised if the abstract input cannot be represented
4121 | exactly.  However, if the abstract value is too large, the overflow and
4122 | inexact exceptions are raised and an infinity or maximal finite value is
4123 | returned.  If the abstract value is too small, the input value is rounded to
4124 | a subnormal number, and the underflow and inexact exceptions are raised if
4125 | the abstract input cannot be represented exactly as a subnormal single-
4126 | precision floating-point number.
4127 |     The input significand `zSig' has its binary point between bits 30
4128 | and 29, which is 7 bits to the left of the usual location.  This shifted
4129 | significand must be normalized or smaller.  If `zSig' is not normalized,
4130 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4131 | and it must not require rounding.  In the usual case that `zSig' is
4132 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4133 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4134 | Binary Floating-Point Arithmetic.
4135 *----------------------------------------------------------------------------*/
4136 
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* 7 guard bits below the final fraction: choose the rounding increment. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result's LSB would be zero. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* The unsigned compare catches both overflow (zExp >= 0xFD) and
       underflow (zExp < 0 wraps to a large uint16_t). */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: produce infinity, or the maximal finite value
               when this rounding mode never rounds up to infinity. */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tininess either before rounding, or when rounding cannot
               push the value back up to the smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact halfway case with nearest-even: force the LSB to zero. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4213 
4214 /*----------------------------------------------------------------------------
4215 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4216 | and significand `zSig', and returns the proper single-precision floating-
4217 | point value corresponding to the abstract input.  This routine is just like
4218 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4219 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4220 | floating-point exponent.
4221 *----------------------------------------------------------------------------*/
4222 
4223 static float32
4224  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4225                               float_status *status)
4226 {
4227     int8_t shiftCount;
4228 
4229     shiftCount = clz32(zSig) - 1;
4230     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4231                                status);
4232 
4233 }
4234 
4235 /*----------------------------------------------------------------------------
4236 | Normalizes the subnormal double-precision floating-point value represented
4237 | by the denormalized significand `aSig'.  The normalized exponent and
4238 | significand are stored at the locations pointed to by `zExpPtr' and
4239 | `zSigPtr', respectively.
4240 *----------------------------------------------------------------------------*/
4241 
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Shift the MSB of the fraction up to the implicit-bit position
       (bit 52); the exponent compensates for the shift. */
    int8_t n = clz64(aSig) - 11;

    *zSigPtr = aSig << n;
    *zExpPtr = 1 - n;
}
4252 
4253 /*----------------------------------------------------------------------------
4254 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4255 | double-precision floating-point value, returning the result.  After being
4256 | shifted into the proper positions, the three fields are simply added
4257 | together to form the result.  This means that any integer portion of `zSig'
4258 | will be added into the exponent.  Since a properly normalized significand
4259 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4260 | than the desired result exponent whenever `zSig' is a complete, normalized
4261 | significand.
4262 *----------------------------------------------------------------------------*/
4263 
4264 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4265 {
4266 
4267     return make_float64(
4268         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4269 
4270 }
4271 
4272 /*----------------------------------------------------------------------------
4273 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4274 | and significand `zSig', and returns the proper double-precision floating-
4275 | point value corresponding to the abstract input.  Ordinarily, the abstract
4276 | value is simply rounded and packed into the double-precision format, with
4277 | the inexact exception raised if the abstract input cannot be represented
4278 | exactly.  However, if the abstract value is too large, the overflow and
4279 | inexact exceptions are raised and an infinity or maximal finite value is
4280 | returned.  If the abstract value is too small, the input value is rounded to
4281 | a subnormal number, and the underflow and inexact exceptions are raised if
4282 | the abstract input cannot be represented exactly as a subnormal double-
4283 | precision floating-point number.
4284 |     The input significand `zSig' has its binary point between bits 62
4285 | and 61, which is 10 bits to the left of the usual location.  This shifted
4286 | significand must be normalized or smaller.  If `zSig' is not normalized,
4287 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4288 | and it must not require rounding.  In the usual case that `zSig' is
4289 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4290 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4291 | Binary Floating-Point Arithmetic.
4292 *----------------------------------------------------------------------------*/
4293 
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* 10 guard bits below the final fraction: choose the rounding increment. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result's LSB would be zero. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* The unsigned compare catches both overflow (zExp >= 0x7FD) and
       underflow (zExp < 0 wraps to a large uint16_t). */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: produce infinity, or the maximal finite value
               when this rounding mode never rounds up to infinity. */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess either before rounding, or when rounding cannot
               push the value back up to the smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact halfway case with nearest-even: force the LSB to zero. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4369 
4370 /*----------------------------------------------------------------------------
4371 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4372 | and significand `zSig', and returns the proper double-precision floating-
4373 | point value corresponding to the abstract input.  This routine is just like
4374 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4375 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4376 | floating-point exponent.
4377 *----------------------------------------------------------------------------*/
4378 
4379 static float64
4380  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4381                               float_status *status)
4382 {
4383     int8_t shiftCount;
4384 
4385     shiftCount = clz64(zSig) - 1;
4386     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4387                                status);
4388 
4389 }
4390 
4391 /*----------------------------------------------------------------------------
4392 | Normalizes the subnormal extended double-precision floating-point value
4393 | represented by the denormalized significand `aSig'.  The normalized exponent
4394 | and significand are stored at the locations pointed to by `zExpPtr' and
4395 | `zSigPtr', respectively.
4396 *----------------------------------------------------------------------------*/
4397 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* floatx80 has an explicit integer bit, so normalize the MSB all
       the way up to bit 63. */
    int8_t n = clz64(aSig);

    *zSigPtr = aSig << n;
    *zExpPtr = 1 - n;
}
4407 
4408 /*----------------------------------------------------------------------------
4409 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4410 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4411 | and returns the proper extended double-precision floating-point value
4412 | corresponding to the abstract input.  Ordinarily, the abstract value is
4413 | rounded and packed into the extended double-precision format, with the
4414 | inexact exception raised if the abstract input cannot be represented
4415 | exactly.  However, if the abstract value is too large, the overflow and
4416 | inexact exceptions are raised and an infinity or maximal finite value is
4417 | returned.  If the abstract value is too small, the input value is rounded to
4418 | a subnormal number, and the underflow and inexact exceptions are raised if
4419 | the abstract input cannot be represented exactly as a subnormal extended
4420 | double-precision floating-point number.
4421 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4422 | number of bits as single or double precision, respectively.  Otherwise, the
4423 | result is rounded to the full precision of the extended double-precision
4424 | format.
4425 |     The input significand must be normalized or smaller.  If the input
4426 | significand is not normalized, `zExp' must be 0; in that case, the result
4427 | returned is a subnormal number, and it must not require rounding.  The
4428 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4429 | Floating-Point Arithmetic.
4430 *----------------------------------------------------------------------------*/
4431 
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /* Reduced precision: round within zSig0 using a mask/increment
       sized for a 53-bit (64) or 24-bit (32) significand. */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Jam any low-word bits into the sticky (LSB) position of zSig0. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Unsigned compare catches both overflow and zExp <= 0. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding carried into the integer bit: result is normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Nearest-even on an exact tie: also clear the result LSB. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of bit 63: renormalize by bumping the exponent. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Nearest-even on an exact tie: also clear the result LSB. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full 80-bit precision: zSig1 holds the bits below the result. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of zSig1 set means the discarded part is >= 1/2 ulp. */
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that round toward zero for this sign yield the
               largest finite value rather than infinity. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed in the shift above. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie with nearest-even: force the LSB to zero. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Rounding set the integer bit: result became normal. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* Carry out of bit 63: renormalize by bumping the exponent. */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie with nearest-even: force the LSB to zero. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4619 
4620 /*----------------------------------------------------------------------------
4621 | Takes an abstract floating-point value having sign `zSign', exponent
4622 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4623 | and returns the proper extended double-precision floating-point value
4624 | corresponding to the abstract input.  This routine is just like
4625 | `roundAndPackFloatx80' except that the input significand does not have to be
4626 | normalized.
4627 *----------------------------------------------------------------------------*/
4628 
4629 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4630                                        bool zSign, int32_t zExp,
4631                                        uint64_t zSig0, uint64_t zSig1,
4632                                        float_status *status)
4633 {
4634     int8_t shiftCount;
4635 
4636     if ( zSig0 == 0 ) {
4637         zSig0 = zSig1;
4638         zSig1 = 0;
4639         zExp -= 64;
4640     }
4641     shiftCount = clz64(zSig0);
4642     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4643     zExp -= shiftCount;
4644     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4645                                 zSig0, zSig1, status);
4646 
4647 }
4648 
4649 /*----------------------------------------------------------------------------
4650 | Returns the least-significant 64 fraction bits of the quadruple-precision
4651 | floating-point value `a'.
4652 *----------------------------------------------------------------------------*/
4653 
4654 static inline uint64_t extractFloat128Frac1( float128 a )
4655 {
4656 
4657     return a.low;
4658 
4659 }
4660 
4661 /*----------------------------------------------------------------------------
4662 | Returns the most-significant 48 fraction bits of the quadruple-precision
4663 | floating-point value `a'.
4664 *----------------------------------------------------------------------------*/
4665 
4666 static inline uint64_t extractFloat128Frac0( float128 a )
4667 {
4668 
4669     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4670 
4671 }
4672 
4673 /*----------------------------------------------------------------------------
4674 | Returns the exponent bits of the quadruple-precision floating-point value
4675 | `a'.
4676 *----------------------------------------------------------------------------*/
4677 
4678 static inline int32_t extractFloat128Exp( float128 a )
4679 {
4680 
4681     return ( a.high>>48 ) & 0x7FFF;
4682 
4683 }
4684 
4685 /*----------------------------------------------------------------------------
4686 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4687 *----------------------------------------------------------------------------*/
4688 
4689 static inline bool extractFloat128Sign(float128 a)
4690 {
4691     return a.high >> 63;
4692 }
4693 
4694 /*----------------------------------------------------------------------------
4695 | Normalizes the subnormal quadruple-precision floating-point value
4696 | represented by the denormalized significand formed by the concatenation of
4697 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4698 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4699 | significand are stored at the location pointed to by `zSig0Ptr', and the
4700 | least significant 64 bits of the normalized significand are stored at the
4701 | location pointed to by `zSig1Ptr'.
4702 *----------------------------------------------------------------------------*/
4703 
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* All set bits live in the low word.  Shift so the leading bit
         * lands at position 48 of the high word (the float128 integer
         * bit position); "- 15" accounts for the 15-bit exponent field. */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            /* Leading bit is above position 48 even within aSig1 alone:
             * split it across both output words with a right/left pair. */
            *zSig0Ptr = aSig1>>( - shiftCount );
            /* "& 63" keeps the (negative) count a valid shift amount. */
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        /* Exponent compensates for the 64-bit word move plus the shift. */
        *zExpPtr = - shiftCount - 63;
    }
    else {
        /* Normalize the full 128-bit significand in place. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }

}
4734 
4735 /*----------------------------------------------------------------------------
4736 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4737 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4738 | floating-point value, returning the result.  After being shifted into the
4739 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4740 | added together to form the most significant 32 bits of the result.  This
4741 | means that any integer portion of `zSig0' will be added into the exponent.
4742 | Since a properly normalized significand will have an integer portion equal
4743 | to 1, the `zExp' input should be 1 less than the desired result exponent
4744 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4745 | significand.
4746 *----------------------------------------------------------------------------*/
4747 
4748 static inline float128
4749 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4750 {
4751     float128 z;
4752 
4753     z.low = zSig1;
4754     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4755     return z;
4756 }
4757 
4758 /*----------------------------------------------------------------------------
4759 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4760 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4761 | and `zSig2', and returns the proper quadruple-precision floating-point value
4762 | corresponding to the abstract input.  Ordinarily, the abstract value is
4763 | simply rounded and packed into the quadruple-precision format, with the
4764 | inexact exception raised if the abstract input cannot be represented
4765 | exactly.  However, if the abstract value is too large, the overflow and
4766 | inexact exceptions are raised and an infinity or maximal finite value is
4767 | returned.  If the abstract value is too small, the input value is rounded to
4768 | a subnormal number, and the underflow and inexact exceptions are raised if
4769 | the abstract input cannot be represented exactly as a subnormal quadruple-
4770 | precision floating-point number.
4771 |     The input significand must be normalized or smaller.  If the input
4772 | significand is not normalized, `zExp' must be 0; in that case, the result
4773 | returned is a subnormal number, and it must not require rounding.  In the
4774 | usual case that the input significand is normalized, `zExp' must be 1 less
4775 | than the ``true'' floating-point exponent.  The handling of underflow and
4776 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4777 *----------------------------------------------------------------------------*/
4778 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide from the 64 guard/sticky bits in zSig2 (and, for round-to-odd,
     * the low bit of zSig1) whether the significand must be incremented. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the guard bit (MSB of zSig2) is set. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Out-of-range exponent: handle overflow and underflow/subnormals.
     * The unsigned compare catches both zExp >= 0x7FFD and zExp < 0. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        /* Overflow if the exponent is too large outright, or if rounding
         * an all-ones significand at the top exponent would carry out. */
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that round toward zero (for this sign) deliver the
             * maximal finite value instead of infinity. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result is subnormal (or zero, if flushing is enabled). */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tininess: either detected before rounding (per status), or
             * the value would not round up to the smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize; bits shifted out are jammed into zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Re-derive the increment: the shift above changed zSig1/zSig2. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        /* Increment the 128-bit significand; a carry out of bit 48 of
         * zSig0 propagates into the exponent inside packFloat128. */
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: clear the LSB when exactly halfway. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must be packed as a true (signed) zero. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4890 
4891 /*----------------------------------------------------------------------------
4892 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4893 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4894 | returns the proper quadruple-precision floating-point value corresponding
4895 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4896 | except that the input significand has fewer bits and does not have to be
4897 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4898 | point exponent.
4899 *----------------------------------------------------------------------------*/
4900 
4901 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4902                                               uint64_t zSig0, uint64_t zSig1,
4903                                               float_status *status)
4904 {
4905     int8_t shiftCount;
4906     uint64_t zSig2;
4907 
4908     if ( zSig0 == 0 ) {
4909         zSig0 = zSig1;
4910         zSig1 = 0;
4911         zExp -= 64;
4912     }
4913     shiftCount = clz64(zSig0) - 15;
4914     if ( 0 <= shiftCount ) {
4915         zSig2 = 0;
4916         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4917     }
4918     else {
4919         shift128ExtraRightJamming(
4920             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4921     }
4922     zExp -= shiftCount;
4923     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4924 
4925 }
4926 
4927 
4928 /*----------------------------------------------------------------------------
4929 | Returns the result of converting the 32-bit two's complement integer `a'
4930 | to the extended double-precision floating-point format.  The conversion
4931 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4932 | Arithmetic.
4933 *----------------------------------------------------------------------------*/
4934 
4935 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4936 {
4937     bool zSign;
4938     uint32_t absA;
4939     int8_t shiftCount;
4940     uint64_t zSig;
4941 
4942     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4943     zSign = ( a < 0 );
4944     absA = zSign ? - a : a;
4945     shiftCount = clz32(absA) + 32;
4946     zSig = absA;
4947     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4948 
4949 }
4950 
4951 /*----------------------------------------------------------------------------
4952 | Returns the result of converting the 64-bit two's complement integer `a'
4953 | to the extended double-precision floating-point format.  The conversion
4954 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4955 | Arithmetic.
4956 *----------------------------------------------------------------------------*/
4957 
4958 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4959 {
4960     bool zSign;
4961     uint64_t absA;
4962     int8_t shiftCount;
4963 
4964     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4965     zSign = ( a < 0 );
4966     absA = zSign ? - a : a;
4967     shiftCount = clz64(absA);
4968     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4969 
4970 }
4971 
4972 /*----------------------------------------------------------------------------
4973 | Returns the result of converting the single-precision floating-point value
4974 | `a' to the extended double-precision floating-point format.  The conversion
4975 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4976 | Arithmetic.
4977 *----------------------------------------------------------------------------*/
4978 
4979 floatx80 float32_to_floatx80(float32 a, float_status *status)
4980 {
4981     bool aSign;
4982     int aExp;
4983     uint32_t aSig;
4984 
4985     a = float32_squash_input_denormal(a, status);
4986     aSig = extractFloat32Frac( a );
4987     aExp = extractFloat32Exp( a );
4988     aSign = extractFloat32Sign( a );
4989     if ( aExp == 0xFF ) {
4990         if (aSig) {
4991             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4992                                                status);
4993             return floatx80_silence_nan(res, status);
4994         }
4995         return packFloatx80(aSign,
4996                             floatx80_infinity_high,
4997                             floatx80_infinity_low);
4998     }
4999     if ( aExp == 0 ) {
5000         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5001         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5002     }
5003     aSig |= 0x00800000;
5004     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5005 
5006 }
5007 
5008 /*----------------------------------------------------------------------------
5009 | Returns the remainder of the single-precision floating-point value `a'
5010 | with respect to the corresponding value `b'.  The operation is performed
5011 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5012 *----------------------------------------------------------------------------*/
5013 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* rem(NaN, x), rem(x, NaN) propagate; rem(inf, x) is invalid. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* rem(x, inf) = x for finite x. */
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: one 32-bit division step suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| < |b|/2: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            /* Compute the quotient bits and subtract q*b from a. */
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce iteratively with estimated
         * 64-bit quotients, 62 quotient bits per pass. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Under-estimate by 2 so q64 never exceeds the true quotient. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        /* Final partial division for the remaining exponent difference. */
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past the true remainder (loop exits when aSig goes negative),
     * then pick whichever of the two candidates is nearer to zero; ties
     * go to the even quotient (IEEE remainder semantics). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5109 
5110 
5111 
5112 /*----------------------------------------------------------------------------
5113 | Returns the binary exponential of the single-precision floating-point value
5114 | `a'. The operation is performed according to the IEC/IEEE Standard for
5115 | Binary Floating-Point Arithmetic.
5116 |
5117 | Uses the following identities:
5118 |
5119 | 1. -------------------------------------------------------------------------
5120 |      x    x*ln(2)
5121 |     2  = e
5122 |
5123 | 2. -------------------------------------------------------------------------
5124 |                      2     3     4     5           n
5125 |      x        x     x     x     x     x           x
5126 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5127 |               1!    2!    3!    4!    5!          n!
5128 *----------------------------------------------------------------------------*/
5129 
/*
 * Taylor-series coefficients 1/n! for n = 1..15, stored as IEEE
 * double-precision bit patterns; used by float32_exp2() to evaluate
 * e**x term by term.
 */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5148 
float32 float32_exp2(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    a = float32_squash_input_denormal(a, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    /* 2**NaN = NaN; 2**-inf = 0; 2**+inf = +inf. */
    if ( aExp == 0xFF) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        return (aSign) ? float32_zero : a;
    }
    /* 2**0 = 1 exactly (for either sign of zero). */
    if (aExp == 0) {
        if (aSig == 0) return float32_one;
    }

    /* Every remaining case is inexact. */
    float_raise(float_flag_inexact, status);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* 2**a == e**(a * ln 2); evaluate the exponential by its Taylor
     * series in double precision for extra accuracy. */
    x = float32_to_float64(a, status);
    x = float64_mul(x, float64_ln2, status);

    /* r = 1 + sum(x**n / n!) for n = 1..15, accumulating x**n in xn. */
    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i], status);
        r = float64_add(r, f, status);

        xn = float64_mul(xn, x, status);
    }

    return float64_to_float32(r, status);
}
5193 
5194 /*----------------------------------------------------------------------------
5195 | Returns the binary log of the single-precision floating-point value `a'.
5196 | The operation is performed according to the IEC/IEEE Standard for Binary
5197 | Floating-Point Arithmetic.
5198 *----------------------------------------------------------------------------*/
5199 float32 float32_log2(float32 a, float_status *status)
5200 {
5201     bool aSign, zSign;
5202     int aExp;
5203     uint32_t aSig, zSig, i;
5204 
5205     a = float32_squash_input_denormal(a, status);
5206     aSig = extractFloat32Frac( a );
5207     aExp = extractFloat32Exp( a );
5208     aSign = extractFloat32Sign( a );
5209 
5210     if ( aExp == 0 ) {
5211         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5212         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5213     }
5214     if ( aSign ) {
5215         float_raise(float_flag_invalid, status);
5216         return float32_default_nan(status);
5217     }
5218     if ( aExp == 0xFF ) {
5219         if (aSig) {
5220             return propagateFloat32NaN(a, float32_zero, status);
5221         }
5222         return a;
5223     }
5224 
5225     aExp -= 0x7F;
5226     aSig |= 0x00800000;
5227     zSign = aExp < 0;
5228     zSig = aExp << 23;
5229 
5230     for (i = 1 << 22; i > 0; i >>= 1) {
5231         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5232         if ( aSig & 0x01000000 ) {
5233             aSig >>= 1;
5234             zSig |= i;
5235         }
5236     }
5237 
5238     if ( zSign )
5239         zSig = -zSig;
5240 
5241     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5242 }
5243 
5244 /*----------------------------------------------------------------------------
5245 | Returns the result of converting the double-precision floating-point value
5246 | `a' to the extended double-precision floating-point format.  The conversion
5247 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5248 | Arithmetic.
5249 *----------------------------------------------------------------------------*/
5250 
5251 floatx80 float64_to_floatx80(float64 a, float_status *status)
5252 {
5253     bool aSign;
5254     int aExp;
5255     uint64_t aSig;
5256 
5257     a = float64_squash_input_denormal(a, status);
5258     aSig = extractFloat64Frac( a );
5259     aExp = extractFloat64Exp( a );
5260     aSign = extractFloat64Sign( a );
5261     if ( aExp == 0x7FF ) {
5262         if (aSig) {
5263             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5264                                                status);
5265             return floatx80_silence_nan(res, status);
5266         }
5267         return packFloatx80(aSign,
5268                             floatx80_infinity_high,
5269                             floatx80_infinity_low);
5270     }
5271     if ( aExp == 0 ) {
5272         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5273         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5274     }
5275     return
5276         packFloatx80(
5277             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5278 
5279 }
5280 
5281 /*----------------------------------------------------------------------------
5282 | Returns the remainder of the double-precision floating-point value `a'
5283 | with respect to the corresponding value `b'.  The operation is performed
5284 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5285 *----------------------------------------------------------------------------*/
5286 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* rem(NaN, x), rem(x, NaN) propagate; rem(inf, x) is invalid. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* rem(x, inf) = x for finite x. */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit and left-justify to 64 bits. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: a is already the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce iteratively with estimated 64-bit quotients, consuming 62
     * quotient bits per pass. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        /* Under-estimate by 2 so q never exceeds the true quotient. */
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    /* Final partial division for the remaining exponent difference. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past the true remainder (loop exits when aSig goes negative),
     * then pick whichever of the two candidates is nearer to zero; ties
     * go to the even quotient (IEEE remainder semantics). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5368 
5369 /*----------------------------------------------------------------------------
5370 | Returns the binary log of the double-precision floating-point value `a'.
5371 | The operation is performed according to the IEC/IEEE Standard for Binary
5372 | Floating-Point Arithmetic.
5373 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* log2(+-0) = -inf. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* log2 of a negative number is invalid. */
    if ( aSign ) {
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* log2(NaN) propagates; log2(+inf) = +inf. */
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        return a;
    }

    /* Integer part of the result is the unbiased exponent; the fraction
     * bits are produced one at a time below by repeated squaring. */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Each squaring doubles the fractional argument; a carry into the
     * integer bit means the current result bit is 1. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5417 
5418 /*----------------------------------------------------------------------------
5419 | Returns the result of converting the extended double-precision floating-
5420 | point value `a' to the 32-bit two's complement integer format.  The
5421 | conversion is performed according to the IEC/IEEE Standard for Binary
5422 | Floating-Point Arithmetic---which means in particular that the conversion
5423 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5424 | largest positive integer is returned.  Otherwise, if the conversion
5425 | overflows, the largest integer with the same sign as `a' is returned.
5426 *----------------------------------------------------------------------------*/
5427 
5428 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5429 {
5430     bool aSign;
5431     int32_t aExp, shiftCount;
5432     uint64_t aSig;
5433 
5434     if (floatx80_invalid_encoding(a)) {
5435         float_raise(float_flag_invalid, status);
5436         return 1 << 31;
5437     }
5438     aSig = extractFloatx80Frac( a );
5439     aExp = extractFloatx80Exp( a );
5440     aSign = extractFloatx80Sign( a );
5441     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5442     shiftCount = 0x4037 - aExp;
5443     if ( shiftCount <= 0 ) shiftCount = 1;
5444     shift64RightJamming( aSig, shiftCount, &aSig );
5445     return roundAndPackInt32(aSign, aSig, status);
5446 
5447 }
5448 
5449 /*----------------------------------------------------------------------------
5450 | Returns the result of converting the extended double-precision floating-
5451 | point value `a' to the 32-bit two's complement integer format.  The
5452 | conversion is performed according to the IEC/IEEE Standard for Binary
5453 | Floating-Point Arithmetic, except that the conversion is always rounded
5454 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5455 | Otherwise, if the conversion overflows, the largest integer with the same
5456 | sign as `a' is returned.
5457 *----------------------------------------------------------------------------*/
5458 
5459 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5460 {
5461     bool aSign;
5462     int32_t aExp, shiftCount;
5463     uint64_t aSig, savedASig;
5464     int32_t z;
5465 
5466     if (floatx80_invalid_encoding(a)) {
5467         float_raise(float_flag_invalid, status);
5468         return 1 << 31;
5469     }
5470     aSig = extractFloatx80Frac( a );
5471     aExp = extractFloatx80Exp( a );
5472     aSign = extractFloatx80Sign( a );
5473     if ( 0x401E < aExp ) {
5474         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5475         goto invalid;
5476     }
5477     else if ( aExp < 0x3FFF ) {
5478         if (aExp || aSig) {
5479             float_raise(float_flag_inexact, status);
5480         }
5481         return 0;
5482     }
5483     shiftCount = 0x403E - aExp;
5484     savedASig = aSig;
5485     aSig >>= shiftCount;
5486     z = aSig;
5487     if ( aSign ) z = - z;
5488     if ( ( z < 0 ) ^ aSign ) {
5489  invalid:
5490         float_raise(float_flag_invalid, status);
5491         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5492     }
5493     if ( ( aSig<<shiftCount ) != savedASig ) {
5494         float_raise(float_flag_inexact, status);
5495     }
5496     return z;
5497 
5498 }
5499 
5500 /*----------------------------------------------------------------------------
5501 | Returns the result of converting the extended double-precision floating-
5502 | point value `a' to the 64-bit two's complement integer format.  The
5503 | conversion is performed according to the IEC/IEEE Standard for Binary
5504 | Floating-Point Arithmetic---which means in particular that the conversion
5505 | is rounded according to the current rounding mode.  If `a' is a NaN,
5506 | the largest positive integer is returned.  Otherwise, if the conversion
5507 | overflows, the largest integer with the same sign as `a' is returned.
5508 *----------------------------------------------------------------------------*/
5509 
5510 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5511 {
5512     bool aSign;
5513     int32_t aExp, shiftCount;
5514     uint64_t aSig, aSigExtra;
5515 
5516     if (floatx80_invalid_encoding(a)) {
5517         float_raise(float_flag_invalid, status);
5518         return 1ULL << 63;
5519     }
5520     aSig = extractFloatx80Frac( a );
5521     aExp = extractFloatx80Exp( a );
5522     aSign = extractFloatx80Sign( a );
5523     shiftCount = 0x403E - aExp;
5524     if ( shiftCount <= 0 ) {
5525         if ( shiftCount ) {
5526             float_raise(float_flag_invalid, status);
5527             if (!aSign || floatx80_is_any_nan(a)) {
5528                 return INT64_MAX;
5529             }
5530             return INT64_MIN;
5531         }
5532         aSigExtra = 0;
5533     }
5534     else {
5535         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5536     }
5537     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5538 
5539 }
5540 
5541 /*----------------------------------------------------------------------------
5542 | Returns the result of converting the extended double-precision floating-
5543 | point value `a' to the 64-bit two's complement integer format.  The
5544 | conversion is performed according to the IEC/IEEE Standard for Binary
5545 | Floating-Point Arithmetic, except that the conversion is always rounded
5546 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5547 | Otherwise, if the conversion overflows, the largest integer with the same
5548 | sign as `a' is returned.
5549 *----------------------------------------------------------------------------*/
5550 
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;
    int64_t z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = aExp - 0x403E;
    if ( 0 <= shiftCount ) {
        /* |a| >= 2^63: only -2^63 itself fits in an int64 */
        aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
        /* 0xC03E is the sign+exponent encoding of exactly -2^63 */
        if ( ( a.high != 0xC03E ) || aSig ) {
            float_raise(float_flag_invalid, status);
            /* Positive overflow, or NaN, saturates to INT64_MAX */
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
                return INT64_MAX;
            }
        }
        return INT64_MIN;
    }
    else if ( aExp < 0x3FFF ) {
        /* |a| < 1 truncates to zero; inexact unless a is exactly zero */
        if (aExp | aSig) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    z = aSig>>( - shiftCount );
    /* Any bits discarded by the truncating shift make the result inexact */
    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
        float_raise(float_flag_inexact, status);
    }
    if ( aSign ) z = - z;
    return z;

}
5590 
5591 /*----------------------------------------------------------------------------
5592 | Returns the result of converting the extended double-precision floating-
5593 | point value `a' to the single-precision floating-point format.  The
5594 | conversion is performed according to the IEC/IEEE Standard for Binary
5595 | Floating-Point Arithmetic.
5596 *----------------------------------------------------------------------------*/
5597 
5598 float32 floatx80_to_float32(floatx80 a, float_status *status)
5599 {
5600     bool aSign;
5601     int32_t aExp;
5602     uint64_t aSig;
5603 
5604     if (floatx80_invalid_encoding(a)) {
5605         float_raise(float_flag_invalid, status);
5606         return float32_default_nan(status);
5607     }
5608     aSig = extractFloatx80Frac( a );
5609     aExp = extractFloatx80Exp( a );
5610     aSign = extractFloatx80Sign( a );
5611     if ( aExp == 0x7FFF ) {
5612         if ( (uint64_t) ( aSig<<1 ) ) {
5613             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5614                                              status);
5615             return float32_silence_nan(res, status);
5616         }
5617         return packFloat32( aSign, 0xFF, 0 );
5618     }
5619     shift64RightJamming( aSig, 33, &aSig );
5620     if ( aExp || aSig ) aExp -= 0x3F81;
5621     return roundAndPackFloat32(aSign, aExp, aSig, status);
5622 
5623 }
5624 
5625 /*----------------------------------------------------------------------------
5626 | Returns the result of converting the extended double-precision floating-
5627 | point value `a' to the double-precision floating-point format.  The
5628 | conversion is performed according to the IEC/IEEE Standard for Binary
5629 | Floating-Point Arithmetic.
5630 *----------------------------------------------------------------------------*/
5631 
5632 float64 floatx80_to_float64(floatx80 a, float_status *status)
5633 {
5634     bool aSign;
5635     int32_t aExp;
5636     uint64_t aSig, zSig;
5637 
5638     if (floatx80_invalid_encoding(a)) {
5639         float_raise(float_flag_invalid, status);
5640         return float64_default_nan(status);
5641     }
5642     aSig = extractFloatx80Frac( a );
5643     aExp = extractFloatx80Exp( a );
5644     aSign = extractFloatx80Sign( a );
5645     if ( aExp == 0x7FFF ) {
5646         if ( (uint64_t) ( aSig<<1 ) ) {
5647             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5648                                              status);
5649             return float64_silence_nan(res, status);
5650         }
5651         return packFloat64( aSign, 0x7FF, 0 );
5652     }
5653     shift64RightJamming( aSig, 1, &zSig );
5654     if ( aExp || aSig ) aExp -= 0x3C01;
5655     return roundAndPackFloat64(aSign, aExp, zSig, status);
5656 
5657 }
5658 
5659 /*----------------------------------------------------------------------------
5660 | Returns the result of converting the extended double-precision floating-
5661 | point value `a' to the quadruple-precision floating-point format.  The
5662 | conversion is performed according to the IEC/IEEE Standard for Binary
5663 | Floating-Point Arithmetic.
5664 *----------------------------------------------------------------------------*/
5665 
5666 float128 floatx80_to_float128(floatx80 a, float_status *status)
5667 {
5668     bool aSign;
5669     int aExp;
5670     uint64_t aSig, zSig0, zSig1;
5671 
5672     if (floatx80_invalid_encoding(a)) {
5673         float_raise(float_flag_invalid, status);
5674         return float128_default_nan(status);
5675     }
5676     aSig = extractFloatx80Frac( a );
5677     aExp = extractFloatx80Exp( a );
5678     aSign = extractFloatx80Sign( a );
5679     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5680         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5681                                            status);
5682         return float128_silence_nan(res, status);
5683     }
5684     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5685     return packFloat128( aSign, aExp, zSig0, zSig1 );
5686 
5687 }
5688 
5689 /*----------------------------------------------------------------------------
5690 | Rounds the extended double-precision floating-point value `a'
5691 | to the precision provided by floatx80_rounding_precision and returns the
5692 | result as an extended double-precision floating-point value.
5693 | The operation is performed according to the IEC/IEEE Standard for Binary
5694 | Floating-Point Arithmetic.
5695 *----------------------------------------------------------------------------*/
5696 
5697 floatx80 floatx80_round(floatx80 a, float_status *status)
5698 {
5699     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5700                                 extractFloatx80Sign(a),
5701                                 extractFloatx80Exp(a),
5702                                 extractFloatx80Frac(a), 0, status);
5703 }
5704 
5705 /*----------------------------------------------------------------------------
5706 | Rounds the extended double-precision floating-point value `a' to an integer,
5707 | and returns the result as an extended quadruple-precision floating-point
5708 | value.  The operation is performed according to the IEC/IEEE Standard for
5709 | Binary Floating-Point Arithmetic.
5710 *----------------------------------------------------------------------------*/
5711 
5712 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5713 {
5714     bool aSign;
5715     int32_t aExp;
5716     uint64_t lastBitMask, roundBitsMask;
5717     floatx80 z;
5718 
5719     if (floatx80_invalid_encoding(a)) {
5720         float_raise(float_flag_invalid, status);
5721         return floatx80_default_nan(status);
5722     }
5723     aExp = extractFloatx80Exp( a );
5724     if ( 0x403E <= aExp ) {
5725         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5726             return propagateFloatx80NaN(a, a, status);
5727         }
5728         return a;
5729     }
5730     if ( aExp < 0x3FFF ) {
5731         if (    ( aExp == 0 )
5732              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5733             return a;
5734         }
5735         float_raise(float_flag_inexact, status);
5736         aSign = extractFloatx80Sign( a );
5737         switch (status->float_rounding_mode) {
5738          case float_round_nearest_even:
5739             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5740                ) {
5741                 return
5742                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5743             }
5744             break;
5745         case float_round_ties_away:
5746             if (aExp == 0x3FFE) {
5747                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5748             }
5749             break;
5750          case float_round_down:
5751             return
5752                   aSign ?
5753                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5754                 : packFloatx80( 0, 0, 0 );
5755          case float_round_up:
5756             return
5757                   aSign ? packFloatx80( 1, 0, 0 )
5758                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5759 
5760         case float_round_to_zero:
5761             break;
5762         default:
5763             g_assert_not_reached();
5764         }
5765         return packFloatx80( aSign, 0, 0 );
5766     }
5767     lastBitMask = 1;
5768     lastBitMask <<= 0x403E - aExp;
5769     roundBitsMask = lastBitMask - 1;
5770     z = a;
5771     switch (status->float_rounding_mode) {
5772     case float_round_nearest_even:
5773         z.low += lastBitMask>>1;
5774         if ((z.low & roundBitsMask) == 0) {
5775             z.low &= ~lastBitMask;
5776         }
5777         break;
5778     case float_round_ties_away:
5779         z.low += lastBitMask >> 1;
5780         break;
5781     case float_round_to_zero:
5782         break;
5783     case float_round_up:
5784         if (!extractFloatx80Sign(z)) {
5785             z.low += roundBitsMask;
5786         }
5787         break;
5788     case float_round_down:
5789         if (extractFloatx80Sign(z)) {
5790             z.low += roundBitsMask;
5791         }
5792         break;
5793     default:
5794         abort();
5795     }
5796     z.low &= ~ roundBitsMask;
5797     if ( z.low == 0 ) {
5798         ++z.high;
5799         z.low = UINT64_C(0x8000000000000000);
5800     }
5801     if (z.low != a.low) {
5802         float_raise(float_flag_inexact, status);
5803     }
5804     return z;
5805 
5806 }
5807 
5808 /*----------------------------------------------------------------------------
5809 | Returns the result of adding the absolute values of the extended double-
5810 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5811 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5812 | The addition is performed according to the IEC/IEEE Standard for Binary
5813 | Floating-Point Arithmetic.
5814 *----------------------------------------------------------------------------*/
5815 
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to it */
        if ( aExp == 0x7FFF ) {
            /* a is Inf or NaN */
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* Denormals have the same effective exponent as exponent 1 */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to it */
        if ( bExp == 0x7FFF ) {
            /* b is Inf or NaN */
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment shift needed */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf + Inf of the same sign */
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set means the sum did not carry and is still normalized */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the addition: shift right one, restore the integer
     * bit, and bump the exponent */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5887 
5888 /*----------------------------------------------------------------------------
5889 | Returns the result of subtracting the absolute values of the extended
5890 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
5891 | difference is negated before being returned.  `zSign' is ignored if the
5892 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
5893 | Standard for Binary Floating-Point Arithmetic.
5894 *----------------------------------------------------------------------------*/
5895 
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Exponents are equal */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - Inf is an invalid operation */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        /* Denormals have the same effective exponent as exponent 1 */
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - Inf: infinity with the flipped sign */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    /* Align a's significand down to b's exponent, keeping sticky bits */
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    /* The larger magnitude was b, so the result sign flips */
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - finite = Inf */
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    /* Align b's significand down to a's exponent, keeping sticky bits */
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5956 
5957 /*----------------------------------------------------------------------------
5958 | Returns the result of adding the extended double-precision floating-point
5959 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
5960 | Standard for Binary Floating-Point Arithmetic.
5961 *----------------------------------------------------------------------------*/
5962 
5963 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5964 {
5965     bool aSign, bSign;
5966 
5967     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5968         float_raise(float_flag_invalid, status);
5969         return floatx80_default_nan(status);
5970     }
5971     aSign = extractFloatx80Sign( a );
5972     bSign = extractFloatx80Sign( b );
5973     if ( aSign == bSign ) {
5974         return addFloatx80Sigs(a, b, aSign, status);
5975     }
5976     else {
5977         return subFloatx80Sigs(a, b, aSign, status);
5978     }
5979 
5980 }
5981 
5982 /*----------------------------------------------------------------------------
5983 | Returns the result of subtracting the extended double-precision floating-
5984 | point values `a' and `b'.  The operation is performed according to the
5985 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5986 *----------------------------------------------------------------------------*/
5987 
5988 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5989 {
5990     bool aSign, bSign;
5991 
5992     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5993         float_raise(float_flag_invalid, status);
5994         return floatx80_default_nan(status);
5995     }
5996     aSign = extractFloatx80Sign( a );
5997     bSign = extractFloatx80Sign( b );
5998     if ( aSign == bSign ) {
5999         return subFloatx80Sigs(a, b, aSign, status);
6000     }
6001     else {
6002         return addFloatx80Sigs(a, b, aSign, status);
6003     }
6004 
6005 }
6006 
6007 /*----------------------------------------------------------------------------
6008 | Returns the result of multiplying the extended double-precision floating-
6009 | point values `a' and `b'.  The operation is performed according to the
6010 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6011 *----------------------------------------------------------------------------*/
6012 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* a is Inf or NaN */
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf * 0 is an invalid operation */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        /* b is Inf or NaN */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf is an invalid operation */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    /* Sum of exponents, rebiased for the position of the 128-bit product */
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* Product of normalized significands is in [1, 4); if the top bit of
     * zSig0 is clear the product is < 2 and needs one left shift */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6068 
6069 /*----------------------------------------------------------------------------
6070 | Returns the result of dividing the extended double-precision floating-point
6071 | value `a' by the corresponding value `b'.  The operation is performed
6072 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6073 *----------------------------------------------------------------------------*/
6074 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* a is Inf or NaN */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf / Inf is an invalid operation */
            goto invalid;
        }
        /* Inf / finite = Inf */
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / Inf = 0 */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; any other dividend gives divide-by-zero */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Keep the dividend significand below the divisor's so the quotient
     * estimate fits in 64 bits */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits: estimate, then correct by trial subtraction */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        /* Estimate was one too high: add the divisor back */
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; refine only when the estimate is close enough
     * to a rounding boundary for the correction to matter */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        /* Fold any nonzero remainder into the sticky bit */
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6155 
6156 /*----------------------------------------------------------------------------
6157 | Returns the remainder of the extended double-precision floating-point value
6158 | `a' with respect to the corresponding value `b'.  The operation is performed
6159 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6160 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6161 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6162 | the absolute value of the integer quotient.
6163 *----------------------------------------------------------------------------*/
6164 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Invalid (unnormal) extended encodings yield the default NaN. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    /* Keep the original exponent so pseudo-denormals can be detected below. */
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity: NaNs propagate, rem(inf, x) is invalid. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        /* rem(finite, inf) is the finite operand, unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid; otherwise normalize the subnormal 'b'. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* 'b' has the larger exponent: 'a' is already the remainder. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /* Generate 62 quotient bits per iteration while expDiff is large. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        /* The estimate may be slightly high; bias it low (corrected later). */
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Correct the low-biased estimate upward until the remainder fits. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /*
         * IEEE remainder uses round-to-nearest for the quotient: if the
         * other candidate remainder is smaller (or ties with an odd
         * quotient), switch to it and flip the result's sign.
         */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6285 
6286 /*----------------------------------------------------------------------------
6287 | Returns the remainder of the extended double-precision floating-point value
6288 | `a' with respect to the corresponding value `b'.  The operation is performed
6289 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6290 *----------------------------------------------------------------------------*/
6291 
6292 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6293 {
6294     uint64_t quotient;
6295     return floatx80_modrem(a, b, false, &quotient, status);
6296 }
6297 
6298 /*----------------------------------------------------------------------------
6299 | Returns the remainder of the extended double-precision floating-point value
6300 | `a' with respect to the corresponding value `b', with the quotient truncated
6301 | toward zero.
6302 *----------------------------------------------------------------------------*/
6303 
6304 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6305 {
6306     uint64_t quotient;
6307     return floatx80_modrem(a, b, true, &quotient, status);
6308 }
6309 
6310 /*----------------------------------------------------------------------------
6311 | Returns the square root of the extended double-precision floating-point
6312 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6313 | for Binary Floating-Point Arithmetic.
6314 *----------------------------------------------------------------------------*/
6315 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Invalid (unnormal) extended encodings yield the default NaN. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+inf) is +inf; sqrt(-inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) is -0; sqrt of any other negative value is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial ~32-bit root estimate, refined to 64 bits below. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct zSig0 downward until the remainder becomes non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Estimate the low 64 bits of the root from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        /* Near a rounding boundary: compute exact remainder/sticky bits. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any nonzero remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6380 
6381 /*----------------------------------------------------------------------------
6382 | Returns the result of converting the quadruple-precision floating-point
6383 | value `a' to the extended double-precision floating-point format.  The
6384 | conversion is performed according to the IEC/IEEE Standard for Binary
6385 | Floating-Point Arithmetic.
6386 *----------------------------------------------------------------------------*/
6387 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            /* NaN: convert via the common NaN form, returned quiet. */
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        /* Restore the implicit integer bit of the quad significand. */
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    /* Left-align the significand for rounding into the extended format. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6418 
6419 /*----------------------------------------------------------------------------
6420 | Returns the remainder of the quadruple-precision floating-point value `a'
6421 | with respect to the corresponding value `b'.  The operation is performed
6422 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6423 *----------------------------------------------------------------------------*/
6424 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity: NaNs propagate, rem(inf, x) is invalid. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* rem(finite, inf) is the finite operand, unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid; otherwise normalize the subnormal 'b'. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* If 'a' is much smaller in magnitude, it is already the remainder. */
    if ( expDiff < -1 ) return a;
    /* Restore implicit bits and left-align both significands. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Generate 61 quotient bits per iteration while expDiff is large. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        /* The estimate may be slightly high; bias it low. */
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Subtract 'b' until the remainder goes negative, keeping the
       previous (non-negative) remainder as the alternate candidate. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* Pick the candidate nearer zero; break ties toward even quotient. */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    /* Take the absolute value of a negative remainder. */
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6525 
6526 /*----------------------------------------------------------------------------
6527 | Returns the square root of the quadruple-precision floating-point value `a'.
6528 | The operation is performed according to the IEC/IEEE Standard for Binary
6529 | Floating-Point Arithmetic.
6530 *----------------------------------------------------------------------------*/
6531 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+inf) is +inf; sqrt(-inf) is invalid. */
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) is -0; sqrt of any other negative value is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is half the unbiased input exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    /* Restore the implicit integer bit. */
    aSig0 |= UINT64_C(0x0001000000000000);
    /* Initial ~32-bit root estimate, refined to 64 bits below. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct zSig0 downward until the remainder becomes non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Estimate the low 64 bits of the root from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        /* Near a rounding boundary: compute exact remainder/sticky bits. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any nonzero remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
6593 
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Invalid (unnormal) extended encodings compare as unordered. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Any NaN operand makes the comparison unordered. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        /* Quiet comparisons raise invalid only for signaling NaNs. */
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Different signs and not both zero: the positive one wins. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: order by magnitude, flipped for negatives. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6641 
6642 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6643 {
6644     return floatx80_compare_internal(a, b, 0, status);
6645 }
6646 
6647 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6648                                      float_status *status)
6649 {
6650     return floatx80_compare_internal(a, b, 1, status);
6651 }
6652 
static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Any NaN operand makes the comparison unordered. */
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        /* Quiet comparisons raise invalid only for signaling NaNs. */
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Different signs and not both zero: the positive one wins. */
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: order by magnitude, flipped for negatives. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6687 
6688 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
6689 {
6690     return float128_compare_internal(a, b, 0, status);
6691 }
6692 
6693 FloatRelation float128_compare_quiet(float128 a, float128 b,
6694                                      float_status *status)
6695 {
6696     return float128_compare_internal(a, b, 1, status);
6697 }
6698 
6699 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6700 {
6701     bool aSign;
6702     int32_t aExp;
6703     uint64_t aSig;
6704 
6705     if (floatx80_invalid_encoding(a)) {
6706         float_raise(float_flag_invalid, status);
6707         return floatx80_default_nan(status);
6708     }
6709     aSig = extractFloatx80Frac( a );
6710     aExp = extractFloatx80Exp( a );
6711     aSign = extractFloatx80Sign( a );
6712 
6713     if ( aExp == 0x7FFF ) {
6714         if ( aSig<<1 ) {
6715             return propagateFloatx80NaN(a, a, status);
6716         }
6717         return a;
6718     }
6719 
6720     if (aExp == 0) {
6721         if (aSig == 0) {
6722             return a;
6723         }
6724         aExp++;
6725     }
6726 
6727     if (n > 0x10000) {
6728         n = 0x10000;
6729     } else if (n < -0x10000) {
6730         n = -0x10000;
6731     }
6732 
6733     aExp += n;
6734     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6735                                          aSign, aExp, aSig, 0, status);
6736 }
6737 
6738 float128 float128_scalbn(float128 a, int n, float_status *status)
6739 {
6740     bool aSign;
6741     int32_t aExp;
6742     uint64_t aSig0, aSig1;
6743 
6744     aSig1 = extractFloat128Frac1( a );
6745     aSig0 = extractFloat128Frac0( a );
6746     aExp = extractFloat128Exp( a );
6747     aSign = extractFloat128Sign( a );
6748     if ( aExp == 0x7FFF ) {
6749         if ( aSig0 | aSig1 ) {
6750             return propagateFloat128NaN(a, a, status);
6751         }
6752         return a;
6753     }
6754     if (aExp != 0) {
6755         aSig0 |= UINT64_C(0x0001000000000000);
6756     } else if (aSig0 == 0 && aSig1 == 0) {
6757         return a;
6758     } else {
6759         aExp++;
6760     }
6761 
6762     if (n > 0x10000) {
6763         n = 0x10000;
6764     } else if (n < -0x10000) {
6765         n = -0x10000;
6766     }
6767 
6768     aExp += n - 1;
6769     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
6770                                          , status);
6771 
6772 }
6773 
6774 static void __attribute__((constructor)) softfloat_init(void)
6775 {
6776     union_float64 ua, ub, uc, ur;
6777 
6778     if (QEMU_NO_HARDFLOAT) {
6779         return;
6780     }
6781     /*
6782      * Test that the host's FMA is not obviously broken. For example,
6783      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6784      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6785      */
6786     ua.s = 0x0020000000000001ULL;
6787     ub.s = 0x3ca0000000000000ULL;
6788     uc.s = 0x0020000000000000ULL;
6789     ur.h = fma(ua.h, ub.h, uc.h);
6790     if (ur.s != 0x0020000000000001ULL) {
6791         force_soft_fma = true;
6792     }
6793 }
6794