xref: /openbmc/qemu/fpu/softfloat.c (revision dedd123c)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate <soft_t>_input_flush__nocheck(): unconditionally replace a
 * denormal input with a same-signed zero and raise the input_denormal
 * flag.  Callers are expected to have already checked
 * s->flush_inputs_to_zero (hence "nocheck").
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * Generate <soft_t>_input_flush1(): flush one denormal input to zero,
 * but only when the status flag flush_inputs_to_zero is set.  The common
 * (flag clear) case returns immediately.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/*
 * Generate <soft_t>_input_flush2(): two-operand variant of the above —
 * flush both inputs when flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/*
 * Generate <soft_t>_input_flush3(): three-operand variant (used by
 * fused multiply-add paths) — flush all three inputs when
 * flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
/* Measured on x86_64: fpclassify pays off for doubles but not singles. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* Default for all other hosts: use the softfloat bit-test primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205 
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217 
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/*
 * When hardfloat is available, keep the softfloat slow path out of line
 * (noinline) so the fast path stays compact; QEMU_FLATTEN inlines the
 * softfloat helpers into that out-of-line body.
 */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
/* Pun between the softfloat representation (s) and the host type (h). */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicate over two operands: "may the hard path be used / trusted?" */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* The softfloat fallback and the host-FPU implementation of an op. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic 2-operand float32 wrapper: try the host-FPU ("hard") path and
 * fall back to the softfloat ("soft") implementation whenever exception
 * detection would be unreliable (see the Hardfloat comment above).
 * pre/post are caller-supplied checks on the operands; pre gates entry
 * to the hard path, post decides whether a tiny result must be redone
 * in softfloat.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat requires inexact already set + round-to-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Honor flush_inputs_to_zero before running the pre() check. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* An infinite result after pre() passed is reported as overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the normal range: redo in softfloat so the
           underflow/denormal flags come out right. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic 2-operand float64 wrapper; identical structure to float32_gen2
 * above, with double-precision helpers and limits.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat requires inexact already set + round-to-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Honor flush_inputs_to_zero before running the pre() check. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* An infinite result after pre() passed is reported as overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the normal range: redo in softfloat so the
           underflow/denormal flags come out right. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407 
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410     return float32_val(a) & 0x007FFFFF;
411 }
412 
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416 
417 static inline int extractFloat32Exp(float32 a)
418 {
419     return (float32_val(a) >> 23) & 0xFF;
420 }
421 
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425 
426 static inline bool extractFloat32Sign(float32 a)
427 {
428     return float32_val(a) >> 31;
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434 
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439 
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443 
444 static inline int extractFloat64Exp(float64 a)
445 {
446     return (float64_val(a) >> 52) & 0x7FF;
447 }
448 
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452 
453 static inline bool extractFloat64Sign(float64 a)
454 {
455     return float64_val(a) >> 63;
456 }
457 
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,   /* not yet decomposed */
    float_class_zero,
    float_class_normal,         /* includes canonicalized subnormals */
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471 
/* Turn a FloatClass into a one-hot mask bit, so sets of classes can be
 * tested with a single AND (see the pick_nan_muladd ab/abc masks). */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Common combinations. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484 
485 
/* Simple helpers for checking if, or what kind of, NaN we have */

/* Either NaN kind; relies on qnan/snan being the top enumerators. */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

/* Signaling NaN only. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

/* Quiet NaN only. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
501 
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;                /* unbiased */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
527 
/* 128-bit fraction: two words, most significant first. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;
535 
/* 256-bit fraction: four words, most significant first (used as the
 * double-width intermediate for 128-bit multiply/muladd). */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
545 
/* These apply to the most significant word of each FloatPartsN.
 * A canonical fraction has the implicit integer bit at bit 63. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
549 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
575 
/* Expand fields based on the size of exponent and fraction.
 * The (-F - n) & 63 expressions compute bit positions counted down from
 * the top of the 64-bit word holding the left-justified fraction. */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
587 
/* IEEE binary16. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* binary16 layout with ARM Alternative Half Precision semantics. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: binary32 exponent range with a truncated 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* IEEE binary128. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
612 
613 /* Unpack a float to parts, but do not canonicalize.  */
614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
615 {
616     const int f_size = fmt->frac_size;
617     const int e_size = fmt->exp_size;
618 
619     *r = (FloatParts64) {
620         .cls = float_class_unclassified,
621         .sign = extract64(raw, f_size + e_size, 1),
622         .exp = extract64(raw, f_size, e_size),
623         .frac = extract64(raw, 0, f_size)
624     };
625 }
626 
/* Per-format wrappers: all <= 64-bit formats share unpack_raw64. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
646 
647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
648 {
649     const int f_size = float128_params.frac_size - 64;
650     const int e_size = float128_params.exp_size;
651 
652     *p = (FloatParts128) {
653         .cls = float_class_unclassified,
654         .sign = extract64(f.high, f_size + e_size, 1),
655         .exp = extract64(f.high, f_size, e_size),
656         .frac_hi = extract64(f.high, 0, f_size),
657         .frac_lo = f.low,
658     };
659 }
660 
661 /* Pack a float from parts, but do not canonicalize.  */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
/* Per-format wrappers around pack_raw64. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
693 
694 static float128 float128_pack_raw(const FloatParts128 *p)
695 {
696     const int f_size = float128_params.frac_size - 64;
697     const int e_size = float128_params.exp_size;
698     uint64_t hi;
699 
700     hi = (uint64_t)p->sign << (f_size + e_size);
701     hi = deposit64(hi, f_size, e_size, p->exp);
702     hi = deposit64(hi, 0, f_size, p->frac_hi);
703     return make_float128(hi, p->frac_lo);
704 }
705 
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

/*
 * Dispatch macros: select the parts64_/parts128_(/parts256_) implementation
 * according to the static type of the FloatPartsN pointer P, via
 * QEMU_GENERIC.  The parts64 variant is the default case.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

/* Forward declarations; the bodies come from softfloat-parts.c.inc. */
static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* ab_mask/abc_mask are float_cmask() sets describing the operand classes. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805 
806 /*
807  * Helper functions for softfloat-parts.c.inc, per-size operations.
808  */
809 
/*
 * Dispatch frac{64,128,256}_NAME on the static type of P.  FloatParts64
 * is the default case; FloatParts128 (and FloatParts256, in the _256
 * variant) are selected by QEMU_GENERIC type matching.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
816 
817 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
818 {
819     return uadd64_overflow(a->frac, b->frac, &r->frac);
820 }
821 
822 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
823 {
824     bool c = 0;
825     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
826     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
827     return c;
828 }
829 
830 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
831 {
832     bool c = 0;
833     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
834     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
835     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
836     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
837     return c;
838 }
839 
840 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
841 
842 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
843 {
844     return uadd64_overflow(a->frac, c, &r->frac);
845 }
846 
847 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
848 {
849     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
850     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
851 }
852 
853 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
854 
855 static void frac64_allones(FloatParts64 *a)
856 {
857     a->frac = -1;
858 }
859 
860 static void frac128_allones(FloatParts128 *a)
861 {
862     a->frac_hi = a->frac_lo = -1;
863 }
864 
865 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
866 
867 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
868 {
869     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
870 }
871 
872 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
873 {
874     uint64_t ta = a->frac_hi, tb = b->frac_hi;
875     if (ta == tb) {
876         ta = a->frac_lo, tb = b->frac_lo;
877         if (ta == tb) {
878             return 0;
879         }
880     }
881     return ta < tb ? -1 : 1;
882 }
883 
884 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
885 
/* Zero the fraction of @a. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
897 
898 static bool frac64_eqz(FloatParts64 *a)
899 {
900     return a->frac == 0;
901 }
902 
903 static bool frac128_eqz(FloatParts128 *a)
904 {
905     return (a->frac_hi | a->frac_lo) == 0;
906 }
907 
908 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
909 
/*
 * Widening multiply: @r receives the full double-width product of the
 * fractions of @a and @b.
 */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

/* Dispatch on A, the narrow input type, since R is always wider. */
#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
922 
923 static void frac64_neg(FloatParts64 *a)
924 {
925     a->frac = -a->frac;
926 }
927 
928 static void frac128_neg(FloatParts128 *a)
929 {
930     bool c = 0;
931     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
932     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
933 }
934 
935 static void frac256_neg(FloatParts256 *a)
936 {
937     bool c = 0;
938     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
939     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
940     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
941     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
942 }
943 
944 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
945 
946 static int frac64_normalize(FloatParts64 *a)
947 {
948     if (a->frac) {
949         int shift = clz64(a->frac);
950         a->frac <<= shift;
951         return shift;
952     }
953     return 64;
954 }
955 
/*
 * Left-justify the 128-bit fraction of @a.  Returns the number of bit
 * positions shifted (0..127), or 128 when the fraction is zero.
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        /* Guard shl == 0: shifting frac_lo right by 64 would be UB. */
        if (shl) {
            int shr = 64 - shl;
            a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
            a->frac_lo = (a->frac_lo << shl);
        }
        return shl;
    } else if (a->frac_lo) {
        /* High limb empty: move the low limb up whole, then justify. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = (a->frac_lo << shl);
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
974 
/*
 * Left-justify the 256-bit fraction of @a.  Returns the number of bit
 * positions shifted (0..255), or 256 when the fraction is zero.
 * Works in two stages: first shift whole 64-bit limbs, then a common
 * sub-limb bit shift shared by both paths.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl, shr;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized. */
            return 0;
        }
        ret = shl;
    } else {
        /* Limb-granular shift: promote the first non-zero limb. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /*
     * Sub-limb shift.  shr = 64 - shl computed as -shl & 63, which is
     * only reached with 1 <= shl <= 63, avoiding a shift by 64.
     */
    shr = -shl & 63;
    a0 = (a0 << shl) | (a1 >> shr);
    a1 = (a1 << shl) | (a2 >> shr);
    a2 = (a2 << shl) | (a3 >> shr);
    a3 = (a3 << shl);

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1024 
/* Shift the fraction of @a left by @c bits; bits shifted out are lost. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

/* Shift the fraction of @a right by @c bits; bits shifted out are lost. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1048 
/*
 * Shift right with "jamming": any non-zero bits shifted out are OR'd
 * into the least significant bit (the sticky bit), so inexactness is
 * not lost by the shift.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    shift64RightJamming(a->frac, c, &a->frac);
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}
1058 
/*
 * 256-bit shift right with jamming: bits shifted out accumulate in
 * @sticky and are folded into the lsb of the result.  Limb-granular
 * moves first (c & 128, c & 64), then a common sub-limb shift.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;
    int invc;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Shift by two limbs if bit 7 of the count is set. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        /* Shift by one more limb if bit 6 is set. */
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift count >= 256: everything becomes sticky. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-limb shift; 1 <= c <= 63 here, so invc is 1..63 as well. */
    invc = -c & 63;
    sticky |= a3 << invc;
    a3 = (a3 >> c) | (a2 << invc);
    a2 = (a2 >> c) | (a1 << invc);
    a1 = (a1 >> c) | (a0 << invc);
    a0 = (a0 >> c);

 done:
    /* Fold any lost bits into the lsb as the sticky bit. */
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1104 
1105 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1106 {
1107     return usub64_overflow(a->frac, b->frac, &r->frac);
1108 }
1109 
1110 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1111 {
1112     bool c = 0;
1113     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1114     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1115     return c;
1116 }
1117 
1118 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1119 {
1120     bool c = 0;
1121     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1122     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1123     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1124     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1125     return c;
1126 }
1127 
1128 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1129 
/*
 * Narrow @a into @r, folding any discarded low bits into the lsb as a
 * sticky bit (the inverse of frac_widen, with jamming).
 */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

/* Widen @a into @r, zero-filling the new low limbs. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

/* Dispatch on B, the narrow input type, since A is always wider. */
#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1158 
/*
 * Instantiate the size-parameterized implementations from the .c.inc
 * templates.  N is the fraction width of the parts type being built;
 * W is the double-width type used for widening operations.  The 256-bit
 * instantiation only includes the addsub template (no W defined).
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1188 
1189 /*
1190  * Pack/unpack routines with a specific FloatFmt.
1191  */
1192 
/*
 * Unpack raw half-precision @f into canonical parts @p, using the
 * caller-supplied format @params.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* As above, with the standard IEEE half-precision format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack raw bfloat16 @f into canonical parts @p. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/*
 * Round canonical parts @p per @params and repack as a raw
 * half-precision value.
 */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

/* As above, with the standard IEEE half-precision format. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

/* Round and repack canonical parts @p as a raw bfloat16 value. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1233 
/* Unpack raw single-precision @f into canonical parts @p. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round and repack canonical parts @p as a raw single-precision value. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack raw double-precision @f into canonical parts @p. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round and repack canonical parts @p as a raw double-precision value. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

/* Unpack raw quad-precision @f into canonical parts @p. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

/* Round and repack canonical parts @p as a raw quad-precision value. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1275 
1276 /*
1277  * Addition and subtraction
1278  */
1279 
1280 static float16 QEMU_FLATTEN
1281 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1282 {
1283     FloatParts64 pa, pb, *pr;
1284 
1285     float16_unpack_canonical(&pa, a, status);
1286     float16_unpack_canonical(&pb, b, status);
1287     pr = parts_addsub(&pa, &pb, status, subtract);
1288 
1289     return float16_round_pack_canonical(pr, status);
1290 }
1291 
1292 float16 float16_add(float16 a, float16 b, float_status *status)
1293 {
1294     return float16_addsub(a, b, status, false);
1295 }
1296 
1297 float16 float16_sub(float16 a, float16 b, float_status *status)
1298 {
1299     return float16_addsub(a, b, status, true);
1300 }
1301 
1302 static float32 QEMU_SOFTFLOAT_ATTR
1303 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1304 {
1305     FloatParts64 pa, pb, *pr;
1306 
1307     float32_unpack_canonical(&pa, a, status);
1308     float32_unpack_canonical(&pb, b, status);
1309     pr = parts_addsub(&pa, &pb, status, subtract);
1310 
1311     return float32_round_pack_canonical(pr, status);
1312 }
1313 
1314 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1315 {
1316     return soft_f32_addsub(a, b, status, false);
1317 }
1318 
1319 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1320 {
1321     return soft_f32_addsub(a, b, status, true);
1322 }
1323 
1324 static float64 QEMU_SOFTFLOAT_ATTR
1325 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1326 {
1327     FloatParts64 pa, pb, *pr;
1328 
1329     float64_unpack_canonical(&pa, a, status);
1330     float64_unpack_canonical(&pb, b, status);
1331     pr = parts_addsub(&pa, &pb, status, subtract);
1332 
1333     return float64_round_pack_canonical(pr, status);
1334 }
1335 
1336 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1337 {
1338     return soft_f64_addsub(a, b, status, false);
1339 }
1340 
1341 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1342 {
1343     return soft_f64_addsub(a, b, status, true);
1344 }
1345 
/*
 * Hardfloat primitives: compute directly on the host FPU.  Used only
 * through float32_gen2/float64_gen2 once the inputs have passed the
 * fast-path checks.
 */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1365 
1366 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1367 {
1368     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1369         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1370     }
1371     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1372 }
1373 
1374 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1375 {
1376     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1377         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1378     } else {
1379         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1380     }
1381 }
1382 
/*
 * Route an add/sub through the generic hardfloat/softfloat selector,
 * with the zero-or-normal precondition and the add/sub/mul post-check.
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

/* Public entry points: pair the hardfloat op with its softfloat fallback. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1420 
1421 static bfloat16 QEMU_FLATTEN
1422 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1423 {
1424     FloatParts64 pa, pb, *pr;
1425 
1426     bfloat16_unpack_canonical(&pa, a, status);
1427     bfloat16_unpack_canonical(&pb, b, status);
1428     pr = parts_addsub(&pa, &pb, status, subtract);
1429 
1430     return bfloat16_round_pack_canonical(pr, status);
1431 }
1432 
1433 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1434 {
1435     return bfloat16_addsub(a, b, status, false);
1436 }
1437 
1438 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1439 {
1440     return bfloat16_addsub(a, b, status, true);
1441 }
1442 
1443 static float128 QEMU_FLATTEN
1444 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1445 {
1446     FloatParts128 pa, pb, *pr;
1447 
1448     float128_unpack_canonical(&pa, a, status);
1449     float128_unpack_canonical(&pb, b, status);
1450     pr = parts_addsub(&pa, &pb, status, subtract);
1451 
1452     return float128_round_pack_canonical(pr, status);
1453 }
1454 
1455 float128 float128_add(float128 a, float128 b, float_status *status)
1456 {
1457     return float128_addsub(a, b, status, false);
1458 }
1459 
1460 float128 float128_sub(float128 a, float128 b, float_status *status)
1461 {
1462     return float128_addsub(a, b, status, true);
1463 }
1464 
1465 /*
1466  * Multiplication
1467  */
1468 
1469 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1470 {
1471     FloatParts64 pa, pb, *pr;
1472 
1473     float16_unpack_canonical(&pa, a, status);
1474     float16_unpack_canonical(&pb, b, status);
1475     pr = parts_mul(&pa, &pb, status);
1476 
1477     return float16_round_pack_canonical(pr, status);
1478 }
1479 
1480 static float32 QEMU_SOFTFLOAT_ATTR
1481 soft_f32_mul(float32 a, float32 b, float_status *status)
1482 {
1483     FloatParts64 pa, pb, *pr;
1484 
1485     float32_unpack_canonical(&pa, a, status);
1486     float32_unpack_canonical(&pb, b, status);
1487     pr = parts_mul(&pa, &pb, status);
1488 
1489     return float32_round_pack_canonical(pr, status);
1490 }
1491 
1492 static float64 QEMU_SOFTFLOAT_ATTR
1493 soft_f64_mul(float64 a, float64 b, float_status *status)
1494 {
1495     FloatParts64 pa, pb, *pr;
1496 
1497     float64_unpack_canonical(&pa, a, status);
1498     float64_unpack_canonical(&pb, b, status);
1499     pr = parts_mul(&pa, &pb, status);
1500 
1501     return float64_round_pack_canonical(pr, status);
1502 }
1503 
/* Hardfloat multiply primitives, used via float32_gen2/float64_gen2. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1513 
/*
 * Public multiply entry points: hardfloat fast path with softfloat
 * fallback, sharing the add/sub/mul pre/post checks.
 */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1527 
1528 bfloat16 QEMU_FLATTEN
1529 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1530 {
1531     FloatParts64 pa, pb, *pr;
1532 
1533     bfloat16_unpack_canonical(&pa, a, status);
1534     bfloat16_unpack_canonical(&pb, b, status);
1535     pr = parts_mul(&pa, &pb, status);
1536 
1537     return bfloat16_round_pack_canonical(pr, status);
1538 }
1539 
1540 float128 QEMU_FLATTEN
1541 float128_mul(float128 a, float128 b, float_status *status)
1542 {
1543     FloatParts128 pa, pb, *pr;
1544 
1545     float128_unpack_canonical(&pa, a, status);
1546     float128_unpack_canonical(&pb, b, status);
1547     pr = parts_mul(&pa, &pb, status);
1548 
1549     return float128_round_pack_canonical(pr, status);
1550 }
1551 
1552 /*
1553  * Fused multiply-add
1554  */
1555 
1556 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1557                                     int flags, float_status *status)
1558 {
1559     FloatParts64 pa, pb, pc, *pr;
1560 
1561     float16_unpack_canonical(&pa, a, status);
1562     float16_unpack_canonical(&pb, b, status);
1563     float16_unpack_canonical(&pc, c, status);
1564     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1565 
1566     return float16_round_pack_canonical(pr, status);
1567 }
1568 
1569 static float32 QEMU_SOFTFLOAT_ATTR
1570 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1571                 float_status *status)
1572 {
1573     FloatParts64 pa, pb, pc, *pr;
1574 
1575     float32_unpack_canonical(&pa, a, status);
1576     float32_unpack_canonical(&pb, b, status);
1577     float32_unpack_canonical(&pc, c, status);
1578     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1579 
1580     return float32_round_pack_canonical(pr, status);
1581 }
1582 
1583 static float64 QEMU_SOFTFLOAT_ATTR
1584 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1585                 float_status *status)
1586 {
1587     FloatParts64 pa, pb, pc, *pr;
1588 
1589     float64_unpack_canonical(&pa, a, status);
1590     float64_unpack_canonical(&pb, b, status);
1591     float64_unpack_canonical(&pc, c, status);
1592     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1593 
1594     return float64_round_pack_canonical(pr, status);
1595 }
1596 
/*
 * When set, the hardfloat fast paths in float32_muladd/float64_muladd
 * below always branch to the softfloat implementation.
 */
static bool force_soft_fma;
1598 
/*
 * Single-precision fused multiply-add with a hardfloat fast path.
 * Falls back to soft_f32_muladd whenever the host-FPU result cannot be
 * trusted to produce the correct value and exception flags.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Host FPU usable for this float_status configuration? */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* halve_result has no host-FPU equivalent. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* All three inputs must be zero or normal for the fast path. */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Compute the signed zero that the product would have. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals in case we must re-run in softfloat. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /*
             * Result at or below the smallest normal: redo in softfloat
             * so that underflow/inexact flags come out right.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1669 
/*
 * Double-precision fused multiply-add with a hardfloat fast path;
 * mirrors float32_muladd above.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Host FPU usable for this float_status configuration? */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* halve_result has no host-FPU equivalent. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* All three inputs must be zero or normal for the fast path. */
    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Compute the signed zero that the product would have. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals in case we must re-run in softfloat. */
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * NOTE(review): the bound is FLT_MIN, not DBL_MIN, even for
             * doubles -- a (very) conservative fallback threshold that
             * sends more results than strictly necessary back through
             * softfloat; confirm this is intentional.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1740 
1741 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1742                                       int flags, float_status *status)
1743 {
1744     FloatParts64 pa, pb, pc, *pr;
1745 
1746     bfloat16_unpack_canonical(&pa, a, status);
1747     bfloat16_unpack_canonical(&pb, b, status);
1748     bfloat16_unpack_canonical(&pc, c, status);
1749     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1750 
1751     return bfloat16_round_pack_canonical(pr, status);
1752 }
1753 
1754 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1755                                       int flags, float_status *status)
1756 {
1757     FloatParts128 pa, pb, pc, *pr;
1758 
1759     float128_unpack_canonical(&pa, a, status);
1760     float128_unpack_canonical(&pb, b, status);
1761     float128_unpack_canonical(&pc, c, status);
1762     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1763 
1764     return float128_round_pack_canonical(pr, status);
1765 }
1766 
1767 /*
1768  * Returns the result of dividing the floating-point value `a' by the
1769  * corresponding value `b'. The operation is performed according to
1770  * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1771  */
1772 
/*
 * Divide canonical parts a / b, handling all special cases (NaN, Inf,
 * zero) and raising flags in @s as required.
 */
static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward.  If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce the an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set, which is already true.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        } else {
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
        }
        q = udiv_qrnnd(&r, n1, n0, b.frac);

        /* Set lsb if there is a remainder, to set inexact. */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return *parts_pick_nan(&a, &b, s);
    }
    /* 0/0 or Inf/Inf */
    if (a.cls == b.cls
        &&
        (a.cls == float_class_inf || a.cls == float_class_zero)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    /* Inf / x or 0 / x */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    /* Div 0 => Inf */
    if (b.cls == float_class_zero) {
        float_raise(float_flag_divbyzero, s);
        a.cls = float_class_inf;
        a.sign = sign;
        return a;
    }
    /* Div by Inf */
    if (b.cls == float_class_inf) {
        a.cls = float_class_zero;
        a.sign = sign;
        return a;
    }
    /* All class combinations are covered above. */
    g_assert_not_reached();
}
1838 
1839 float16 float16_div(float16 a, float16 b, float_status *status)
1840 {
1841     FloatParts64 pa, pb, pr;
1842 
1843     float16_unpack_canonical(&pa, a, status);
1844     float16_unpack_canonical(&pb, b, status);
1845     pr = div_floats(pa, pb, status);
1846 
1847     return float16_round_pack_canonical(&pr, status);
1848 }
1849 
1850 static float32 QEMU_SOFTFLOAT_ATTR
1851 soft_f32_div(float32 a, float32 b, float_status *status)
1852 {
1853     FloatParts64 pa, pb, pr;
1854 
1855     float32_unpack_canonical(&pa, a, status);
1856     float32_unpack_canonical(&pb, b, status);
1857     pr = div_floats(pa, pb, status);
1858 
1859     return float32_round_pack_canonical(&pr, status);
1860 }
1861 
1862 static float64 QEMU_SOFTFLOAT_ATTR
1863 soft_f64_div(float64 a, float64 b, float_status *status)
1864 {
1865     FloatParts64 pa, pb, pr;
1866 
1867     float64_unpack_canonical(&pa, a, status);
1868     float64_unpack_canonical(&pb, b, status);
1869     pr = div_floats(pa, pb, status);
1870 
1871     return float64_round_pack_canonical(&pr, status);
1872 }
1873 
/* Divide using the host FPU; used only on the hardfloat fast path. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
1878 
/* Divide using the host FPU; used only on the hardfloat fast path. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
1883 
1884 static bool f32_div_pre(union_float32 a, union_float32 b)
1885 {
1886     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1887         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1888                fpclassify(b.h) == FP_NORMAL;
1889     }
1890     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1891 }
1892 
1893 static bool f64_div_pre(union_float64 a, union_float64 b)
1894 {
1895     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1896         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1897                fpclassify(b.h) == FP_NORMAL;
1898     }
1899     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1900 }
1901 
1902 static bool f32_div_post(union_float32 a, union_float32 b)
1903 {
1904     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1905         return fpclassify(a.h) != FP_ZERO;
1906     }
1907     return !float32_is_zero(a.s);
1908 }
1909 
1910 static bool f64_div_post(union_float64 a, union_float64 b)
1911 {
1912     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1913         return fpclassify(a.h) != FP_ZERO;
1914     }
1915     return !float64_is_zero(a.s);
1916 }
1917 
/*
 * Divide two float32 values, dispatching to the host-FPU fast path
 * when f32_div_pre accepts the inputs and f32_div_post accepts the
 * result, otherwise to the softfloat implementation.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
1924 
/*
 * Divide two float64 values, dispatching to the host-FPU fast path
 * when f64_div_pre accepts the inputs and f64_div_post accepts the
 * result, otherwise to the softfloat implementation.
 */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
1931 
1932 /*
1933  * Returns the result of dividing the bfloat16
1934  * value `a' by the corresponding value `b'.
1935  */
1936 
1937 bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1938 {
1939     FloatParts64 pa, pb, pr;
1940 
1941     bfloat16_unpack_canonical(&pa, a, status);
1942     bfloat16_unpack_canonical(&pb, b, status);
1943     pr = div_floats(pa, pb, status);
1944 
1945     return bfloat16_round_pack_canonical(&pr, status);
1946 }
1947 
1948 /*
1949  * Float to Float conversions
1950  *
1951  * Returns the result of converting one float format to another. The
1952  * conversion is performed according to the IEC/IEEE Standard for
1953  * Binary Floating-Point Arithmetic.
1954  *
1955  * The float_to_float helper only needs to take care of raising
1956  * invalid exceptions and handling the conversion on NaNs.
1957  */
1958 
1959 static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
1960                                  float_status *s)
1961 {
1962     if (dstf->arm_althp) {
1963         switch (a.cls) {
1964         case float_class_qnan:
1965         case float_class_snan:
1966             /* There is no NaN in the destination format.  Raise Invalid
1967              * and return a zero with the sign of the input NaN.
1968              */
1969             float_raise(float_flag_invalid, s);
1970             a.cls = float_class_zero;
1971             a.frac = 0;
1972             a.exp = 0;
1973             break;
1974 
1975         case float_class_inf:
1976             /* There is no Inf in the destination format.  Raise Invalid
1977              * and return the maximum normal with the correct sign.
1978              */
1979             float_raise(float_flag_invalid, s);
1980             a.cls = float_class_normal;
1981             a.exp = dstf->exp_max;
1982             a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1983             break;
1984 
1985         default:
1986             break;
1987         }
1988     } else if (is_nan(a.cls)) {
1989         parts_return_nan(&a, s);
1990     }
1991     return a;
1992 }
1993 
1994 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1995 {
1996     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1997     FloatParts64 pa, pr;
1998 
1999     float16a_unpack_canonical(&pa, a, s, fmt16);
2000     pr = float_to_float(pa, &float32_params, s);
2001     return float32_round_pack_canonical(&pr, s);
2002 }
2003 
2004 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2005 {
2006     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2007     FloatParts64 pa, pr;
2008 
2009     float16a_unpack_canonical(&pa, a, s, fmt16);
2010     pr = float_to_float(pa, &float64_params, s);
2011     return float64_round_pack_canonical(&pr, s);
2012 }
2013 
2014 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2015 {
2016     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2017     FloatParts64 pa, pr;
2018 
2019     float32_unpack_canonical(&pa, a, s);
2020     pr = float_to_float(pa, fmt16, s);
2021     return float16a_round_pack_canonical(&pr, s, fmt16);
2022 }
2023 
2024 static float64 QEMU_SOFTFLOAT_ATTR
2025 soft_float32_to_float64(float32 a, float_status *s)
2026 {
2027     FloatParts64 pa, pr;
2028 
2029     float32_unpack_canonical(&pa, a, s);
2030     pr = float_to_float(pa, &float64_params, s);
2031     return float64_round_pack_canonical(&pr, s);
2032 }
2033 
2034 float64 float32_to_float64(float32 a, float_status *s)
2035 {
2036     if (likely(float32_is_normal(a))) {
2037         /* Widening conversion can never produce inexact results.  */
2038         union_float32 uf;
2039         union_float64 ud;
2040         uf.s = a;
2041         ud.h = uf.h;
2042         return ud.s;
2043     } else if (float32_is_zero(a)) {
2044         return float64_set_sign(float64_zero, float32_is_neg(a));
2045     } else {
2046         return soft_float32_to_float64(a, s);
2047     }
2048 }
2049 
2050 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2051 {
2052     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2053     FloatParts64 pa, pr;
2054 
2055     float64_unpack_canonical(&pa, a, s);
2056     pr = float_to_float(pa, fmt16, s);
2057     return float16a_round_pack_canonical(&pr, s, fmt16);
2058 }
2059 
2060 float32 float64_to_float32(float64 a, float_status *s)
2061 {
2062     FloatParts64 pa, pr;
2063 
2064     float64_unpack_canonical(&pa, a, s);
2065     pr = float_to_float(pa, &float32_params, s);
2066     return float32_round_pack_canonical(&pr, s);
2067 }
2068 
2069 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2070 {
2071     FloatParts64 pa, pr;
2072 
2073     bfloat16_unpack_canonical(&pa, a, s);
2074     pr = float_to_float(pa, &float32_params, s);
2075     return float32_round_pack_canonical(&pr, s);
2076 }
2077 
2078 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2079 {
2080     FloatParts64 pa, pr;
2081 
2082     bfloat16_unpack_canonical(&pa, a, s);
2083     pr = float_to_float(pa, &float64_params, s);
2084     return float64_round_pack_canonical(&pr, s);
2085 }
2086 
2087 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2088 {
2089     FloatParts64 pa, pr;
2090 
2091     float32_unpack_canonical(&pa, a, s);
2092     pr = float_to_float(pa, &bfloat16_params, s);
2093     return bfloat16_round_pack_canonical(&pr, s);
2094 }
2095 
2096 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2097 {
2098     FloatParts64 pa, pr;
2099 
2100     float64_unpack_canonical(&pa, a, s);
2101     pr = float_to_float(pa, &bfloat16_params, s);
2102     return bfloat16_round_pack_canonical(&pr, s);
2103 }
2104 
2105 /*
2106  * Rounds the floating-point value `a' to an integer, and returns the
2107  * result as a floating-point value. The operation is performed
2108  * according to the IEC/IEEE Standard for Binary Floating-Point
2109  * Arithmetic.
2110  */
2111 
/*
 * Round the decomposed value `a' to an integral value, after scaling
 * its exponent by `scale', using rounding mode `rmode'.  Raises
 * inexact when bits are discarded; NaNs are propagated via
 * parts_return_nan.
 */
static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(&a, s);
        break;

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /* Clamp scale so that a.exp += scale cannot overflow int. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            bool one;
            /* all fractional: the result is 0 or +/-1 per rmode */
            float_raise(float_flag_inexact, s);
            switch (rmode) {
            case float_round_nearest_even:
                /* Round to 1 only when strictly above one half. */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                /* Round to 1 when at or above one half. */
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /*
             * Mixed integral/fractional: build masks around the lowest
             * integral bit (frac_lsb) and the fraction bits below it
             * (rnd_mask), then add the mode-dependent increment.
             */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* Exact tie with even integer part: do not increment. */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            if (a.frac & rnd_mask) {
                float_raise(float_flag_inexact, s);
                if (uadd64_overflow(a.frac, inc, &a.frac)) {
                    /* Carry out of the fraction: renormalize. */
                    a.frac >>= 1;
                    a.frac |= DECOMPOSED_IMPLICIT_BIT;
                    a.exp++;
                }
                /* Discard the now-rounded fraction bits. */
                a.frac &= ~rnd_mask;
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2213 
2214 float16 float16_round_to_int(float16 a, float_status *s)
2215 {
2216     FloatParts64 pa, pr;
2217 
2218     float16_unpack_canonical(&pa, a, s);
2219     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2220     return float16_round_pack_canonical(&pr, s);
2221 }
2222 
2223 float32 float32_round_to_int(float32 a, float_status *s)
2224 {
2225     FloatParts64 pa, pr;
2226 
2227     float32_unpack_canonical(&pa, a, s);
2228     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2229     return float32_round_pack_canonical(&pr, s);
2230 }
2231 
2232 float64 float64_round_to_int(float64 a, float_status *s)
2233 {
2234     FloatParts64 pa, pr;
2235 
2236     float64_unpack_canonical(&pa, a, s);
2237     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2238     return float64_round_pack_canonical(&pr, s);
2239 }
2240 
2241 /*
2242  * Rounds the bfloat16 value `a' to an integer, and returns the
2243  * result as a bfloat16 value.
2244  */
2245 
2246 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2247 {
2248     FloatParts64 pa, pr;
2249 
2250     bfloat16_unpack_canonical(&pa, a, s);
2251     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2252     return bfloat16_round_pack_canonical(&pr, s);
2253 }
2254 
2255 /*
2256  * Returns the result of converting the floating-point value `a' to
2257  * the two's complement integer format. The conversion is performed
2258  * according to the IEC/IEEE Standard for Binary Floating-Point
2259  * Arithmetic---which means in particular that the conversion is
2260  * rounded according to the current rounding mode. If `a' is a NaN,
2261  * the largest positive integer is returned. Otherwise, if the
2262  * conversion overflows, the largest integer with the same sign as `a'
2263  * is returned.
2264 */
2265 
/*
 * Round `in' to an integer per `rmode' after scaling by 2**scale,
 * then saturate to [min, max].  On NaN, Inf or overflow the previous
 * exception flags are restored and only invalid is raised (so the
 * inexact raised by round_to_int is discarded in those cases).
 */
static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    uint64_t r;
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? min : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            /* Too large to shift into range; forces saturation below. */
            r = UINT64_MAX;
        }
        if (p.sign) {
            /*
             * Unsigned negation of min is well-defined, so this bound
             * check also works when min == INT64_MIN.
             */
            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return max;
            }
        }
    default:
        g_assert_not_reached();
    }
}
2309 
2310 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2311                               float_status *s)
2312 {
2313     FloatParts64 p;
2314 
2315     float16_unpack_canonical(&p, a, s);
2316     return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
2317 }
2318 
2319 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2320                                 float_status *s)
2321 {
2322     FloatParts64 p;
2323 
2324     float16_unpack_canonical(&p, a, s);
2325     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2326 }
2327 
2328 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2329                                 float_status *s)
2330 {
2331     FloatParts64 p;
2332 
2333     float16_unpack_canonical(&p, a, s);
2334     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2335 }
2336 
2337 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2338                                 float_status *s)
2339 {
2340     FloatParts64 p;
2341 
2342     float16_unpack_canonical(&p, a, s);
2343     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2344 }
2345 
2346 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2347                                 float_status *s)
2348 {
2349     FloatParts64 p;
2350 
2351     float32_unpack_canonical(&p, a, s);
2352     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2353 }
2354 
2355 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2356                                 float_status *s)
2357 {
2358     FloatParts64 p;
2359 
2360     float32_unpack_canonical(&p, a, s);
2361     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2362 }
2363 
2364 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2365                                 float_status *s)
2366 {
2367     FloatParts64 p;
2368 
2369     float32_unpack_canonical(&p, a, s);
2370     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2371 }
2372 
2373 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2374                                 float_status *s)
2375 {
2376     FloatParts64 p;
2377 
2378     float64_unpack_canonical(&p, a, s);
2379     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2380 }
2381 
2382 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2383                                 float_status *s)
2384 {
2385     FloatParts64 p;
2386 
2387     float64_unpack_canonical(&p, a, s);
2388     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2389 }
2390 
2391 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2392                                 float_status *s)
2393 {
2394     FloatParts64 p;
2395 
2396     float64_unpack_canonical(&p, a, s);
2397     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2398 }
2399 
2400 int8_t float16_to_int8(float16 a, float_status *s)
2401 {
2402     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2403 }
2404 
2405 int16_t float16_to_int16(float16 a, float_status *s)
2406 {
2407     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2408 }
2409 
2410 int32_t float16_to_int32(float16 a, float_status *s)
2411 {
2412     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2413 }
2414 
2415 int64_t float16_to_int64(float16 a, float_status *s)
2416 {
2417     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2418 }
2419 
2420 int16_t float32_to_int16(float32 a, float_status *s)
2421 {
2422     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2423 }
2424 
2425 int32_t float32_to_int32(float32 a, float_status *s)
2426 {
2427     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2428 }
2429 
2430 int64_t float32_to_int64(float32 a, float_status *s)
2431 {
2432     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2433 }
2434 
2435 int16_t float64_to_int16(float64 a, float_status *s)
2436 {
2437     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2438 }
2439 
2440 int32_t float64_to_int32(float64 a, float_status *s)
2441 {
2442     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2443 }
2444 
2445 int64_t float64_to_int64(float64 a, float_status *s)
2446 {
2447     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2448 }
2449 
2450 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2451 {
2452     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2453 }
2454 
2455 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2456 {
2457     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2458 }
2459 
2460 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2461 {
2462     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2463 }
2464 
2465 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2466 {
2467     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2468 }
2469 
2470 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2471 {
2472     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2473 }
2474 
2475 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2476 {
2477     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2478 }
2479 
2480 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2481 {
2482     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2483 }
2484 
2485 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2486 {
2487     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2488 }
2489 
2490 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2491 {
2492     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2493 }
2494 
2495 /*
2496  * Returns the result of converting the floating-point value `a' to
2497  * the two's complement integer format.
2498  */
2499 
2500 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2501                                  float_status *s)
2502 {
2503     FloatParts64 p;
2504 
2505     bfloat16_unpack_canonical(&p, a, s);
2506     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2507 }
2508 
2509 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2510                                  float_status *s)
2511 {
2512     FloatParts64 p;
2513 
2514     bfloat16_unpack_canonical(&p, a, s);
2515     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2516 }
2517 
2518 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2519                                  float_status *s)
2520 {
2521     FloatParts64 p;
2522 
2523     bfloat16_unpack_canonical(&p, a, s);
2524     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2525 }
2526 
2527 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2528 {
2529     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2530 }
2531 
2532 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2533 {
2534     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2535 }
2536 
2537 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2538 {
2539     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2540 }
2541 
2542 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2543 {
2544     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2545 }
2546 
2547 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2548 {
2549     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2550 }
2551 
2552 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2553 {
2554     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2555 }
2556 
2557 /*
2558  *  Returns the result of converting the floating-point value `a' to
2559  *  the unsigned integer format. The conversion is performed according
2560  *  to the IEC/IEEE Standard for Binary Floating-Point
2561  *  Arithmetic---which means in particular that the conversion is
2562  *  rounded according to the current rounding mode. If `a' is a NaN,
2563  *  the largest unsigned integer is returned. Otherwise, if the
2564  *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the result is rounded and zero is returned;
2566  *  values that do not round to zero will raise the inexact exception
2567  *  flag.
2568  */
2569 
/*
 * Round `in' to an integer per `rmode' after scaling by 2**scale,
 * then saturate to [0, max].  On NaN, Inf, negative input or overflow
 * the previous exception flags are restored and only invalid is
 * raised (discarding the inexact raised by round_to_int in those
 * cases).
 */
static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s)
{
    int orig_flags = get_float_exception_flags(s);
    FloatParts64 p = round_to_int(in, rmode, scale, s);
    uint64_t r;

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? 0 : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        /* Any nonzero negative value is out of range for unsigned. */
        if (p.sign) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return 0;
        }

        if (p.exp <= DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }

        /* For uint64 this will never trip, but if p.exp is too large
         * to shift a decomposed fraction we shall have exited via the
         * 3rd leg above.
         */
        if (r > max) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }
        return r;
    default:
        g_assert_not_reached();
    }
}
2614 
2615 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2616                                 float_status *s)
2617 {
2618     FloatParts64 p;
2619 
2620     float16_unpack_canonical(&p, a, s);
2621     return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
2622 }
2623 
2624 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2625                                   float_status *s)
2626 {
2627     FloatParts64 p;
2628 
2629     float16_unpack_canonical(&p, a, s);
2630     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2631 }
2632 
2633 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2634                                   float_status *s)
2635 {
2636     FloatParts64 p;
2637 
2638     float16_unpack_canonical(&p, a, s);
2639     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2640 }
2641 
2642 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2643                                   float_status *s)
2644 {
2645     FloatParts64 p;
2646 
2647     float16_unpack_canonical(&p, a, s);
2648     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2649 }
2650 
2651 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2652                                   float_status *s)
2653 {
2654     FloatParts64 p;
2655 
2656     float32_unpack_canonical(&p, a, s);
2657     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2658 }
2659 
2660 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2661                                   float_status *s)
2662 {
2663     FloatParts64 p;
2664 
2665     float32_unpack_canonical(&p, a, s);
2666     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2667 }
2668 
2669 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2670                                   float_status *s)
2671 {
2672     FloatParts64 p;
2673 
2674     float32_unpack_canonical(&p, a, s);
2675     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2676 }
2677 
2678 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2679                                   float_status *s)
2680 {
2681     FloatParts64 p;
2682 
2683     float64_unpack_canonical(&p, a, s);
2684     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2685 }
2686 
2687 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2688                                   float_status *s)
2689 {
2690     FloatParts64 p;
2691 
2692     float64_unpack_canonical(&p, a, s);
2693     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2694 }
2695 
2696 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2697                                   float_status *s)
2698 {
2699     FloatParts64 p;
2700 
2701     float64_unpack_canonical(&p, a, s);
2702     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2703 }
2704 
2705 uint8_t float16_to_uint8(float16 a, float_status *s)
2706 {
2707     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2708 }
2709 
2710 uint16_t float16_to_uint16(float16 a, float_status *s)
2711 {
2712     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2713 }
2714 
2715 uint32_t float16_to_uint32(float16 a, float_status *s)
2716 {
2717     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2718 }
2719 
2720 uint64_t float16_to_uint64(float16 a, float_status *s)
2721 {
2722     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2723 }
2724 
2725 uint16_t float32_to_uint16(float32 a, float_status *s)
2726 {
2727     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2728 }
2729 
2730 uint32_t float32_to_uint32(float32 a, float_status *s)
2731 {
2732     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2733 }
2734 
2735 uint64_t float32_to_uint64(float32 a, float_status *s)
2736 {
2737     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2738 }
2739 
2740 uint16_t float64_to_uint16(float64 a, float_status *s)
2741 {
2742     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2743 }
2744 
2745 uint32_t float64_to_uint32(float64 a, float_status *s)
2746 {
2747     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2748 }
2749 
2750 uint64_t float64_to_uint64(float64 a, float_status *s)
2751 {
2752     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2753 }
2754 
/*
 * Round-to-zero (truncating) variants: force float_round_to_zero
 * regardless of the rounding mode held in the status word.
 */

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2799 
2800 /*
2801  *  Returns the result of converting the bfloat16 value `a' to
2802  *  the unsigned integer format.
2803  */
2804 
/*
 * Unpack to canonical parts, then round/convert with the caller's
 * rounding mode and scale.  The UINT*_MAX argument is the upper bound
 * handed to round_to_uint_and_pack for the target integer width.
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
2831 
/*
 * Wrappers around the scalbn variants above: scale 0, with the
 * rounding mode taken from the status word or forced to truncation.
 */

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2861 
2862 /*
2863  * Integer to float conversions
2864  *
2865  * Returns the result of converting the two's complement integer `a'
2866  * to the floating-point format. The conversion is performed according
2867  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2868  */
2869 
2870 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
2871 {
2872     FloatParts64 r = { .sign = false };
2873 
2874     if (a == 0) {
2875         r.cls = float_class_zero;
2876     } else {
2877         uint64_t f = a;
2878         int shift;
2879 
2880         r.cls = float_class_normal;
2881         if (a < 0) {
2882             f = -f;
2883             r.sign = true;
2884         }
2885         shift = clz64(f);
2886         scale = MIN(MAX(scale, -0x10000), 0x10000);
2887 
2888         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2889         r.frac = f << shift;
2890     }
2891 
2892     return r;
2893 }
2894 
/*
 * int -> float16 conversions.  Only the int64 case does real work;
 * the narrower widths widen to int64 and share it, since every
 * int32/int16/int8 value is exactly representable there.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
2930 
/* int -> float32: same widen-to-int64 scheme as the float16 family. */
float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
2961 
/* int -> float64: same widen-to-int64 scheme as the float16 family. */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
2992 
2993 /*
2994  * Returns the result of converting the two's complement integer `a'
2995  * to the bfloat16 format.
2996  */
2997 
/* int -> bfloat16: same widen-to-int64 scheme as the float16 family. */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 pa = int_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3028 
3029 /*
3030  * Unsigned Integer to float conversions
3031  *
3032  * Returns the result of converting the unsigned integer `a' to the
3033  * floating-point format. The conversion is performed according to the
3034  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3035  */
3036 
3037 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3038 {
3039     FloatParts64 r = { .sign = false };
3040     int shift;
3041 
3042     if (a == 0) {
3043         r.cls = float_class_zero;
3044     } else {
3045         scale = MIN(MAX(scale, -0x10000), 0x10000);
3046         shift = clz64(a);
3047         r.cls = float_class_normal;
3048         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3049         r.frac = a << shift;
3050     }
3051 
3052     return r;
3053 }
3054 
/*
 * uint -> float16: the uint64 case does the work; narrower widths
 * zero-extend to uint64 and share it.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float16_round_pack_canonical(&pa, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3090 
/* uint -> float32: same zero-extend-to-uint64 scheme as above. */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float32_round_pack_canonical(&pa, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3121 
/* uint -> float64: same zero-extend-to-uint64 scheme as above. */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return float64_round_pack_canonical(&pa, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3152 
3153 /*
3154  * Returns the result of converting the unsigned integer `a' to the
3155  * bfloat16 format.
3156  */
3157 
/* uint -> bfloat16: same zero-extend-to-uint64 scheme as above. */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 pa = uint_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(&pa, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3188 
3189 /* Float Min/Max */
3190 /* min() and max() functions. These can't be implemented as
3191  * 'compare and pick one input' because that would mishandle
3192  * NaNs and +0 vs -0.
3193  *
3194  * minnum() and maxnum() functions. These are similar to the min()
3195  * and max() functions but if one of the arguments is a QNaN and
3196  * the other is numerical then the numerical argument is returned.
3197  * SNaNs will get quietened before being returned.
3198  * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3199  * and maxNum() operations. min() and max() are the typical min/max
3200  * semantics provided by many CPUs which predate that specification.
3201  *
3202  * minnummag() and maxnummag() functions correspond to minNumMag()
3203  * and minNumMag() from the IEEE-754 2008.
3204  */
3205 static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
3206                                 bool ieee, bool ismag, float_status *s)
3207 {
3208     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3209         if (ieee) {
3210             /* Takes two floating-point values `a' and `b', one of
3211              * which is a NaN, and returns the appropriate NaN
3212              * result. If either `a' or `b' is a signaling NaN,
3213              * the invalid exception is raised.
3214              */
3215             if (is_snan(a.cls) || is_snan(b.cls)) {
3216                 return *parts_pick_nan(&a, &b, s);
3217             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3218                 return b;
3219             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3220                 return a;
3221             }
3222         }
3223         return *parts_pick_nan(&a, &b, s);
3224     } else {
3225         int a_exp, b_exp;
3226 
3227         switch (a.cls) {
3228         case float_class_normal:
3229             a_exp = a.exp;
3230             break;
3231         case float_class_inf:
3232             a_exp = INT_MAX;
3233             break;
3234         case float_class_zero:
3235             a_exp = INT_MIN;
3236             break;
3237         default:
3238             g_assert_not_reached();
3239             break;
3240         }
3241         switch (b.cls) {
3242         case float_class_normal:
3243             b_exp = b.exp;
3244             break;
3245         case float_class_inf:
3246             b_exp = INT_MAX;
3247             break;
3248         case float_class_zero:
3249             b_exp = INT_MIN;
3250             break;
3251         default:
3252             g_assert_not_reached();
3253             break;
3254         }
3255 
3256         if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3257             bool a_less = a_exp < b_exp;
3258             if (a_exp == b_exp) {
3259                 a_less = a.frac < b.frac;
3260             }
3261             return a_less ^ ismin ? b : a;
3262         }
3263 
3264         if (a.sign == b.sign) {
3265             bool a_less = a_exp < b_exp;
3266             if (a_exp == b_exp) {
3267                 a_less = a.frac < b.frac;
3268             }
3269             return a.sign ^ a_less ^ ismin ? b : a;
3270         } else {
3271             return a.sign ^ ismin ? b : a;
3272         }
3273     }
3274 }
3275 
/*
 * Instantiate float{16,32,64}_{min,max,minnum,maxnum,minnummag,maxnummag}:
 * unpack both operands, run minmax_floats with the given flags, repack.
 * (NB: the "isiee" parameter name is a historical typo for "isieee".)
 */
#define MINMAX(sz, name, ismin, isiee, ismag)                           \
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                     float_status *s)                   \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
    return float ## sz ## _round_pack_canonical(&pr, s);                \
}

MINMAX(16, min, true, false, false)
MINMAX(16, minnum, true, true, false)
MINMAX(16, minnummag, true, true, true)
MINMAX(16, max, false, false, false)
MINMAX(16, maxnum, false, true, false)
MINMAX(16, maxnummag, false, true, true)

MINMAX(32, min, true, false, false)
MINMAX(32, minnum, true, true, false)
MINMAX(32, minnummag, true, true, true)
MINMAX(32, max, false, false, false)
MINMAX(32, maxnum, false, true, false)
MINMAX(32, maxnummag, false, true, true)

MINMAX(64, min, true, false, false)
MINMAX(64, minnum, true, true, false)
MINMAX(64, minnummag, true, true, true)
MINMAX(64, max, false, false, false)
MINMAX(64, maxnum, false, true, false)
MINMAX(64, maxnummag, false, true, true)

#undef MINMAX
3309 
/* As MINMAX above, but for the bfloat16 type. */
#define BF16_MINMAX(name, ismin, isiee, ismag)                          \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
{                                                                       \
    FloatParts64 pa, pb, pr;                                            \
    bfloat16_unpack_canonical(&pa, a, s);                               \
    bfloat16_unpack_canonical(&pb, b, s);                               \
    pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
    return bfloat16_round_pack_canonical(&pr, s);                       \
}

BF16_MINMAX(min, true, false, false)
BF16_MINMAX(minnum, true, true, false)
BF16_MINMAX(minnummag, true, true, true)
BF16_MINMAX(max, false, false, false)
BF16_MINMAX(maxnum, false, true, false)
BF16_MINMAX(maxnummag, false, true, true)

#undef BF16_MINMAX
3328 
3329 /* Floating point compare */
/*
 * Compare two decomposed values and return their FloatRelation.
 *
 * If either operand is a NaN the result is unordered; the invalid
 * flag is raised unless this is a quiet compare with only quiet NaNs.
 * Otherwise the classes are handled in order: zeroes (both zero
 * compare equal regardless of sign), infinities (only the sign
 * matters), then sign, exponent and fraction of finite values.
 */
static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
                                    float_status *s)
{
    if (is_nan(a.cls) || is_nan(b.cls)) {
        /* Signaling NaNs raise invalid even for quiet compares. */
        if (!is_quiet ||
            a.cls == float_class_snan ||
            b.cls == float_class_snan) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (a.cls == float_class_zero) {
        if (b.cls == float_class_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        }
        return b.sign ? float_relation_greater : float_relation_less;
    } else if (b.cls == float_class_zero) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* The only really important thing about infinity is its sign. If
     * both are infinities the sign marks the smallest of the two.
     */
    if (a.cls == float_class_inf) {
        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
            return float_relation_equal;
        }
        return a.sign ? float_relation_less : float_relation_greater;
    } else if (b.cls == float_class_inf) {
        return b.sign ? float_relation_greater : float_relation_less;
    }

    /* Both normal: a differing sign decides immediately. */
    if (a.sign != b.sign) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* Same sign: compare exponents, then fractions; a negative sign
     * inverts the sense of the magnitude comparison.
     */
    if (a.exp == b.exp) {
        if (a.frac == b.frac) {
            return float_relation_equal;
        }
        if (a.sign) {
            return a.frac > b.frac ?
                float_relation_less : float_relation_greater;
        } else {
            return a.frac > b.frac ?
                float_relation_greater : float_relation_less;
        }
    } else {
        if (a.sign) {
            return a.exp > b.exp ? float_relation_less : float_relation_greater;
        } else {
            return a.exp > b.exp ? float_relation_greater : float_relation_less;
        }
    }
}
3386 
/*
 * Instantiate the softfloat comparison helpers: unpack both operands
 * and defer to compare_floats.  `attr' selects the inlining attribute
 * (FLATTEN for the small float16 case, SOFTFLOAT_ATTR otherwise).
 */
#define COMPARE(name, attr, sz)                                         \
static int attr                                                         \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3402 
/* Public float16 compare: signaling (raises invalid on any NaN). */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

/* Quiet variant: only signaling NaNs raise invalid. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3412 
/*
 * float32 compare with a host-FPU fast path.  Ordered results are
 * produced directly with the C99 quiet comparison macros; the
 * unordered case (and any configuration without hardfloat) falls
 * through to the softfloat path so the exception flags are set
 * correctly.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush squashable denormals before using the host FPU. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3441 
/* Public float32 compare: signaling (raises invalid on any NaN). */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

/* Quiet variant: only signaling NaNs raise invalid. */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3451 
/*
 * float64 compare with a host-FPU fast path; mirrors f32_compare.
 * Unordered operands fall through to softfloat to set the flags.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush squashable denormals before using the host FPU. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3480 
/* Public float64 compare: signaling (raises invalid on any NaN). */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

/* Quiet variant: only signaling NaNs raise invalid. */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3490 
/* bfloat16 compare: softfloat only, no host-FPU fast path. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return compare_floats(pa, pb, is_quiet, s);
}

/* Public bfloat16 compare: signaling (raises invalid on any NaN). */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

/* Quiet variant: only signaling NaNs raise invalid. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3510 
3511 /* Multiply A by 2 raised to the power N.  */
3512 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3513 {
3514     if (unlikely(is_nan(a.cls))) {
3515         parts_return_nan(&a, s);
3516     }
3517     if (a.cls == float_class_normal) {
3518         /* The largest float type (even though not supported by FloatParts64)
3519          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3520          * still allows rounding to infinity, without allowing overflow
3521          * within the int32_t that backs FloatParts64.exp.
3522          */
3523         n = MIN(MAX(n, -0x10000), 0x10000);
3524         a.exp += n;
3525     }
3526     return a;
3527 }
3528 
/* Per-format scalbn: unpack, adjust the exponent, round and repack. */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3564 
3565 /*
3566  * Square Root
3567  *
3568  * The old softfloat code did an approximation step before zeroing in
3569  * on the final result. However for simpleness we just compute the
3570  * square root by iterating down from the implicit bit to enough extra
3571  * bits to ensure we get a correctly rounded result.
3572  *
3573  * This does mean however the calculation is slower than before,
3574  * especially for 64 bit floats.
3575  */
3576 
/*
 * Square root of a decomposed value, by restoring (bit-by-bit) long
 * division.  Special cases follow IEEE 754: sqrt(+-0) = +-0,
 * sqrt(+inf) = +inf, sqrt of anything negative raises invalid and
 * returns the default NaN, NaN inputs are quietened/propagated.
 * `p' supplies the target format's precision so iteration can stop
 * once enough bits for correct rounding have been produced.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* Negative (non-zero, non-NaN) operand: invalid operation. */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            /* This bit belongs in the root: accept it. */
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3636 
/* Per-format sqrt front ends: unpack, run sqrt_float, repack. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

/* Softfloat fallback for float32_sqrt's hardfloat fast path below. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

/* Softfloat fallback for float64_sqrt's hardfloat fast path below. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3665 
/*
 * float32 sqrt with a host-FPU fast path: only non-negative zero or
 * normal inputs are handled by the host sqrtf(); everything else
 * (NaNs, negatives, denormals, or no usable host FPU) goes through
 * the softfloat path so flags and NaN propagation stay correct.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FPU representation. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3692 
/* float64 sqrt with a host-FPU fast path; mirrors float32_sqrt. */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FPU representation. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3719 
/* bfloat16 sqrt: softfloat only, no host-FPU fast path. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&pr, status);
}
3728 
3729 /*----------------------------------------------------------------------------
3730 | The pattern for a default generated NaN.
3731 *----------------------------------------------------------------------------*/
3732 
3733 float16 float16_default_nan(float_status *status)
3734 {
3735     FloatParts64 p;
3736 
3737     parts_default_nan(&p, status);
3738     p.frac >>= float16_params.frac_shift;
3739     return float16_pack_raw(&p);
3740 }
3741 
/* Return the default (generated) NaN pattern for float32. */
float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into the float32 fraction field. */
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}
3750 
/* Return the default (generated) NaN pattern for float64. */
float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into the float64 fraction field. */
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}
3759 
/* Return the default (generated) NaN pattern for float128. */
float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction lives in two words; use the wide-shift helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3768 
/* Return the default (generated) NaN pattern for bfloat16. */
bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    /* Move the canonical fraction down into the bfloat16 fraction field. */
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3777 
3778 /*----------------------------------------------------------------------------
3779 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3780 *----------------------------------------------------------------------------*/
3781 
/* Convert a signalling float16 NaN into the corresponding quiet NaN. */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    /*
     * Shift the fraction up to the canonical position expected by
     * parts_silence_nan, quiet the NaN there, then shift it back down.
     */
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}
3792 
/* Convert a signalling float32 NaN into the corresponding quiet NaN. */
float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    /*
     * Shift the fraction up to the canonical position expected by
     * parts_silence_nan, quiet the NaN there, then shift it back down.
     */
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}
3803 
/* Convert a signalling float64 NaN into the corresponding quiet NaN. */
float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    /*
     * Shift the fraction up to the canonical position expected by
     * parts_silence_nan, quiet the NaN there, then shift it back down.
     */
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}
3814 
/* Convert a signalling bfloat16 NaN into the corresponding quiet NaN. */
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    /*
     * Shift the fraction up to the canonical position expected by
     * parts_silence_nan, quiet the NaN there, then shift it back down.
     */
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3825 
/* Convert a signalling float128 NaN into the corresponding quiet NaN. */
float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /*
     * The 128-bit fraction spans two words, so use the wide-shift helpers
     * to reach the canonical position, quiet the NaN, and shift back.
     */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3836 
3837 /*----------------------------------------------------------------------------
3838 | If `a' is denormal and we are in flush-to-zero mode then set the
3839 | input-denormal exception and return zero. Otherwise just return the value.
3840 *----------------------------------------------------------------------------*/
3841 
3842 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3843 {
3844     if (p.exp == 0 && p.frac != 0) {
3845         float_raise(float_flag_input_denormal, status);
3846         return true;
3847     }
3848 
3849     return false;
3850 }
3851 
3852 float16 float16_squash_input_denormal(float16 a, float_status *status)
3853 {
3854     if (status->flush_inputs_to_zero) {
3855         FloatParts64 p;
3856 
3857         float16_unpack_raw(&p, a);
3858         if (parts_squash_denormal(p, status)) {
3859             return float16_set_sign(float16_zero, p.sign);
3860         }
3861     }
3862     return a;
3863 }
3864 
3865 float32 float32_squash_input_denormal(float32 a, float_status *status)
3866 {
3867     if (status->flush_inputs_to_zero) {
3868         FloatParts64 p;
3869 
3870         float32_unpack_raw(&p, a);
3871         if (parts_squash_denormal(p, status)) {
3872             return float32_set_sign(float32_zero, p.sign);
3873         }
3874     }
3875     return a;
3876 }
3877 
3878 float64 float64_squash_input_denormal(float64 a, float_status *status)
3879 {
3880     if (status->flush_inputs_to_zero) {
3881         FloatParts64 p;
3882 
3883         float64_unpack_raw(&p, a);
3884         if (parts_squash_denormal(p, status)) {
3885             return float64_set_sign(float64_zero, p.sign);
3886         }
3887     }
3888     return a;
3889 }
3890 
3891 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3892 {
3893     if (status->flush_inputs_to_zero) {
3894         FloatParts64 p;
3895 
3896         bfloat16_unpack_raw(&p, a);
3897         if (parts_squash_denormal(p, status)) {
3898             return bfloat16_set_sign(bfloat16_zero, p.sign);
3899         }
3900     }
3901     return a;
3902 }
3903 
3904 /*----------------------------------------------------------------------------
3905 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3906 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3907 | input.  If `zSign' is 1, the input is negated before being converted to an
3908 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3909 | is simply rounded to an integer, with the inexact exception raised if the
3910 | input cannot be represented exactly as an integer.  However, if the fixed-
3911 | point input is too large, the invalid exception is raised and the largest
3912 | positive or negative integer is returned.
3913 *----------------------------------------------------------------------------*/
3914 
/*
 * Round the fixed-point value absZ (binary point between bits 6 and 7)
 * to an int32, negating first if zSign.  Raises invalid and saturates
 * on overflow; raises inexact when any fraction bits are discarded.
 */
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the binary point (bits 6..0) so that
     * the shift further down rounds in the requested direction.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;      /* half an integer ULP */
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the integer LSB (bit 7) is currently 0. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to make the result even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /*
     * Overflow if the rounded magnitude exceeds 32 bits, or if the sign
     * of the truncated result disagrees with the requested sign.
     */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3962 
3963 /*----------------------------------------------------------------------------
3964 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3965 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3966 | and returns the properly rounded 64-bit integer corresponding to the input.
3967 | If `zSign' is 1, the input is negated before being converted to an integer.
3968 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3969 | the inexact exception raised if the input cannot be represented exactly as
3970 | an integer.  However, if the fixed-point input is too large, the invalid
3971 | exception is raised and the largest positive or negative integer is
3972 | returned.
3973 *----------------------------------------------------------------------------*/
3974 
/*
 * Round the 128-bit fixed-point value absZ0:absZ1 (binary point between
 * the two words) to an int64, negating first if zSign.  Raises invalid
 * and saturates on overflow; raises inexact when absZ1 is discarded.
 */
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                               float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide whether the fraction word absZ1 rounds the integer word up. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of absZ1 set means the fraction is >= 1/2. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap to zero means the magnitude no longer fits in 64 bits. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie under nearest-even: force the result even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign mismatch after negation indicates signed overflow. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4024 
4025 /*----------------------------------------------------------------------------
4026 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4027 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4028 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4029 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4030 | with the inexact exception raised if the input cannot be represented exactly
4031 | as an integer.  However, if the fixed-point input is too large, the invalid
4032 | exception is raised and the largest unsigned integer is returned.
4033 *----------------------------------------------------------------------------*/
4034 
/*
 * Round the 128-bit fixed-point value absZ0:absZ1 (binary point between
 * the two words) to a uint64.  Raises invalid and returns UINT64_MAX on
 * overflow, or 0 for a negative nonzero input; raises inexact when
 * absZ1 is discarded.
 * NOTE(review): the declared return type is int64_t although the value
 * packed is unsigned — callers presumably reinterpret the bits; confirm
 * before changing the signature.
 */
static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
                                uint64_t absZ1, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = (roundingMode == float_round_nearest_even);
    /* Decide whether the fraction word absZ1 rounds the integer word up. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of absZ1 set means the fraction is >= 1/2. */
        increment = ((int64_t)absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if (increment) {
        ++absZ0;
        /* Wrap to zero means the magnitude no longer fits in 64 bits. */
        if (absZ0 == 0) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        /* Exact tie under nearest-even: force the result even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }

    /* A negative value cannot be represented as unsigned. */
    if (zSign && absZ0) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return absZ0;
}
4084 
4085 /*----------------------------------------------------------------------------
4086 | Normalizes the subnormal single-precision floating-point value represented
4087 | by the denormalized significand `aSig'.  The normalized exponent and
4088 | significand are stored at the locations pointed to by `zExpPtr' and
4089 | `zSigPtr', respectively.
4090 *----------------------------------------------------------------------------*/
4091 
/*
 * Normalize a subnormal float32 significand: shift its leading one up to
 * bit 23 and store the matching exponent adjustment.
 */
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* 8 = 32 bits minus the 24-bit (implicit-one) significand width. */
    int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4102 
4103 /*----------------------------------------------------------------------------
4104 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4105 | and significand `zSig', and returns the proper single-precision floating-
4106 | point value corresponding to the abstract input.  Ordinarily, the abstract
4107 | value is simply rounded and packed into the single-precision format, with
4108 | the inexact exception raised if the abstract input cannot be represented
4109 | exactly.  However, if the abstract value is too large, the overflow and
4110 | inexact exceptions are raised and an infinity or maximal finite value is
4111 | returned.  If the abstract value is too small, the input value is rounded to
4112 | a subnormal number, and the underflow and inexact exceptions are raised if
4113 | the abstract input cannot be represented exactly as a subnormal single-
4114 | precision floating-point number.
4115 |     The input significand `zSig' has its binary point between bits 30
4116 | and 29, which is 7 bits to the left of the usual location.  This shifted
4117 | significand must be normalized or smaller.  If `zSig' is not normalized,
4118 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4119 | and it must not require rounding.  In the usual case that `zSig' is
4120 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4121 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4122 | Binary Floating-Point Arithmetic.
4123 *----------------------------------------------------------------------------*/
4124 
/*
 * Round and pack (zSign, zExp, zSig) into a float32; see the block
 * comment above for the significand alignment contract.
 */
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the rounding point (bits 6..0) so
     * that the final shift rounds in the requested direction.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;      /* half a ULP */
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the result LSB (bit 7) is currently 0. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Unsigned compare catches both overflow (zExp large) and zExp < 0. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow.  Produce infinity unless the mode rounds toward
             * zero for this sign (or is round-to-odd), in which case
             * return the largest finite value (exponent 0xFE, all ones).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /*
             * Tininess is detected either before rounding (mode flag),
             * or when the value cannot round up into the normal range.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to make the result even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4201 
4202 /*----------------------------------------------------------------------------
4203 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4204 | and significand `zSig', and returns the proper single-precision floating-
4205 | point value corresponding to the abstract input.  This routine is just like
4206 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4207 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4208 | floating-point exponent.
4209 *----------------------------------------------------------------------------*/
4210 
4211 static float32
4212  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4213                               float_status *status)
4214 {
4215     int8_t shiftCount;
4216 
4217     shiftCount = clz32(zSig) - 1;
4218     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4219                                status);
4220 
4221 }
4222 
4223 /*----------------------------------------------------------------------------
4224 | Normalizes the subnormal double-precision floating-point value represented
4225 | by the denormalized significand `aSig'.  The normalized exponent and
4226 | significand are stored at the locations pointed to by `zExpPtr' and
4227 | `zSigPtr', respectively.
4228 *----------------------------------------------------------------------------*/
4229 
/*
 * Normalize a subnormal float64 significand: shift its leading one up to
 * bit 52 and store the matching exponent adjustment.
 */
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* 11 = 64 bits minus the 53-bit (implicit-one) significand width. */
    int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4240 
4241 /*----------------------------------------------------------------------------
4242 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4243 | double-precision floating-point value, returning the result.  After being
4244 | shifted into the proper positions, the three fields are simply added
4245 | together to form the result.  This means that any integer portion of `zSig'
4246 | will be added into the exponent.  Since a properly normalized significand
4247 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4248 | than the desired result exponent whenever `zSig' is a complete, normalized
4249 | significand.
4250 *----------------------------------------------------------------------------*/
4251 
4252 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4253 {
4254 
4255     return make_float64(
4256         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4257 
4258 }
4259 
4260 /*----------------------------------------------------------------------------
4261 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4262 | and significand `zSig', and returns the proper double-precision floating-
4263 | point value corresponding to the abstract input.  Ordinarily, the abstract
4264 | value is simply rounded and packed into the double-precision format, with
4265 | the inexact exception raised if the abstract input cannot be represented
4266 | exactly.  However, if the abstract value is too large, the overflow and
4267 | inexact exceptions are raised and an infinity or maximal finite value is
4268 | returned.  If the abstract value is too small, the input value is rounded to
4269 | a subnormal number, and the underflow and inexact exceptions are raised if
4270 | the abstract input cannot be represented exactly as a subnormal double-
4271 | precision floating-point number.
4272 |     The input significand `zSig' has its binary point between bits 62
4273 | and 61, which is 10 bits to the left of the usual location.  This shifted
4274 | significand must be normalized or smaller.  If `zSig' is not normalized,
4275 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4276 | and it must not require rounding.  In the usual case that `zSig' is
4277 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4278 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4279 | Binary Floating-Point Arithmetic.
4280 *----------------------------------------------------------------------------*/
4281 
/*
 * Round and pack (zSign, zExp, zSig) into a float64; see the block
 * comment above for the significand alignment contract.
 */
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the rounding point (bits 9..0) so
     * that the final shift rounds in the requested direction.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;     /* half a ULP */
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the result LSB (bit 10) is currently 0. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* Unsigned compare catches both overflow (zExp large) and zExp < 0. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow.  Produce infinity unless the mode rounds toward
             * zero for this sign (or is round-to-odd), in which case
             * return the largest finite value (exponent 0x7FE, all ones).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess is detected either before rounding (mode flag),
             * or when the value cannot round up into the normal range.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact tie under nearest-even: clear the LSB to make the result even. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4357 
4358 /*----------------------------------------------------------------------------
4359 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4360 | and significand `zSig', and returns the proper double-precision floating-
4361 | point value corresponding to the abstract input.  This routine is just like
4362 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4363 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4364 | floating-point exponent.
4365 *----------------------------------------------------------------------------*/
4366 
4367 static float64
4368  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4369                               float_status *status)
4370 {
4371     int8_t shiftCount;
4372 
4373     shiftCount = clz64(zSig) - 1;
4374     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4375                                status);
4376 
4377 }
4378 
4379 /*----------------------------------------------------------------------------
4380 | Normalizes the subnormal extended double-precision floating-point value
4381 | represented by the denormalized significand `aSig'.  The normalized exponent
4382 | and significand are stored at the locations pointed to by `zExpPtr' and
4383 | `zSigPtr', respectively.
4384 *----------------------------------------------------------------------------*/
4385 
/*
 * Normalize a subnormal floatx80 significand: the x80 format has an
 * explicit integer bit at bit 63, so shift the leading one all the way up
 * and store the matching exponent adjustment.
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4395 
4396 /*----------------------------------------------------------------------------
4397 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4398 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4399 | and returns the proper extended double-precision floating-point value
4400 | corresponding to the abstract input.  Ordinarily, the abstract value is
4401 | rounded and packed into the extended double-precision format, with the
4402 | inexact exception raised if the abstract input cannot be represented
4403 | exactly.  However, if the abstract value is too large, the overflow and
4404 | inexact exceptions are raised and an infinity or maximal finite value is
4405 | returned.  If the abstract value is too small, the input value is rounded to
4406 | a subnormal number, and the underflow and inexact exceptions are raised if
4407 | the abstract input cannot be represented exactly as a subnormal extended
4408 | double-precision floating-point number.
4409 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4410 | number of bits as single or double precision, respectively.  Otherwise, the
4411 | result is rounded to the full precision of the extended double-precision
4412 | format.
4413 |     The input significand must be normalized or smaller.  If the input
4414 | significand is not normalized, `zExp' must be 0; in that case, the result
4415 | returned is a subnormal number, and it must not require rounding.  The
4416 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4417 | Floating-Point Arithmetic.
4418 *----------------------------------------------------------------------------*/
4419 
4420 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4421                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4422                               float_status *status)
4423 {
4424     int8_t roundingMode;
4425     bool roundNearestEven, increment, isTiny;
4426     int64_t roundIncrement, roundMask, roundBits;
4427 
4428     roundingMode = status->float_rounding_mode;
4429     roundNearestEven = ( roundingMode == float_round_nearest_even );
4430     if ( roundingPrecision == 80 ) goto precision80;
4431     if ( roundingPrecision == 64 ) {
4432         roundIncrement = UINT64_C(0x0000000000000400);
4433         roundMask = UINT64_C(0x00000000000007FF);
4434     }
4435     else if ( roundingPrecision == 32 ) {
4436         roundIncrement = UINT64_C(0x0000008000000000);
4437         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4438     }
4439     else {
4440         goto precision80;
4441     }
4442     zSig0 |= ( zSig1 != 0 );
4443     switch (roundingMode) {
4444     case float_round_nearest_even:
4445     case float_round_ties_away:
4446         break;
4447     case float_round_to_zero:
4448         roundIncrement = 0;
4449         break;
4450     case float_round_up:
4451         roundIncrement = zSign ? 0 : roundMask;
4452         break;
4453     case float_round_down:
4454         roundIncrement = zSign ? roundMask : 0;
4455         break;
4456     default:
4457         abort();
4458     }
4459     roundBits = zSig0 & roundMask;
4460     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4461         if (    ( 0x7FFE < zExp )
4462              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4463            ) {
4464             goto overflow;
4465         }
4466         if ( zExp <= 0 ) {
4467             if (status->flush_to_zero) {
4468                 float_raise(float_flag_output_denormal, status);
4469                 return packFloatx80(zSign, 0, 0);
4470             }
4471             isTiny = status->tininess_before_rounding
4472                   || (zExp < 0 )
4473                   || (zSig0 <= zSig0 + roundIncrement);
4474             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4475             zExp = 0;
4476             roundBits = zSig0 & roundMask;
4477             if (isTiny && roundBits) {
4478                 float_raise(float_flag_underflow, status);
4479             }
4480             if (roundBits) {
4481                 float_raise(float_flag_inexact, status);
4482             }
4483             zSig0 += roundIncrement;
4484             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4485             roundIncrement = roundMask + 1;
4486             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4487                 roundMask |= roundIncrement;
4488             }
4489             zSig0 &= ~ roundMask;
4490             return packFloatx80( zSign, zExp, zSig0 );
4491         }
4492     }
4493     if (roundBits) {
4494         float_raise(float_flag_inexact, status);
4495     }
4496     zSig0 += roundIncrement;
4497     if ( zSig0 < roundIncrement ) {
4498         ++zExp;
4499         zSig0 = UINT64_C(0x8000000000000000);
4500     }
4501     roundIncrement = roundMask + 1;
4502     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4503         roundMask |= roundIncrement;
4504     }
4505     zSig0 &= ~ roundMask;
4506     if ( zSig0 == 0 ) zExp = 0;
4507     return packFloatx80( zSign, zExp, zSig0 );
4508  precision80:
4509     switch (roundingMode) {
4510     case float_round_nearest_even:
4511     case float_round_ties_away:
4512         increment = ((int64_t)zSig1 < 0);
4513         break;
4514     case float_round_to_zero:
4515         increment = 0;
4516         break;
4517     case float_round_up:
4518         increment = !zSign && zSig1;
4519         break;
4520     case float_round_down:
4521         increment = zSign && zSig1;
4522         break;
4523     default:
4524         abort();
4525     }
4526     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4527         if (    ( 0x7FFE < zExp )
4528              || (    ( zExp == 0x7FFE )
4529                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4530                   && increment
4531                 )
4532            ) {
4533             roundMask = 0;
4534  overflow:
4535             float_raise(float_flag_overflow | float_flag_inexact, status);
4536             if (    ( roundingMode == float_round_to_zero )
4537                  || ( zSign && ( roundingMode == float_round_up ) )
4538                  || ( ! zSign && ( roundingMode == float_round_down ) )
4539                ) {
4540                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4541             }
4542             return packFloatx80(zSign,
4543                                 floatx80_infinity_high,
4544                                 floatx80_infinity_low);
4545         }
4546         if ( zExp <= 0 ) {
4547             isTiny = status->tininess_before_rounding
4548                   || (zExp < 0)
4549                   || !increment
4550                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4551             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4552             zExp = 0;
4553             if (isTiny && zSig1) {
4554                 float_raise(float_flag_underflow, status);
4555             }
4556             if (zSig1) {
4557                 float_raise(float_flag_inexact, status);
4558             }
4559             switch (roundingMode) {
4560             case float_round_nearest_even:
4561             case float_round_ties_away:
4562                 increment = ((int64_t)zSig1 < 0);
4563                 break;
4564             case float_round_to_zero:
4565                 increment = 0;
4566                 break;
4567             case float_round_up:
4568                 increment = !zSign && zSig1;
4569                 break;
4570             case float_round_down:
4571                 increment = zSign && zSig1;
4572                 break;
4573             default:
4574                 abort();
4575             }
4576             if ( increment ) {
4577                 ++zSig0;
4578                 if (!(zSig1 << 1) && roundNearestEven) {
4579                     zSig0 &= ~1;
4580                 }
4581                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4582             }
4583             return packFloatx80( zSign, zExp, zSig0 );
4584         }
4585     }
4586     if (zSig1) {
4587         float_raise(float_flag_inexact, status);
4588     }
4589     if ( increment ) {
4590         ++zSig0;
4591         if ( zSig0 == 0 ) {
4592             ++zExp;
4593             zSig0 = UINT64_C(0x8000000000000000);
4594         }
4595         else {
4596             if (!(zSig1 << 1) && roundNearestEven) {
4597                 zSig0 &= ~1;
4598             }
4599         }
4600     }
4601     else {
4602         if ( zSig0 == 0 ) zExp = 0;
4603     }
4604     return packFloatx80( zSign, zExp, zSig0 );
4605 
4606 }
4607 
4608 /*----------------------------------------------------------------------------
4609 | Takes an abstract floating-point value having sign `zSign', exponent
4610 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4611 | and returns the proper extended double-precision floating-point value
4612 | corresponding to the abstract input.  This routine is just like
4613 | `roundAndPackFloatx80' except that the input significand does not have to be
4614 | normalized.
4615 *----------------------------------------------------------------------------*/
4616 
4617 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4618                                        bool zSign, int32_t zExp,
4619                                        uint64_t zSig0, uint64_t zSig1,
4620                                        float_status *status)
4621 {
4622     int8_t shiftCount;
4623 
4624     if ( zSig0 == 0 ) {
4625         zSig0 = zSig1;
4626         zSig1 = 0;
4627         zExp -= 64;
4628     }
4629     shiftCount = clz64(zSig0);
4630     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4631     zExp -= shiftCount;
4632     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4633                                 zSig0, zSig1, status);
4634 
4635 }
4636 
4637 /*----------------------------------------------------------------------------
4638 | Returns the least-significant 64 fraction bits of the quadruple-precision
4639 | floating-point value `a'.
4640 *----------------------------------------------------------------------------*/
4641 
4642 static inline uint64_t extractFloat128Frac1( float128 a )
4643 {
4644 
4645     return a.low;
4646 
4647 }
4648 
4649 /*----------------------------------------------------------------------------
4650 | Returns the most-significant 48 fraction bits of the quadruple-precision
4651 | floating-point value `a'.
4652 *----------------------------------------------------------------------------*/
4653 
4654 static inline uint64_t extractFloat128Frac0( float128 a )
4655 {
4656 
4657     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4658 
4659 }
4660 
4661 /*----------------------------------------------------------------------------
4662 | Returns the exponent bits of the quadruple-precision floating-point value
4663 | `a'.
4664 *----------------------------------------------------------------------------*/
4665 
4666 static inline int32_t extractFloat128Exp( float128 a )
4667 {
4668 
4669     return ( a.high>>48 ) & 0x7FFF;
4670 
4671 }
4672 
4673 /*----------------------------------------------------------------------------
4674 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4675 *----------------------------------------------------------------------------*/
4676 
4677 static inline bool extractFloat128Sign(float128 a)
4678 {
4679     return a.high >> 63;
4680 }
4681 
4682 /*----------------------------------------------------------------------------
4683 | Normalizes the subnormal quadruple-precision floating-point value
4684 | represented by the denormalized significand formed by the concatenation of
4685 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4686 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4687 | significand are stored at the location pointed to by `zSig0Ptr', and the
4688 | least significant 64 bits of the normalized significand are stored at the
4689 | location pointed to by `zSig1Ptr'.
4690 *----------------------------------------------------------------------------*/
4691 
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* All significant bits are in the low word.  Place the leading 1
           at bit 48 of the high word (15 = 64 - 49 bits of headroom). */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            /* The leading 1 is already above bit 48: split aSig1 across
               both output words.  "shiftCount & 63" turns the negative
               count into the complementary left shift (64 + shiftCount),
               giving the bits that fall below the high word. */
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        /* Compensate for the 64-bit word move plus the normalization
           shift; the extra -63/+1 keeps zExp one below the true exponent
           as packFloat128 expects. */
        *zExpPtr = - shiftCount - 63;
    }
    else {
        /* Normalize the full 128-bit significand so the leading 1 lands
           at bit 48 of the high word. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }

}
4722 
4723 /*----------------------------------------------------------------------------
4724 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4725 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4726 | floating-point value, returning the result.  After being shifted into the
4727 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4728 | added together to form the most significant 32 bits of the result.  This
4729 | means that any integer portion of `zSig0' will be added into the exponent.
4730 | Since a properly normalized significand will have an integer portion equal
4731 | to 1, the `zExp' input should be 1 less than the desired result exponent
4732 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4733 | significand.
4734 *----------------------------------------------------------------------------*/
4735 
4736 static inline float128
4737 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4738 {
4739     float128 z;
4740 
4741     z.low = zSig1;
4742     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4743     return z;
4744 }
4745 
4746 /*----------------------------------------------------------------------------
4747 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4748 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4749 | and `zSig2', and returns the proper quadruple-precision floating-point value
4750 | corresponding to the abstract input.  Ordinarily, the abstract value is
4751 | simply rounded and packed into the quadruple-precision format, with the
4752 | inexact exception raised if the abstract input cannot be represented
4753 | exactly.  However, if the abstract value is too large, the overflow and
4754 | inexact exceptions are raised and an infinity or maximal finite value is
4755 | returned.  If the abstract value is too small, the input value is rounded to
4756 | a subnormal number, and the underflow and inexact exceptions are raised if
4757 | the abstract input cannot be represented exactly as a subnormal quadruple-
4758 | precision floating-point number.
4759 |     The input significand must be normalized or smaller.  If the input
4760 | significand is not normalized, `zExp' must be 0; in that case, the result
4761 | returned is a subnormal number, and it must not require rounding.  In the
4762 | usual case that the input significand is normalized, `zExp' must be 1 less
4763 | than the ``true'' floating-point exponent.  The handling of underflow and
4764 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4765 *----------------------------------------------------------------------------*/
4766 
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* zSig2 holds the guard/sticky bits.  Decide whether the 128-bit
       significand <zSig0,zSig1> must be incremented. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the guard bit (MSB of zSig2) is set. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round-to-odd: bump only when the result would be even. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Out-of-range exponent: handle overflow and underflow/subnormals.
       The unsigned compare also catches negative zExp. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow: return either the largest finite number or
               infinity, depending on rounding direction and sign. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result is subnormal (or zero in flush-to-zero mode). */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tininess: before rounding, or the pre-rounding value is
               certainly below the smallest normal (exponent < -1, no
               increment pending, or significand below the maximum). */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize, jamming shifted-out bits into zSig2. */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The shift changed the guard/sticky bits; recompute the
               rounding decision. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        /* Add one ulp; a carry out of zSig0 lands in the exponent field
           via packFloat128's additive packing. */
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: clear the low bit when exactly halfway. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* An exact zero result takes exponent 0. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4878 
4879 /*----------------------------------------------------------------------------
4880 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4881 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4882 | returns the proper quadruple-precision floating-point value corresponding
4883 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4884 | except that the input significand has fewer bits and does not have to be
4885 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4886 | point exponent.
4887 *----------------------------------------------------------------------------*/
4888 
4889 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4890                                               uint64_t zSig0, uint64_t zSig1,
4891                                               float_status *status)
4892 {
4893     int8_t shiftCount;
4894     uint64_t zSig2;
4895 
4896     if ( zSig0 == 0 ) {
4897         zSig0 = zSig1;
4898         zSig1 = 0;
4899         zExp -= 64;
4900     }
4901     shiftCount = clz64(zSig0) - 15;
4902     if ( 0 <= shiftCount ) {
4903         zSig2 = 0;
4904         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4905     }
4906     else {
4907         shift128ExtraRightJamming(
4908             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4909     }
4910     zExp -= shiftCount;
4911     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4912 
4913 }
4914 
4915 
4916 /*----------------------------------------------------------------------------
4917 | Returns the result of converting the 32-bit two's complement integer `a'
4918 | to the extended double-precision floating-point format.  The conversion
4919 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4920 | Arithmetic.
4921 *----------------------------------------------------------------------------*/
4922 
4923 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4924 {
4925     bool zSign;
4926     uint32_t absA;
4927     int8_t shiftCount;
4928     uint64_t zSig;
4929 
4930     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4931     zSign = ( a < 0 );
4932     absA = zSign ? - a : a;
4933     shiftCount = clz32(absA) + 32;
4934     zSig = absA;
4935     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4936 
4937 }
4938 
4939 /*----------------------------------------------------------------------------
4940 | Returns the result of converting the 32-bit two's complement integer `a' to
4941 | the quadruple-precision floating-point format.  The conversion is performed
4942 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4943 *----------------------------------------------------------------------------*/
4944 
4945 float128 int32_to_float128(int32_t a, float_status *status)
4946 {
4947     bool zSign;
4948     uint32_t absA;
4949     int8_t shiftCount;
4950     uint64_t zSig0;
4951 
4952     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4953     zSign = ( a < 0 );
4954     absA = zSign ? - a : a;
4955     shiftCount = clz32(absA) + 17;
4956     zSig0 = absA;
4957     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4958 
4959 }
4960 
4961 /*----------------------------------------------------------------------------
4962 | Returns the result of converting the 64-bit two's complement integer `a'
4963 | to the extended double-precision floating-point format.  The conversion
4964 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4965 | Arithmetic.
4966 *----------------------------------------------------------------------------*/
4967 
4968 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4969 {
4970     bool zSign;
4971     uint64_t absA;
4972     int8_t shiftCount;
4973 
4974     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4975     zSign = ( a < 0 );
4976     absA = zSign ? - a : a;
4977     shiftCount = clz64(absA);
4978     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4979 
4980 }
4981 
4982 /*----------------------------------------------------------------------------
4983 | Returns the result of converting the 64-bit two's complement integer `a' to
4984 | the quadruple-precision floating-point format.  The conversion is performed
4985 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4986 *----------------------------------------------------------------------------*/
4987 
4988 float128 int64_to_float128(int64_t a, float_status *status)
4989 {
4990     bool zSign;
4991     uint64_t absA;
4992     int8_t shiftCount;
4993     int32_t zExp;
4994     uint64_t zSig0, zSig1;
4995 
4996     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4997     zSign = ( a < 0 );
4998     absA = zSign ? - a : a;
4999     shiftCount = clz64(absA) + 49;
5000     zExp = 0x406E - shiftCount;
5001     if ( 64 <= shiftCount ) {
5002         zSig1 = 0;
5003         zSig0 = absA;
5004         shiftCount -= 64;
5005     }
5006     else {
5007         zSig1 = absA;
5008         zSig0 = 0;
5009     }
5010     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5011     return packFloat128( zSign, zExp, zSig0, zSig1 );
5012 
5013 }
5014 
5015 /*----------------------------------------------------------------------------
5016 | Returns the result of converting the 64-bit unsigned integer `a'
5017 | to the quadruple-precision floating-point format.  The conversion is performed
5018 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5019 *----------------------------------------------------------------------------*/
5020 
5021 float128 uint64_to_float128(uint64_t a, float_status *status)
5022 {
5023     if (a == 0) {
5024         return float128_zero;
5025     }
5026     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5027 }
5028 
5029 /*----------------------------------------------------------------------------
5030 | Returns the result of converting the single-precision floating-point value
5031 | `a' to the extended double-precision floating-point format.  The conversion
5032 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5033 | Arithmetic.
5034 *----------------------------------------------------------------------------*/
5035 
5036 floatx80 float32_to_floatx80(float32 a, float_status *status)
5037 {
5038     bool aSign;
5039     int aExp;
5040     uint32_t aSig;
5041 
5042     a = float32_squash_input_denormal(a, status);
5043     aSig = extractFloat32Frac( a );
5044     aExp = extractFloat32Exp( a );
5045     aSign = extractFloat32Sign( a );
5046     if ( aExp == 0xFF ) {
5047         if (aSig) {
5048             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5049                                                status);
5050             return floatx80_silence_nan(res, status);
5051         }
5052         return packFloatx80(aSign,
5053                             floatx80_infinity_high,
5054                             floatx80_infinity_low);
5055     }
5056     if ( aExp == 0 ) {
5057         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5058         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5059     }
5060     aSig |= 0x00800000;
5061     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5062 
5063 }
5064 
5065 /*----------------------------------------------------------------------------
5066 | Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
5068 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5069 | Arithmetic.
5070 *----------------------------------------------------------------------------*/
5071 
5072 float128 float32_to_float128(float32 a, float_status *status)
5073 {
5074     bool aSign;
5075     int aExp;
5076     uint32_t aSig;
5077 
5078     a = float32_squash_input_denormal(a, status);
5079     aSig = extractFloat32Frac( a );
5080     aExp = extractFloat32Exp( a );
5081     aSign = extractFloat32Sign( a );
5082     if ( aExp == 0xFF ) {
5083         if (aSig) {
5084             return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5085         }
5086         return packFloat128( aSign, 0x7FFF, 0, 0 );
5087     }
5088     if ( aExp == 0 ) {
5089         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5090         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5091         --aExp;
5092     }
5093     return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
5094 
5095 }
5096 
5097 /*----------------------------------------------------------------------------
5098 | Returns the remainder of the single-precision floating-point value `a'
5099 | with respect to the corresponding value `b'.  The operation is performed
5100 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5101 *----------------------------------------------------------------------------*/
5102 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* rem(NaN, x), rem(x, NaN) and rem(Inf, x) are invalid. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* rem(x, Inf) is x for finite x. */
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    /* rem(0, y) is 0 (with a's sign); normalize a subnormal dividend. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: the quotient fits in 32 bits. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| far smaller than |b|: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        /* First quotient bit, then the remaining expDiff bits by a
           32x32 division. */
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce 62 quotient bits at a time
           using an estimated 128/64 division (estimate may be up to 2
           too high, hence the "- 2" correction). */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        /* Final partial step for the leftover quotient bits. */
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past zero: find the first negative partial remainder. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    /* Choose the remainder of smaller magnitude (ties go to even q),
       giving the IEEE round-to-nearest remainder. */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* A negative remainder flips the result sign relative to a. */
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5198 
5199 
5200 
5201 /*----------------------------------------------------------------------------
5202 | Returns the binary exponential of the single-precision floating-point value
5203 | `a'. The operation is performed according to the IEC/IEEE Standard for
5204 | Binary Floating-Point Arithmetic.
5205 |
5206 | Uses the following identities:
5207 |
5208 | 1. -------------------------------------------------------------------------
5209 |      x    x*ln(2)
5210 |     2  = e
5211 |
5212 | 2. -------------------------------------------------------------------------
5213 |                      2     3     4     5           n
5214 |      x        x     x     x     x     x           x
5215 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5216 |               1!    2!    3!    4!    5!          n!
5217 *----------------------------------------------------------------------------*/
5218 
/* Taylor-series coefficients for e^x: entry i is the float64 bit
 * pattern of 1/(i+1)!, multiplying the x^(i+1) term in float32_exp2()
 * below.  The commented numbers are the factorial index n. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5237 
float32 float32_exp2(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    a = float32_squash_input_denormal(a, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    /* 2^NaN = NaN, 2^(-Inf) = 0, 2^(+Inf) = +Inf. */
    if ( aExp == 0xFF) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        return (aSign) ? float32_zero : a;
    }
    /* 2^(+/-0) = 1 exactly. */
    if (aExp == 0) {
        if (aSig == 0) return float32_one;
    }

    /* All remaining results are inexact. */
    float_raise(float_flag_inexact, status);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* 2^a = e^(a * ln 2); evaluate the Taylor series of e^x term by
       term in double precision, then round once back to float32. */
    x = float32_to_float64(a, status);
    x = float64_mul(x, float64_ln2, status);

    /* r accumulates 1 + sum(x^n / n!); xn tracks the current power. */
    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i], status);
        r = float64_add(r, f, status);

        xn = float64_mul(xn, x, status);
    }

    return float64_to_float32(r, status);
}
5282 
5283 /*----------------------------------------------------------------------------
5284 | Returns the binary log of the single-precision floating-point value `a'.
5285 | The operation is performed according to the IEC/IEEE Standard for Binary
5286 | Floating-Point Arithmetic.
5287 *----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    /* log2(+/-0) = -Inf; normalize a subnormal input. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* log2 of a negative number is invalid. */
    if ( aSign ) {
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* log2(NaN) = NaN, log2(+Inf) = +Inf. */
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        return a;
    }

    /* Integer part of the log is the unbiased exponent; place it above
       the 23 fraction bits that will be computed below. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /* Generate fraction bits one at a time: repeatedly square the
       1.23 fixed-point significand; each time the square reaches 2 the
       corresponding log2 bit is 1 and the value is halved. */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Negative logs were accumulated as a magnitude; negate. */
    if ( zSign )
        zSig = -zSig;

    /* 0x85 positions the fixed-point result's binary point. */
    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5332 
5333 /*----------------------------------------------------------------------------
5334 | Returns the result of converting the double-precision floating-point value
5335 | `a' to the extended double-precision floating-point format.  The conversion
5336 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5337 | Arithmetic.
5338 *----------------------------------------------------------------------------*/
5339 
5340 floatx80 float64_to_floatx80(float64 a, float_status *status)
5341 {
5342     bool aSign;
5343     int aExp;
5344     uint64_t aSig;
5345 
5346     a = float64_squash_input_denormal(a, status);
5347     aSig = extractFloat64Frac( a );
5348     aExp = extractFloat64Exp( a );
5349     aSign = extractFloat64Sign( a );
5350     if ( aExp == 0x7FF ) {
5351         if (aSig) {
5352             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5353                                                status);
5354             return floatx80_silence_nan(res, status);
5355         }
5356         return packFloatx80(aSign,
5357                             floatx80_infinity_high,
5358                             floatx80_infinity_low);
5359     }
5360     if ( aExp == 0 ) {
5361         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5362         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5363     }
5364     return
5365         packFloatx80(
5366             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5367 
5368 }
5369 
5370 /*----------------------------------------------------------------------------
5371 | Returns the result of converting the double-precision floating-point value
5372 | `a' to the quadruple-precision floating-point format.  The conversion is
5373 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5374 | Arithmetic.
5375 *----------------------------------------------------------------------------*/
5376 
5377 float128 float64_to_float128(float64 a, float_status *status)
5378 {
5379     bool aSign;
5380     int aExp;
5381     uint64_t aSig, zSig0, zSig1;
5382 
5383     a = float64_squash_input_denormal(a, status);
5384     aSig = extractFloat64Frac( a );
5385     aExp = extractFloat64Exp( a );
5386     aSign = extractFloat64Sign( a );
5387     if ( aExp == 0x7FF ) {
5388         if (aSig) {
5389             return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5390         }
5391         return packFloat128( aSign, 0x7FFF, 0, 0 );
5392     }
5393     if ( aExp == 0 ) {
5394         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5395         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5396         --aExp;
5397     }
5398     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5399     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5400 
5401 }
5402 
5403 
5404 /*----------------------------------------------------------------------------
5405 | Returns the remainder of the double-precision floating-point value `a'
5406 | with respect to the corresponding value `b'.  The operation is performed
5407 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5408 *----------------------------------------------------------------------------*/
5409 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* a is NaN, or rem(Inf, b): NaN propagates; rem of Inf is invalid. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* b is NaN or Inf: NaN propagates; rem(a, Inf) is a unchanged. */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    /* rem(a, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    /* rem(0, b) is 0 with a's sign; normalize a subnormal dividend. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the integer bit explicit and left-align both significands. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: a is already the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    /* First quotient bit: subtract b once if it fits. */
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    expDiff -= 64;
    /* Reduce the exponent gap 62 bits at a time using an estimated
     * 128/64 division; aSig is kept as the running partial remainder. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;   /* bias q low so it never overshoots */
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step for the remaining expDiff bits. */
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Subtract b until the remainder goes negative, tracking the last
     * non-negative value so we can pick the nearer of the two. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /* Round-to-nearest-even selection between the two candidates. */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5491 
5492 /*----------------------------------------------------------------------------
5493 | Returns the binary log of the double-precision floating-point value `a'.
5494 | The operation is performed according to the IEC/IEEE Standard for Binary
5495 | Floating-Point Arithmetic.
5496 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) = -infinity (sign = 1, exponent all ones). */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+inf) = +inf. */
        return a;
    }

    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);   /* make the integer bit explicit */
    zSign = aExp < 0;
    /* Cast avoids left-shifting a negative int (undefined behaviour). */
    zSig = (uint64_t)aExp << 52;
    /*
     * Generate fraction bits of log2 one per iteration: squaring the
     * significand doubles its log, and the carry into bit 53 is the
     * next result bit.
     */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* zSig holds the result in two's complement; take the magnitude. */
    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5540 
5541 /*----------------------------------------------------------------------------
5542 | Returns the result of converting the extended double-precision floating-
5543 | point value `a' to the 32-bit two's complement integer format.  The
5544 | conversion is performed according to the IEC/IEEE Standard for Binary
5545 | Floating-Point Arithmetic---which means in particular that the conversion
5546 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5547 | largest positive integer is returned.  Otherwise, if the conversion
5548 | overflows, the largest integer with the same sign as `a' is returned.
5549 *----------------------------------------------------------------------------*/
5550 
5551 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5552 {
5553     bool aSign;
5554     int32_t aExp, shiftCount;
5555     uint64_t aSig;
5556 
5557     if (floatx80_invalid_encoding(a)) {
5558         float_raise(float_flag_invalid, status);
5559         return 1 << 31;
5560     }
5561     aSig = extractFloatx80Frac( a );
5562     aExp = extractFloatx80Exp( a );
5563     aSign = extractFloatx80Sign( a );
5564     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5565     shiftCount = 0x4037 - aExp;
5566     if ( shiftCount <= 0 ) shiftCount = 1;
5567     shift64RightJamming( aSig, shiftCount, &aSig );
5568     return roundAndPackInt32(aSign, aSig, status);
5569 
5570 }
5571 
5572 /*----------------------------------------------------------------------------
5573 | Returns the result of converting the extended double-precision floating-
5574 | point value `a' to the 32-bit two's complement integer format.  The
5575 | conversion is performed according to the IEC/IEEE Standard for Binary
5576 | Floating-Point Arithmetic, except that the conversion is always rounded
5577 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5578 | Otherwise, if the conversion overflows, the largest integer with the same
5579 | sign as `a' is returned.
5580 *----------------------------------------------------------------------------*/
5581 
5582 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5583 {
5584     bool aSign;
5585     int32_t aExp, shiftCount;
5586     uint64_t aSig, savedASig;
5587     int32_t z;
5588 
5589     if (floatx80_invalid_encoding(a)) {
5590         float_raise(float_flag_invalid, status);
5591         return 1 << 31;
5592     }
5593     aSig = extractFloatx80Frac( a );
5594     aExp = extractFloatx80Exp( a );
5595     aSign = extractFloatx80Sign( a );
5596     if ( 0x401E < aExp ) {
5597         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5598         goto invalid;
5599     }
5600     else if ( aExp < 0x3FFF ) {
5601         if (aExp || aSig) {
5602             float_raise(float_flag_inexact, status);
5603         }
5604         return 0;
5605     }
5606     shiftCount = 0x403E - aExp;
5607     savedASig = aSig;
5608     aSig >>= shiftCount;
5609     z = aSig;
5610     if ( aSign ) z = - z;
5611     if ( ( z < 0 ) ^ aSign ) {
5612  invalid:
5613         float_raise(float_flag_invalid, status);
5614         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5615     }
5616     if ( ( aSig<<shiftCount ) != savedASig ) {
5617         float_raise(float_flag_inexact, status);
5618     }
5619     return z;
5620 
5621 }
5622 
5623 /*----------------------------------------------------------------------------
5624 | Returns the result of converting the extended double-precision floating-
5625 | point value `a' to the 64-bit two's complement integer format.  The
5626 | conversion is performed according to the IEC/IEEE Standard for Binary
5627 | Floating-Point Arithmetic---which means in particular that the conversion
5628 | is rounded according to the current rounding mode.  If `a' is a NaN,
5629 | the largest positive integer is returned.  Otherwise, if the conversion
5630 | overflows, the largest integer with the same sign as `a' is returned.
5631 *----------------------------------------------------------------------------*/
5632 
5633 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5634 {
5635     bool aSign;
5636     int32_t aExp, shiftCount;
5637     uint64_t aSig, aSigExtra;
5638 
5639     if (floatx80_invalid_encoding(a)) {
5640         float_raise(float_flag_invalid, status);
5641         return 1ULL << 63;
5642     }
5643     aSig = extractFloatx80Frac( a );
5644     aExp = extractFloatx80Exp( a );
5645     aSign = extractFloatx80Sign( a );
5646     shiftCount = 0x403E - aExp;
5647     if ( shiftCount <= 0 ) {
5648         if ( shiftCount ) {
5649             float_raise(float_flag_invalid, status);
5650             if (!aSign || floatx80_is_any_nan(a)) {
5651                 return INT64_MAX;
5652             }
5653             return INT64_MIN;
5654         }
5655         aSigExtra = 0;
5656     }
5657     else {
5658         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5659     }
5660     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5661 
5662 }
5663 
5664 /*----------------------------------------------------------------------------
5665 | Returns the result of converting the extended double-precision floating-
5666 | point value `a' to the 64-bit two's complement integer format.  The
5667 | conversion is performed according to the IEC/IEEE Standard for Binary
5668 | Floating-Point Arithmetic, except that the conversion is always rounded
5669 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5670 | Otherwise, if the conversion overflows, the largest integer with the same
5671 | sign as `a' is returned.
5672 *----------------------------------------------------------------------------*/
5673 
5674 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5675 {
5676     bool aSign;
5677     int32_t aExp, shiftCount;
5678     uint64_t aSig;
5679     int64_t z;
5680 
5681     if (floatx80_invalid_encoding(a)) {
5682         float_raise(float_flag_invalid, status);
5683         return 1ULL << 63;
5684     }
5685     aSig = extractFloatx80Frac( a );
5686     aExp = extractFloatx80Exp( a );
5687     aSign = extractFloatx80Sign( a );
5688     shiftCount = aExp - 0x403E;
5689     if ( 0 <= shiftCount ) {
5690         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5691         if ( ( a.high != 0xC03E ) || aSig ) {
5692             float_raise(float_flag_invalid, status);
5693             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5694                 return INT64_MAX;
5695             }
5696         }
5697         return INT64_MIN;
5698     }
5699     else if ( aExp < 0x3FFF ) {
5700         if (aExp | aSig) {
5701             float_raise(float_flag_inexact, status);
5702         }
5703         return 0;
5704     }
5705     z = aSig>>( - shiftCount );
5706     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5707         float_raise(float_flag_inexact, status);
5708     }
5709     if ( aSign ) z = - z;
5710     return z;
5711 
5712 }
5713 
5714 /*----------------------------------------------------------------------------
5715 | Returns the result of converting the extended double-precision floating-
5716 | point value `a' to the single-precision floating-point format.  The
5717 | conversion is performed according to the IEC/IEEE Standard for Binary
5718 | Floating-Point Arithmetic.
5719 *----------------------------------------------------------------------------*/
5720 
5721 float32 floatx80_to_float32(floatx80 a, float_status *status)
5722 {
5723     bool aSign;
5724     int32_t aExp;
5725     uint64_t aSig;
5726 
5727     if (floatx80_invalid_encoding(a)) {
5728         float_raise(float_flag_invalid, status);
5729         return float32_default_nan(status);
5730     }
5731     aSig = extractFloatx80Frac( a );
5732     aExp = extractFloatx80Exp( a );
5733     aSign = extractFloatx80Sign( a );
5734     if ( aExp == 0x7FFF ) {
5735         if ( (uint64_t) ( aSig<<1 ) ) {
5736             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5737                                              status);
5738             return float32_silence_nan(res, status);
5739         }
5740         return packFloat32( aSign, 0xFF, 0 );
5741     }
5742     shift64RightJamming( aSig, 33, &aSig );
5743     if ( aExp || aSig ) aExp -= 0x3F81;
5744     return roundAndPackFloat32(aSign, aExp, aSig, status);
5745 
5746 }
5747 
5748 /*----------------------------------------------------------------------------
5749 | Returns the result of converting the extended double-precision floating-
5750 | point value `a' to the double-precision floating-point format.  The
5751 | conversion is performed according to the IEC/IEEE Standard for Binary
5752 | Floating-Point Arithmetic.
5753 *----------------------------------------------------------------------------*/
5754 
5755 float64 floatx80_to_float64(floatx80 a, float_status *status)
5756 {
5757     bool aSign;
5758     int32_t aExp;
5759     uint64_t aSig, zSig;
5760 
5761     if (floatx80_invalid_encoding(a)) {
5762         float_raise(float_flag_invalid, status);
5763         return float64_default_nan(status);
5764     }
5765     aSig = extractFloatx80Frac( a );
5766     aExp = extractFloatx80Exp( a );
5767     aSign = extractFloatx80Sign( a );
5768     if ( aExp == 0x7FFF ) {
5769         if ( (uint64_t) ( aSig<<1 ) ) {
5770             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5771                                              status);
5772             return float64_silence_nan(res, status);
5773         }
5774         return packFloat64( aSign, 0x7FF, 0 );
5775     }
5776     shift64RightJamming( aSig, 1, &zSig );
5777     if ( aExp || aSig ) aExp -= 0x3C01;
5778     return roundAndPackFloat64(aSign, aExp, zSig, status);
5779 
5780 }
5781 
5782 /*----------------------------------------------------------------------------
5783 | Returns the result of converting the extended double-precision floating-
5784 | point value `a' to the quadruple-precision floating-point format.  The
5785 | conversion is performed according to the IEC/IEEE Standard for Binary
5786 | Floating-Point Arithmetic.
5787 *----------------------------------------------------------------------------*/
5788 
5789 float128 floatx80_to_float128(floatx80 a, float_status *status)
5790 {
5791     bool aSign;
5792     int aExp;
5793     uint64_t aSig, zSig0, zSig1;
5794 
5795     if (floatx80_invalid_encoding(a)) {
5796         float_raise(float_flag_invalid, status);
5797         return float128_default_nan(status);
5798     }
5799     aSig = extractFloatx80Frac( a );
5800     aExp = extractFloatx80Exp( a );
5801     aSign = extractFloatx80Sign( a );
5802     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5803         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5804                                            status);
5805         return float128_silence_nan(res, status);
5806     }
5807     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5808     return packFloat128( aSign, aExp, zSig0, zSig1 );
5809 
5810 }
5811 
5812 /*----------------------------------------------------------------------------
5813 | Rounds the extended double-precision floating-point value `a'
5814 | to the precision provided by floatx80_rounding_precision and returns the
5815 | result as an extended double-precision floating-point value.
5816 | The operation is performed according to the IEC/IEEE Standard for Binary
5817 | Floating-Point Arithmetic.
5818 *----------------------------------------------------------------------------*/
5819 
5820 floatx80 floatx80_round(floatx80 a, float_status *status)
5821 {
5822     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5823                                 extractFloatx80Sign(a),
5824                                 extractFloatx80Exp(a),
5825                                 extractFloatx80Frac(a), 0, status);
5826 }
5827 
5828 /*----------------------------------------------------------------------------
5829 | Rounds the extended double-precision floating-point value `a' to an integer,
5830 | and returns the result as an extended quadruple-precision floating-point
5831 | value.  The operation is performed according to the IEC/IEEE Standard for
5832 | Binary Floating-Point Arithmetic.
5833 *----------------------------------------------------------------------------*/
5834 
floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    floatx80 z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aExp = extractFloatx80Exp( a );
    /* Exponent >= 63: already an integer (or NaN/Inf). */
    if ( 0x403E <= aExp ) {
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }
    /* Magnitude below 1: result is 0 or +/-1 depending on the mode. */
    if ( aExp < 0x3FFF ) {
        if (    ( aExp == 0 )
             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
            return a;
        }
        float_raise(float_flag_inexact, status);
        aSign = extractFloatx80Sign( a );
        switch (status->float_rounding_mode) {
         case float_round_nearest_even:
            /* Exactly 0.5 rounds to even (0); above 0.5 rounds to 1. */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_ties_away:
            /* 0.5 <= |a| < 1 rounds away to +/-1. */
            if (aExp == 0x3FFE) {
                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));

        case float_round_to_zero:
            break;
        default:
            g_assert_not_reached();
        }
        /* Truncated toward zero: signed zero. */
        return packFloatx80( aSign, 0, 0 );
    }
    /* General case: mask off the fraction bits below the integer LSB. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    switch (status->float_rounding_mode) {
    case float_round_nearest_even:
        z.low += lastBitMask>>1;
        /* Exact tie: clear the LSB to round to even. */
        if ((z.low & roundBitsMask) == 0) {
            z.low &= ~lastBitMask;
        }
        break;
    case float_round_ties_away:
        z.low += lastBitMask >> 1;
        break;
    case float_round_to_zero:
        break;
    case float_round_up:
        if (!extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    case float_round_down:
        if (extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    default:
        abort();
    }
    z.low &= ~ roundBitsMask;
    /* Carry out of the significand: bump the exponent, restore the
     * explicit integer bit. */
    if ( z.low == 0 ) {
        ++z.high;
        z.low = UINT64_C(0x8000000000000000);
    }
    if (z.low != a.low) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
5930 
5931 /*----------------------------------------------------------------------------
5932 | Returns the result of adding the absolute values of the extended double-
5933 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5934 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5935 | The addition is performed according to the IEC/IEEE Standard for Binary
5936 | Floating-Point Arithmetic.
5937 *----------------------------------------------------------------------------*/
5938 
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to it. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) --expDiff;   /* subnormals have exponent bias 1 */
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to it. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment needed. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set means no carry out: the sum is already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the addition: shift right one and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6010 
6011 /*----------------------------------------------------------------------------
6012 | Returns the result of subtracting the absolute values of the extended
6013 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
6014 | difference is negated before being returned.  `zSign' is ignored if the
6015 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
6016 | Standard for Binary Floating-Point Arithmetic.
6017 *----------------------------------------------------------------------------*/
6018 
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here on. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - Inf is invalid. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;   /* subnormals use the same effective exponent as 1 */
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* a - Inf: infinity with the flipped sign. */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* |b| > |a|: compute b - a and flip the result sign. */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* |a| > |b|: compute a - b with the caller-supplied sign. */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
6079 
6080 /*----------------------------------------------------------------------------
6081 | Returns the result of adding the extended double-precision floating-point
6082 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6083 | Standard for Binary Floating-Point Arithmetic.
6084 *----------------------------------------------------------------------------*/
6085 
6086 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6087 {
6088     bool aSign, bSign;
6089 
6090     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6091         float_raise(float_flag_invalid, status);
6092         return floatx80_default_nan(status);
6093     }
6094     aSign = extractFloatx80Sign( a );
6095     bSign = extractFloatx80Sign( b );
6096     if ( aSign == bSign ) {
6097         return addFloatx80Sigs(a, b, aSign, status);
6098     }
6099     else {
6100         return subFloatx80Sigs(a, b, aSign, status);
6101     }
6102 
6103 }
6104 
6105 /*----------------------------------------------------------------------------
6106 | Returns the result of subtracting the extended double-precision floating-
6107 | point values `a' and `b'.  The operation is performed according to the
6108 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6109 *----------------------------------------------------------------------------*/
6110 
6111 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6112 {
6113     bool aSign, bSign;
6114 
6115     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6116         float_raise(float_flag_invalid, status);
6117         return floatx80_default_nan(status);
6118     }
6119     aSign = extractFloatx80Sign( a );
6120     bSign = extractFloatx80Sign( b );
6121     if ( aSign == bSign ) {
6122         return subFloatx80Sigs(a, b, aSign, status);
6123     }
6124     else {
6125         return addFloatx80Sigs(a, b, aSign, status);
6126     }
6127 
6128 }
6129 
6130 /*----------------------------------------------------------------------------
6131 | Returns the result of multiplying the extended double-precision floating-
6132 | point values `a' and `b'.  The operation is performed according to the
6133 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6134 *----------------------------------------------------------------------------*/
6135 
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf * 0 is invalid. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* 64x64 -> 128-bit product of the two significands. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* Product of two normalized significands may need one left shift. */
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6191 
6192 /*----------------------------------------------------------------------------
6193 | Returns the result of dividing the extended double-precision floating-point
6194 | value `a' by the corresponding value `b'.  The operation is performed
6195 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6196 *----------------------------------------------------------------------------*/
6197 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    /* Unnormal/pseudo 80-bit encodings always signal the invalid op. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* 'a' is Inf or NaN: NaNs propagate; Inf / Inf is invalid;
           Inf / finite is a signed Inf. */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        /* finite / Inf is a signed zero (unless b is NaN). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; nonzero / 0 raises divbyzero -> Inf. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Ensure the dividend significand is below the divisor so the
       estimated quotient digit fits in 64 bits. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High quotient word: estimate, then correct downward while the
       remainder is negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low quotient word; only refine it when it is close enough to the
       rounding boundary to matter, and fold any leftover remainder into
       the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6278 
6279 /*----------------------------------------------------------------------------
6280 | Returns the remainder of the extended double-precision floating-point value
6281 | `a' with respect to the corresponding value `b'.  The operation is performed
6282 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6283 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6284 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6285 | the absolute value of the integer quotient.
6286 *----------------------------------------------------------------------------*/
6287 
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Unnormal/pseudo 80-bit encodings always signal the invalid op. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    /* Keep the original exponent so pseudo-denormals can be detected
       later (aExp == 1 after normalization while aExpOrig == 0). */
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* Inf rem anything (or any NaN operand): NaNs propagate,
           otherwise invalid. */
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* finite rem Inf is 'a' unchanged (modulo pseudo-denormal fixup). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x rem 0 is invalid; subnormal divisors are normalized. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2 (or mod requested): quotient is 0, remainder is a. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /* Peel off 62 quotient bits per iteration: estimate a 64-bit digit
       (biased down by 2 so it never overshoots), subtract, and shift the
       running remainder back up. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial digit: estimate, truncate to expDiff bits, then
           correct upward until the remainder is below the scaled divisor. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: round the quotient to nearest even by choosing
           the smaller of remainder and divisor-minus-remainder (ties go
           to the even quotient), flipping the sign when we switch. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6408 
6409 /*----------------------------------------------------------------------------
6410 | Returns the remainder of the extended double-precision floating-point value
6411 | `a' with respect to the corresponding value `b'.  The operation is performed
6412 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6413 *----------------------------------------------------------------------------*/
6414 
6415 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6416 {
6417     uint64_t quotient;
6418     return floatx80_modrem(a, b, false, &quotient, status);
6419 }
6420 
6421 /*----------------------------------------------------------------------------
6422 | Returns the remainder of the extended double-precision floating-point value
6423 | `a' with respect to the corresponding value `b', with the quotient truncated
6424 | toward zero.
6425 *----------------------------------------------------------------------------*/
6426 
6427 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6428 {
6429     uint64_t quotient;
6430     return floatx80_modrem(a, b, true, &quotient, status);
6431 }
6432 
6433 /*----------------------------------------------------------------------------
6434 | Returns the square root of the extended double-precision floating-point
6435 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6436 | for Binary Floating-Point Arithmetic.
6437 *----------------------------------------------------------------------------*/
6438 
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Unnormal/pseudo 80-bit encodings always signal the invalid op. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* sqrt(NaN) propagates; sqrt(+Inf) = +Inf; sqrt(-Inf) invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative (other than -0) is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent, re-biased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* 32-bit initial estimate, refined to 64 bits via one division step. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high root word downward while zSig0^2 exceeds the
       operand (remainder negative). */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low root word; refine only near the rounding boundary, tracking the
       192-bit remainder, and fold leftovers into the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Recombine the two root words into a 128-bit significand. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6503 
6504 /*----------------------------------------------------------------------------
6505 | Returns the result of converting the quadruple-precision floating-point
6506 | value `a' to the 32-bit two's complement integer format.  The conversion
6507 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6508 | Arithmetic---which means in particular that the conversion is rounded
6509 | according to the current rounding mode.  If `a' is a NaN, the largest
6510 | positive integer is returned.  Otherwise, if the conversion overflows, the
6511 | largest integer with the same sign as `a' is returned.
6512 *----------------------------------------------------------------------------*/
6513 
int32_t float128_to_int32(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* NaN converts to the largest positive integer: clear the sign. */
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
    /* Make the integer bit explicit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    /* Fold the low fraction word into the sticky bit. */
    aSig0 |= ( aSig1 != 0 );
    shiftCount = 0x4028 - aExp;
    /* Right-shift with jamming so discarded bits still affect rounding. */
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
    return roundAndPackInt32(aSign, aSig0, status);

}
6532 
6533 /*----------------------------------------------------------------------------
6534 | Returns the result of converting the quadruple-precision floating-point
6535 | value `a' to the 32-bit two's complement integer format.  The conversion
6536 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6537 | Arithmetic, except that the conversion is always rounded toward zero.  If
6538 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6539 | conversion overflows, the largest integer with the same sign as `a' is
6540 | returned.
6541 *----------------------------------------------------------------------------*/
6542 
int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1, savedASig;
    int32_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Fold the low fraction word into the sticky bit. */
    aSig0 |= ( aSig1 != 0 );
    if ( 0x401E < aExp ) {
        /* Magnitude too large for int32; NaN reports as positive. */
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FFF ) {
        /* |a| < 1 truncates to 0; nonzero values are inexact. */
        if (aExp || aSig0) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = 0x402F - aExp;
    /* Keep the pre-shift significand to detect discarded (inexact) bits. */
    savedASig = aSig0;
    aSig0 >>= shiftCount;
    z = aSig0;
    if ( aSign ) z = - z;
    /* Sign mismatch after negation means the value overflowed int32. */
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise(float_flag_invalid, status);
        return aSign ? INT32_MIN : INT32_MAX;
    }
    if ( ( aSig0<<shiftCount ) != savedASig ) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
6582 
6583 /*----------------------------------------------------------------------------
6584 | Returns the result of converting the quadruple-precision floating-point
6585 | value `a' to the 64-bit two's complement integer format.  The conversion
6586 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6587 | Arithmetic---which means in particular that the conversion is rounded
6588 | according to the current rounding mode.  If `a' is a NaN, the largest
6589 | positive integer is returned.  Otherwise, if the conversion overflows, the
6590 | largest integer with the same sign as `a' is returned.
6591 *----------------------------------------------------------------------------*/
6592 
int64_t float128_to_int64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Make the integer bit explicit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = 0x402F - aExp;
    if ( shiftCount <= 0 ) {
        /* Exponent large enough that overflow is possible. */
        if ( 0x403E < aExp ) {
            float_raise(float_flag_invalid, status);
            /* Positive overflow and NaN saturate to INT64_MAX; negative
               overflow (including exactly -2^63... smaller) to INT64_MIN. */
            if (    ! aSign
                 || (    ( aExp == 0x7FFF )
                      && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
                    )
               ) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
    }
    else {
        /* Right-shift keeping round/sticky bits for the rounding step. */
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
    }
    return roundAndPackInt64(aSign, aSig0, aSig1, status);

}
6625 
6626 /*----------------------------------------------------------------------------
6627 | Returns the result of converting the quadruple-precision floating-point
6628 | value `a' to the 64-bit two's complement integer format.  The conversion
6629 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6630 | Arithmetic, except that the conversion is always rounded toward zero.
6631 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6632 | the conversion overflows, the largest integer with the same sign as `a' is
6633 | returned.
6634 *----------------------------------------------------------------------------*/
6635 
int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig0, aSig1;
    int64_t z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* Make the integer bit explicit for normal numbers. */
    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
    shiftCount = aExp - 0x402F;
    if ( 0 < shiftCount ) {
        if ( 0x403E <= aExp ) {
            aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
            /* Special case: values in [-2^63, -2^63 + epsilon) truncate
               to INT64_MIN without raising invalid (only inexact). */
            if (    ( a.high == UINT64_C(0xC03E000000000000) )
                 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
                if (aSig1) {
                    float_raise(float_flag_inexact, status);
                }
            }
            else {
                float_raise(float_flag_invalid, status);
                /* Positive overflow and NaN saturate to INT64_MAX. */
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
                    return INT64_MAX;
                }
            }
            return INT64_MIN;
        }
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
        /* Bits shifted out of the low word mean the result is inexact. */
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* |a| < 1 truncates to 0; nonzero values are inexact. */
            if ( aExp | aSig0 | aSig1 ) {
                float_raise(float_flag_inexact, status);
            }
            return 0;
        }
        z = aSig0>>( - shiftCount );
        if (    aSig1
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
            float_raise(float_flag_inexact, status);
        }
    }
    if ( aSign ) z = - z;
    return z;

}
6688 
6689 /*----------------------------------------------------------------------------
6690 | Returns the result of converting the quadruple-precision floating-point value
6691 | `a' to the 64-bit unsigned integer format.  The conversion is
6692 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6693 | Arithmetic---which means in particular that the conversion is rounded
6694 | according to the current rounding mode.  If `a' is a NaN, the largest
6695 | positive integer is returned.  If the conversion overflows, the
6696 | largest unsigned integer is returned.  If 'a' is negative, the value is
6697 | rounded and zero is returned; negative values that do not round to zero
6698 | will raise the inexact exception.
6699 *----------------------------------------------------------------------------*/
6700 
uint64_t float128_to_uint64(float128 a, float_status *status)
{
    bool aSign;
    int aExp;
    int shiftCount;
    uint64_t aSig0, aSig1;

    aSig0 = extractFloat128Frac0(a);
    aSig1 = extractFloat128Frac1(a);
    aExp = extractFloat128Exp(a);
    aSign = extractFloat128Sign(a);
    /* Negative values with magnitude >= 1 (and negative NaNs) cannot
       convert: invalid, with NaN saturating high and others to 0.
       Smaller negatives fall through and round toward/to zero below. */
    if (aSign && (aExp > 0x3FFE)) {
        float_raise(float_flag_invalid, status);
        if (float128_is_any_nan(a)) {
            return UINT64_MAX;
        } else {
            return 0;
        }
    }
    /* Make the integer bit explicit for normal numbers. */
    if (aExp) {
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    shiftCount = 0x402F - aExp;
    if (shiftCount <= 0) {
        /* Exponent too large for uint64: saturate. */
        if (0x403E < aExp) {
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
    } else {
        /* Right-shift keeping round/sticky bits for the rounding step. */
        shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
    }
    return roundAndPackUint64(aSign, aSig0, aSig1, status);
}
6735 
6736 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6737 {
6738     uint64_t v;
6739     signed char current_rounding_mode = status->float_rounding_mode;
6740 
6741     set_float_rounding_mode(float_round_to_zero, status);
6742     v = float128_to_uint64(a, status);
6743     set_float_rounding_mode(current_rounding_mode, status);
6744 
6745     return v;
6746 }
6747 
6748 /*----------------------------------------------------------------------------
6749 | Returns the result of converting the quadruple-precision floating-point
6750 | value `a' to the 32-bit unsigned integer format.  The conversion
6751 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6752 | Arithmetic except that the conversion is always rounded toward zero.
6753 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6754 | if the conversion overflows, the largest unsigned integer is returned.
6755 | If 'a' is negative, the value is rounded and zero is returned; negative
6756 | values that do not round to zero will raise the inexact exception.
6757 *----------------------------------------------------------------------------*/
6758 
6759 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6760 {
6761     uint64_t v;
6762     uint32_t res;
6763     int old_exc_flags = get_float_exception_flags(status);
6764 
6765     v = float128_to_uint64_round_to_zero(a, status);
6766     if (v > 0xffffffff) {
6767         res = 0xffffffff;
6768     } else {
6769         return v;
6770     }
6771     set_float_exception_flags(old_exc_flags, status);
6772     float_raise(float_flag_invalid, status);
6773     return res;
6774 }
6775 
6776 /*----------------------------------------------------------------------------
6777 | Returns the result of converting the quadruple-precision floating-point value
6778 | `a' to the 32-bit unsigned integer format.  The conversion is
6779 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6780 | Arithmetic---which means in particular that the conversion is rounded
6781 | according to the current rounding mode.  If `a' is a NaN, the largest
6782 | positive integer is returned.  If the conversion overflows, the
6783 | largest unsigned integer is returned.  If 'a' is negative, the value is
6784 | rounded and zero is returned; negative values that do not round to zero
6785 | will raise the inexact exception.
6786 *----------------------------------------------------------------------------*/
6787 
6788 uint32_t float128_to_uint32(float128 a, float_status *status)
6789 {
6790     uint64_t v;
6791     uint32_t res;
6792     int old_exc_flags = get_float_exception_flags(status);
6793 
6794     v = float128_to_uint64(a, status);
6795     if (v > 0xffffffff) {
6796         res = 0xffffffff;
6797     } else {
6798         return v;
6799     }
6800     set_float_exception_flags(old_exc_flags, status);
6801     float_raise(float_flag_invalid, status);
6802     return res;
6803 }
6804 
6805 /*----------------------------------------------------------------------------
6806 | Returns the result of converting the quadruple-precision floating-point
6807 | value `a' to the single-precision floating-point format.  The conversion
6808 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6809 | Arithmetic.
6810 *----------------------------------------------------------------------------*/
6811 
float32 float128_to_float32(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;
    uint32_t zSig;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaNs convert through the common-NaN form; Inf stays Inf. */
        if ( aSig0 | aSig1 ) {
            return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
        }
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* Fold the low fraction word into the sticky bit, then narrow the
       significand to 32 bits with jamming. */
    aSig0 |= ( aSig1 != 0 );
    shift64RightJamming( aSig0, 18, &aSig0 );
    zSig = aSig0;
    /* For nonzero values, set the explicit integer bit and re-bias the
       exponent for roundAndPackFloat32. */
    if ( aExp || zSig ) {
        zSig |= 0x40000000;
        aExp -= 0x3F81;
    }
    return roundAndPackFloat32(aSign, aExp, zSig, status);

}
6839 
6840 /*----------------------------------------------------------------------------
6841 | Returns the result of converting the quadruple-precision floating-point
6842 | value `a' to the double-precision floating-point format.  The conversion
6843 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6844 | Arithmetic.
6845 *----------------------------------------------------------------------------*/
6846 
float64 float128_to_float64(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaNs convert through the common-NaN form; Inf stays Inf. */
        if ( aSig0 | aSig1 ) {
            return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
        }
        return packFloat64( aSign, 0x7FF, 0 );
    }
    /* Left-align the 112-bit fraction, folding the remaining low bits
       into the sticky bit. */
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
    aSig0 |= ( aSig1 != 0 );
    /* For nonzero values, set the explicit integer bit and re-bias the
       exponent for roundAndPackFloat64. */
    if ( aExp || aSig0 ) {
        aSig0 |= UINT64_C(0x4000000000000000);
        aExp -= 0x3C01;
    }
    return roundAndPackFloat64(aSign, aExp, aSig0, status);

}
6872 
6873 /*----------------------------------------------------------------------------
6874 | Returns the result of converting the quadruple-precision floating-point
6875 | value `a' to the extended double-precision floating-point format.  The
6876 | conversion is performed according to the IEC/IEEE Standard for Binary
6877 | Floating-Point Arithmetic.
6878 *----------------------------------------------------------------------------*/
6879 
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaNs go through the common-NaN form and are quieted, since a
           conversion must not produce a signaling NaN. */
        if ( aSig0 | aSig1 ) {
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* Zeros pass through; subnormals are normalized; normal numbers get
       the explicit integer bit set. */
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    /* Left-align the significand for the 64-bit explicit-integer format
       and round at full 80-bit (64-bit fraction) precision. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6910 
6911 /*----------------------------------------------------------------------------
6912 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6913 | returns the result as a quadruple-precision floating-point value.  The
6914 | operation is performed according to the IEC/IEEE Standard for Binary
6915 | Floating-Point Arithmetic.
6916 *----------------------------------------------------------------------------*/
6917 
float128 float128_round_to_int(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    float128 z;

    aExp = extractFloat128Exp( a );
    if ( 0x402F <= aExp ) {
        /* Exponent >= 48: the binary point lies within (or below) the
         * low 64 significand bits. */
        if ( 0x406F <= aExp ) {
            /* Exponent >= 112: every fraction bit is an integer bit, so
             * the value is already integral (or Inf/NaN). */
            if (    ( aExp == 0x7FFF )
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
               ) {
                return propagateFloat128NaN(a, a, status);
            }
            return a;
        }
        /* lastBitMask selects the lowest integer-valued bit of z.low;
         * roundBitsMask covers the fraction bits below it.  The double
         * shift allows lastBitMask to become 0 when the last integer
         * bit is the lsb of z.high (aExp == 0x406E). */
        lastBitMask = 1;
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
        roundBitsMask = lastBitMask - 1;
        z = a;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            if ( lastBitMask ) {
                /* Add half an ulp; on an exact tie clear the last bit
                 * to round to even. */
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
            }
            else {
                /* lastBitMask == 0: the rounding bit is the msb of z.low
                 * and the last integer bit is the lsb of z.high. */
                if ( (int64_t) z.low < 0 ) {
                    ++z.high;
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
                }
            }
            break;
        case float_round_ties_away:
            /* Like nearest-even but without the tie-to-even correction. */
            if (lastBitMask) {
                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
            } else {
                if ((int64_t) z.low < 0) {
                    ++z.high;
                }
            }
            break;
        case float_round_to_zero:
            /* Truncation: just clear the fraction bits below. */
            break;
        case float_round_up:
            /* Round toward +Inf: bump magnitude only for positive values. */
            if (!extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_down:
            /* Round toward -Inf: bump magnitude only for negative values. */
            if (extractFloat128Sign(z)) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        case float_round_to_odd:
            /*
             * Note that if lastBitMask == 0, the last bit is the lsb
             * of high, and roundBitsMask == -1.
             */
            if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
            }
            break;
        default:
            abort();
        }
        z.low &= ~ roundBitsMask;
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* |a| < 1: the result is 0 or +/-1 depending on mode. */
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
            float_raise(float_flag_inexact, status);
            aSign = extractFloat128Sign( a );
            switch (status->float_rounding_mode) {
            case float_round_nearest_even:
                /* Round to +/-1 only when |a| > 0.5 (aExp == 0x3FFE with
                 * non-zero fraction); exactly 0.5 ties to even, i.e. 0. */
                if (    ( aExp == 0x3FFE )
                     && (   extractFloat128Frac0( a )
                          | extractFloat128Frac1( a ) )
                   ) {
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
                }
                break;
            case float_round_ties_away:
                /* |a| >= 0.5 rounds away from zero to +/-1. */
                if (aExp == 0x3FFE) {
                    return packFloat128(aSign, 0x3FFF, 0, 0);
                }
                break;
            case float_round_down:
                return
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
                    : packFloat128( 0, 0, 0, 0 );
            case float_round_up:
                return
                      aSign ? packFloat128( 1, 0, 0, 0 )
                    : packFloat128( 0, 0x3FFF, 0, 0 );

            case float_round_to_odd:
                return packFloat128(aSign, 0x3FFF, 0, 0);

            case float_round_to_zero:
                break;
            }
            return packFloat128( aSign, 0, 0, 0 );
        }
        /* 1 <= |a| < 2^48: the binary point lies within z.high; z.low is
         * entirely fraction and is dropped (its bits act as sticky). */
        lastBitMask = 1;
        lastBitMask <<= 0x402F - aExp;
        roundBitsMask = lastBitMask - 1;
        z.low = 0;
        z.high = a.high;
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            z.high += lastBitMask>>1;
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
                /* Exact tie: clear the last bit to round to even. */
                z.high &= ~ lastBitMask;
            }
            break;
        case float_round_ties_away:
            z.high += lastBitMask>>1;
            break;
        case float_round_to_zero:
            break;
        case float_round_up:
            if (!extractFloat128Sign(z)) {
                /* Fold a.low into a sticky bit so any low fraction forces
                 * rounding up. */
                z.high |= ( a.low != 0 );
                z.high += roundBitsMask;
            }
            break;
        case float_round_down:
            if (extractFloat128Sign(z)) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        case float_round_to_odd:
            if ((z.high & lastBitMask) == 0) {
                z.high |= (a.low != 0);
                z.high += roundBitsMask;
            }
            break;
        default:
            abort();
        }
        z.high &= ~ roundBitsMask;
    }
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
        /* Any change implies the input was not already integral. */
        float_raise(float_flag_inexact, status);
    }
    return z;

}
7069 
7070 /*----------------------------------------------------------------------------
7071 | Returns the result of dividing the quadruple-precision floating-point value
7072 | `a' by the corresponding value `b'.  The operation is performed according to
7073 | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7074 *----------------------------------------------------------------------------*/
7075 
float128 float128_div(float128 a, float128 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    bSign = extractFloat128Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if (bSig0 | bSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        /* Inf / finite = Inf. */
        return packFloat128( zSign, 0x7FFF, 0, 0 );
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* finite / Inf = 0. */
        return packFloat128( zSign, 0, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
                /* 0 / 0 is invalid; produces the default NaN. */
 invalid:
                float_raise(float_flag_invalid, status);
                return float128_default_nan(status);
            }
            /* Non-zero / 0 = Inf with divide-by-zero raised. */
            float_raise(float_flag_divbyzero, status);
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    zExp = aExp - bExp + 0x3FFD;
    /* Make the implicit integer bits explicit and left-align both
     * significands to the top of their 128-bit pairs. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
        /* Keep the quotient below 2 by halving the dividend. */
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
        ++zExp;
    }
    /* Estimate the high 64 quotient bits, then decrement the (possibly
     * slightly too large) estimate until the remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
    }
    /* Next 64 quotient bits from the partial remainder. */
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
        /* Estimate is near a rounding boundary: compute the exact
         * remainder and fold any sticky bits into zSig1's lsb. */
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
7156 
7157 /*----------------------------------------------------------------------------
7158 | Returns the remainder of the quadruple-precision floating-point value `a'
7159 | with respect to the corresponding value `b'.  The operation is performed
7160 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7161 *----------------------------------------------------------------------------*/
7162 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(Inf, x) is invalid. */
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* rem(finite, Inf) is a, exactly. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* rem(x, 0) is invalid; produces the default NaN. */
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b| / 2: the remainder is a itself. */
    if ( expDiff < -1 ) return a;
    /* Make the implicit integer bits explicit and align; when expDiff is
     * -1 shift one bit less so a single compare/subtract step suffices. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Peel off 61 quotient bits per iteration until the exponents are
     * within 64 of each other.  The estimate is lowered by 4 so it can
     * never exceed the true quotient. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial step: scale q down to the remaining exponent
         * difference and subtract its contribution. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Subtract b until the remainder goes negative, keeping the last
     * non-negative value as the alternate candidate. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* sigMean = (negative remainder) + (alternate remainder); its sign
     * picks the candidate nearer to zero, with ties going to even q. */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    /* A negative remainder is negated; its sign flips the result's. */
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
7263 
7264 /*----------------------------------------------------------------------------
7265 | Returns the square root of the quadruple-precision floating-point value `a'.
7266 | The operation is performed according to the IEC/IEEE Standard for Binary
7267 | Floating-Point Arithmetic.
7268 *----------------------------------------------------------------------------*/
7269 
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        /* sqrt(+Inf) = +Inf; sqrt(-Inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) = -0; any other negative input is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Square root halves the unbiased exponent. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* 32-bit seed estimate, refined to 64 bits by one Newton-like step;
     * the alignment shift depends on the exponent's parity. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high 64 result bits until rem = aSig - zSig0^2 >= 0. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low 64 result bits estimated from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        /* Estimate is near a rounding boundary: compute the exact
         * remainder and fold any sticky bits into zSig1's lsb. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
7331 
/*
 * Compare two extended-precision values.  If `is_quiet' is false, any NaN
 * operand raises the invalid exception; if true, only signaling NaNs do.
 * Returns a FloatRelation: less (-1), equal (0), greater (1), or unordered.
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        /* Malformed 80-bit encodings are always invalid and unordered. */
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* NaN check: exponent all-ones with a non-zero fraction below the
     * explicit integer bit (hence the <<1). */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Signs differ: the positive operand is greater.
             * 1 - 2*aSign maps aSign 0 -> greater, 1 -> less. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison.  */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: magnitude order, flipped for negatives. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7379 
7380 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7381 {
7382     return floatx80_compare_internal(a, b, 0, status);
7383 }
7384 
7385 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7386                                      float_status *status)
7387 {
7388     return floatx80_compare_internal(a, b, 1, status);
7389 }
7390 
/*
 * Compare two quadruple-precision values.  If `is_quiet' is false, any NaN
 * operand raises the invalid exception; if true, only signaling NaNs do.
 * Returns a FloatRelation: less (-1), equal (0), greater (1), or unordered.
 */
static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* NaN check: exponent all-ones with a non-zero fraction. */
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        /* The <<1 discards both sign bits so +0 and -0 compare equal. */
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Signs differ: the positive operand is greater.
             * 1 - 2*aSign maps aSign 0 -> greater, 1 -> less. */
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: magnitude order, flipped for negatives. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
7425 
7426 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7427 {
7428     return float128_compare_internal(a, b, 0, status);
7429 }
7430 
7431 FloatRelation float128_compare_quiet(float128 a, float128 b,
7432                                      float_status *status)
7433 {
7434     return float128_compare_internal(a, b, 1, status);
7435 }
7436 
/*
 * Scale the extended-precision value `a' by 2^n, rounding the result
 * according to the current rounding mode and precision in `status'.
 */
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* Inf is unchanged; NaNs propagate (fraction below the explicit
         * integer bit is non-zero, hence the <<1). */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            /* +/-0 scales to itself. */
            return a;
        }
        /* Denormal: bump the exponent to its effective value before
         * renormalizing below. */
        aExp++;
    }

    /* Clamp n so aExp += n cannot overflow int32_t; values this large
     * already guarantee overflow/underflow of the result. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}
7475 
/*
 * Scale the quadruple-precision value `a' by 2^n, rounding the result
 * according to the current rounding mode in `status'.
 */
float128 float128_scalbn(float128 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaNs propagate; Inf is unchanged. */
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN(a, a, status);
        }
        return a;
    }
    if (aExp != 0) {
        /* Make the implicit integer bit explicit. */
        aSig0 |= UINT64_C(0x0001000000000000);
    } else if (aSig0 == 0 && aSig1 == 0) {
        /* +/-0 scales to itself. */
        return a;
    } else {
        /* Denormal: bump the exponent to its effective value. */
        aExp++;
    }

    /* Clamp n so aExp += n cannot overflow int32_t; values this large
     * already guarantee overflow/underflow of the result. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* The -1 compensates for the renormalization performed by
     * normalizeRoundAndPackFloat128 on the explicit-integer-bit form. */
    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                         , status);

}
7511 
7512 static void __attribute__((constructor)) softfloat_init(void)
7513 {
7514     union_float64 ua, ub, uc, ur;
7515 
7516     if (QEMU_NO_HARDFLOAT) {
7517         return;
7518     }
7519     /*
7520      * Test that the host's FMA is not obviously broken. For example,
7521      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7522      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7523      */
7524     ua.s = 0x0020000000000001ULL;
7525     ub.s = 0x3ca0000000000000ULL;
7526     uc.s = 0x0020000000000000ULL;
7527     ur.h = fma(ua.h, ub.h, uc.h);
7528     if (ur.s != 0x0020000000000001ULL) {
7529         force_soft_fma = true;
7530     }
7531 }
7532